|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +# Copyright © 2017, Chris Warrick and others. |
| 4 | + |
| 5 | +# Permission is hereby granted, free of charge, to any |
| 6 | +# person obtaining a copy of this software and associated |
| 7 | +# documentation files (the "Software"), to deal in the |
| 8 | +# Software without restriction, including without limitation |
| 9 | +# the rights to use, copy, modify, merge, publish, |
| 10 | +# distribute, sublicense, and/or sell copies of the |
| 11 | +# Software, and to permit persons to whom the Software is |
| 12 | +# furnished to do so, subject to the following conditions: |
| 13 | +# |
| 14 | +# The above copyright notice and this permission notice |
| 15 | +# shall be included in all copies or substantial portions of |
| 16 | +# the Software. |
| 17 | +# |
| 18 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY |
| 19 | +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
| 20 | +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
| 21 | +# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS |
| 22 | +# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 23 | +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| 24 | +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 25 | +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 26 | + |
| 27 | +"""Generate JSON Feeds.""" |
| 28 | + |
| 29 | +from __future__ import unicode_literals |
| 30 | +import json |
| 31 | +import io |
| 32 | +import os |
| 33 | +import lxml |
| 34 | + |
| 35 | +from nikola.plugin_categories import Task |
| 36 | +from nikola import utils |
| 37 | + |
| 38 | +try: |
| 39 | + from urlparse import urljoin |
| 40 | +except ImportError: |
| 41 | + from urllib.parse import urljoin # NOQA |
| 42 | + |
| 43 | + |
class JSONFeed(Task):
    """Generate JSON feeds.

    One feed is produced for the main index of every language, plus one for
    every classification of each taxonomy listed in ``supported_taxonomies``.
    When the ``JSONFEED_APPEND_LINKS`` setting is true (the default), HTML
    ``<link rel="alternate">`` tags pointing at the feeds are added through
    the ``extra_head`` template hook.
    """

    name = "jsonfeed"
    # Taxonomy plugin name -> name of the path handler that serves its feeds.
    # For every handler ``X`` listed here, the method ``X_path`` below is
    # registered with the site in set_site().
    supported_taxonomies = {
        'archive': 'archive_jsonfeed',
        'author': 'author_jsonfeed',
        'category': 'category_jsonfeed',
        'section_index': 'section_index_jsonfeed',
        'tag': 'tag_jsonfeed',
    }
    # Flipped to True after the "need Nikola >= 7.8.6" warning was logged,
    # so the warning is emitted only once.
    _section_archive_link_warned = False

    def set_site(self, site):
        """Set site, which is a Nikola instance.

        Besides storing the site, this snapshots the configuration values
        the feeds depend on into ``self.kw`` (also used for up-to-date
        checks in generate_feed_task), registers the path handlers and,
        if enabled, the HTML link hook.
        """
        super(JSONFeed, self).set_site(site)

        config = self.site.config
        self.kw = {
            'feed_links_append_query': config['FEED_LINKS_APPEND_QUERY'],
            'feed_length': config['FEED_LENGTH'],
            'feed_plain': config['FEED_PLAIN'],
            'feed_previewimage': config['FEED_PREVIEWIMAGE'],
            'feed_read_more_link': config['FEED_READ_MORE_LINK'],
            'feed_teasers': config['FEED_TEASERS'],
            # Only this setting is optional; all others must exist.
            'jsonfeed_append_links': config.get('JSONFEED_APPEND_LINKS', True),
            'site_url': config['SITE_URL'],
            'blog_title': config['BLOG_TITLE'],
            'blog_description': config['BLOG_DESCRIPTION'],
            'blog_author': config['BLOG_AUTHOR'],
            'tag_pages_titles': config['TAG_PAGES_TITLES'],
            'category_pages_titles': config['CATEGORY_PAGES_TITLES'],
            'posts_section_title': config['POSTS_SECTION_TITLE'],
            'archives_are_indexes': config['ARCHIVES_ARE_INDEXES'],
        }

        self.site.register_path_handler("index_jsonfeed", self.index_jsonfeed_path)
        for handler_name in self.supported_taxonomies.values():
            self.site.register_path_handler(handler_name, getattr(self, handler_name + '_path'))

        # Add links if desired
        if self.kw['jsonfeed_append_links']:
            # NOTE(review): the second argument presumably asks the hook to
            # pass (site, context) to the callback, matching the signature
            # of jsonfeed_html_link -- confirm against Nikola's TemplateHookRegistry.
            self.site.template_hooks['extra_head'].append(self.jsonfeed_html_link, True)

    def _output_path(self, path_handler, classification, lang):
        """Return the normalized output file path for a feed inside OUTPUT_FOLDER."""
        return os.path.normpath(os.path.join(
            self.site.config['OUTPUT_FOLDER'],
            self.site.path(path_handler, classification, lang)))

    def gen_tasks(self):
        """Generate JSON feeds.

        Yields the group task first, then, for every language, the task
        for the main feed followed by the tasks for each supported
        taxonomy's classifications.
        """
        self.site.scan_posts()
        yield self.group_task()

        for lang in self.site.translations:
            # Main feed
            title = self.kw['blog_title'](lang)
            link = self.kw['site_url']
            description = self.kw['blog_description'](lang)
            timeline = self.site.posts[:self.kw['feed_length']]
            output_name = self._output_path("index_jsonfeed", "", lang)
            feed_url = self.get_link("index_jsonfeed", "", lang)

            yield self.generate_feed_task(lang, title, link, description,
                                          timeline, feed_url, output_name)

            for classification_name, path_handler in self.supported_taxonomies.items():
                taxonomy = self.site.taxonomy_plugins[classification_name]

                # Archive feeds are generated only when archives are indexes.
                if classification_name == "archive" and not self.kw['archives_are_indexes']:
                    continue

                # Collect the post lists for this language (and for the other
                # languages too, if the taxonomy asks for that).  Entries from
                # later languages replace same-named earlier ones.
                classification_timelines = {}
                per_language = self.site.posts_per_classification[taxonomy.classification_name]
                for tlang, posts_per_classification in per_language.items():
                    if lang != tlang and not taxonomy.also_create_classifications_from_other_languages:
                        continue
                    classification_timelines.update(posts_per_classification)

                for classification, timeline in classification_timelines.items():
                    if not classification:
                        continue
                    if taxonomy.has_hierarchy:
                        node = self.site.hierarchy_lookup_per_classification[taxonomy.classification_name][lang][classification]
                        taxo_context = taxonomy.provide_context_and_uptodate(classification, lang, node)[0]
                    else:
                        taxo_context = taxonomy.provide_context_and_uptodate(classification, lang)[0]
                    title = taxo_context.get('title', classification)
                    link = self.get_link(classification_name, classification, lang)
                    description = taxo_context.get('description', self.kw['blog_description'](lang))
                    timeline = timeline[:self.kw['feed_length']]
                    output_name = self._output_path(path_handler, classification, lang)
                    feed_url = self.get_link(path_handler, classification, lang)

                    # Special handling for author pages: the feed-level author
                    # is the author the page is about, not the blog author.
                    if classification_name == "author":
                        primary_author = {
                            'name': classification,
                            'url': link
                        }
                    else:
                        primary_author = None

                    yield self.generate_feed_task(lang, title, link, description,
                                                  timeline, feed_url, output_name, primary_author)

    def index_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to main JSON Feed.

        The result is a list of path components; empty components
        (e.g. an empty translation prefix) are dropped.
        """
        return [_f for _f in [self.site.config['TRANSLATIONS'][lang], 'feed.json'] if _f]

    def archive_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to archive JSON Feed (list of non-empty components)."""
        return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
                              self.site.config['ARCHIVE_PATH'], name, 'feed.json'] if _f]

    def author_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to author JSON Feed (list of non-empty components).

        The author name is slugified only if SLUG_AUTHOR_PATH is set.
        """
        if self.site.config['SLUG_AUTHOR_PATH']:
            filename = utils.slugify(name, lang) + '-feed.json'
        else:
            filename = name + '-feed.json'
        return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
                              self.site.config['AUTHOR_PATH'](lang), filename] if _f]

    def category_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to category JSON Feed (list of non-empty components)."""
        t = self.site.taxonomy_plugins['category']
        # Only the first element of the slugified hierarchy is used.
        name = t.slugify_category_name(t.extract_hierarchy(name), lang)[0]
        return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
                              self.site.config['CATEGORY_PATH'](lang), name + '-feed.json'] if _f]

    def section_index_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to section JSON Feed (list of non-empty components)."""
        return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
                              self.site.config['SECTION_PATH'](lang), name, 'feed.json'] if _f]

    def tag_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to tag JSON Feed (list of non-empty components)."""
        t = self.site.taxonomy_plugins['tag']
        name = t.slugify_tag_name(name, lang)
        return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
                              self.site.config['TAG_PATH'](lang), name + '-feed.json'] if _f]

    def get_link(self, path_handler, classification, lang):
        """Get link for a page, joined onto the configured BASE_URL."""
        # Leading slashes are stripped so urljoin() appends to BASE_URL
        # instead of replacing its path.
        return urljoin(self.site.config['BASE_URL'], self.site.link(path_handler, classification, lang).lstrip('/'))

    def _warn_section_archive_links(self):
        """Log, at most once, that section/archive feed links need a newer Nikola."""
        if not self._section_archive_link_warned:
            utils.LOGGER.warning("To create links for section and archive JSON feeds, you need Nikola >= 7.8.6.")
            self._section_archive_link_warned = True

    def jsonfeed_html_link(self, site, context):
        """Generate HTML fragment with link to JSON feed.

        The feed is chosen from ``context['pagekind']``.  Returns an empty
        string for unsupported page kinds, or when an archive/section page
        does not provide the name needed to build the link.  On sites with
        more than one translation, one ``<link>`` per language is returned.
        """
        pagekind = context['pagekind']
        lang = context['lang']
        fragment = '<link rel="alternate" type="application/json" title="{title}" href="{url}">\n'
        if 'main_index' in pagekind:
            path_handler = "index_jsonfeed"
            name = ""
        elif 'author_page' in pagekind:
            path_handler = "author_jsonfeed"
            name = context["author"]
        elif 'tag_page' in pagekind:
            # context["kind"] names both the handler prefix and the context
            # key that holds the classification.
            path_handler = context["kind"] + "_jsonfeed"
            name = context[context["kind"]]
        elif 'archive_page' in pagekind:
            path_handler = "archive_jsonfeed"
            if "archive_name" not in context:
                self._warn_section_archive_links()
                return ''
            name = context["archive_name"]
        elif 'section_page' in pagekind:
            path_handler = "section_index_jsonfeed"
            if "section" not in context:
                self._warn_section_archive_links()
                return ''
            name = context["section"]
        else:
            return ''  # Do nothing on unsupported pages

        if len(self.site.translations) > 1:
            out = ""
            for feed_lang in self.site.translations:
                title = "JSON Feed ({0})".format(feed_lang)
                url = self.site.link(path_handler, name, feed_lang)
                out += fragment.format(title=title, url=url)
            return out

        title = "JSON Feed"
        url = self.site.link(path_handler, name, lang)
        return fragment.format(title=title, url=url)

    def generate_feed_task(self, lang, title, link, description, timeline,
                           feed_url, output_name, primary_author=None):
        """Generate a task to create a feed.

        This is a generator that yields a single task dict (after the
        site's FILTERS were applied to it).  The task depends on the
        dependencies of every post in *timeline* and runs generate_feed()
        with the given arguments.
        """
        # Build dependency list
        deps = []
        deps_uptodate = []
        for post in timeline:
            deps += post.deps(lang)
            deps_uptodate += post.deps_uptodate(lang)

        task = {
            'basename': str(self.name),
            'name': str(output_name),
            'targets': [output_name],
            'file_dep': deps,
            'task_dep': ['render_posts', 'render_taxonomies'],
            'actions': [(self.generate_feed, (lang, title, link, description,
                                              timeline, feed_url, output_name,
                                              primary_author))],
            'uptodate': [utils.config_changed(self.kw, 'jsonfeed:' + output_name)] + deps_uptodate,
            'clean': True
        }

        yield utils.apply_filters(task, self.site.config['FILTERS'])

    def _massage_html(self, post, lang, data):
        """Return the feed-ready HTML for *post*, built from the HTML string *data*.

        Optionally prepends the preview image, rewrites every link through
        ``site.url_replacer`` in ``'absolute'`` mode and returns the content
        of the document body.  Returns an empty string when the document is
        empty or has no body; any other parser error is re-raised.
        """
        # The module only does ``import lxml``, which does not load the
        # submodules; import them explicitly so this does not depend on
        # some other module having imported them first.
        import lxml.etree
        import lxml.html

        # Copied from nikola.py
        if self.kw["feed_previewimage"] and 'previewimage' in post.meta[lang] and post.meta[lang]['previewimage'] not in data:
            data = "<figure><img src=\"{}\"></figure> {}".format(post.meta[lang]['previewimage'], data)
        # FIXME: this is duplicated with code in Post.text()
        try:
            doc = lxml.html.document_fromstring(data)
            doc.rewrite_links(lambda dst: self.site.url_replacer(post.permalink(), dst, lang, 'absolute'))
            try:
                body = doc.body
                return (body.text or '') + ''.join(
                    [lxml.html.tostring(child, encoding='unicode')
                     for child in body.iterchildren()])
            except IndexError:  # No body there, it happens sometimes
                return ''
        except lxml.etree.ParserError as e:
            if str(e) == "Document is empty":
                return ""
            raise  # let other errors raise

    def generate_feed(self, lang, title, link, description, timeline,
                      feed_url, output_name, primary_author=None):
        """Generate a feed and write it to file.

        *timeline* is the list of posts to include.  *primary_author*, if
        given, is used as the feed-level author; otherwise the blog author
        for *lang* is used.  The feed is written to *output_name* as UTF-8
        JSON; missing parent directories are created.  *link* is accepted
        for signature compatibility but is not part of the written feed.
        """
        utils.LocaleBorg().set_locale(lang)

        # These depend only on configuration, not on the individual post.
        if self.kw['feed_plain']:
            strip_html = True
            content_tag = "content_text"
        else:
            strip_html = False
            content_tag = "content_html"

        items = []
        for post in timeline:
            # NOTE(review): post.permalink() and site.link() are called here
            # without going through get_link(), so these URLs may be
            # site-relative rather than absolute -- confirm whether that is
            # intended for feed consumers.
            item = {
                "id": post.guid(lang),
                "url": post.permalink(lang),
                "title": post.title(lang),
                "date_published": post.date.replace(microsecond=0).isoformat(),
                "date_modified": post.updated.replace(microsecond=0).isoformat(),
                "author": {
                    "name": post.author(lang),
                    "url": self.site.link("author", post.author(lang), lang)
                },
                "tags": post.tags_for_language(lang),
            }

            # Only report a modification date if the post was really updated.
            if post.updated == post.date:
                del item["date_modified"]

            external_url = post.meta[lang].get('link')
            if external_url:
                item['external_url'] = external_url

            previewimage = post.meta[lang].get('previewimage')
            if previewimage:
                item['image'] = self.site.url_replacer(post.permalink(), previewimage, lang, 'absolute')

            data = post.text(lang, self.kw['feed_teasers'], strip_html, True, True, self.kw['feed_links_append_query'])

            # Massage the post's HTML (unless plain)
            if feed_url is not None and data and not strip_html:
                data = self._massage_html(post, lang, data)

            item[content_tag] = data
            items.append(item)

        if not primary_author:
            # Not an author page (which overrides this): use the blog author.
            primary_author = {"name": self.kw['blog_author'](lang)}

        feed = {
            "version": "https://jsonfeed.org/version/1",
            "user_comment": ("This feed allows you to read the posts from this "
                             "site in any feed reader that supports the JSON "
                             "Feed format. To add this feed to your reader, "
                             "copy the following URL — " + feed_url +
                             " — and add it to your reader."),
            "title": title,
            "home_page_url": self.kw['site_url'],
            "feed_url": feed_url,
            "description": description,
            "author": primary_author,
            "items": items
        }

        utils.makedirs(os.path.dirname(output_name))

        with io.open(output_name, 'w', encoding='utf-8') as fh:
            json.dump(feed, fh, ensure_ascii=False, indent=4)
0 commit comments