Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #229 from getnikola/jsonfeed
Initial JSON Feed implementation
- Loading branch information
Showing
4 changed files
with
371 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
An implementation of the [JSON Feed](https://jsonfeed.org/) specification (version 1). | ||
|
||
Supported: | ||
|
||
* archives (`/archives/2017/feed.json` — only if archives are indexes) | ||
* blog index (`/feed.json`) | ||
* author pages (`/authors/john-doe-feed.json`) | ||
* categories (`/categories/cat_foo-feed.json`) | ||
* sections (`/section/feed.json`) | ||
* tags (`/categories/bar-feed.json`) | ||
|
||
Unsupported: | ||
|
||
* galleries (requires some changes to Nikola core) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Add links to JSON Feeds to page <head>s, where applicable. | ||
JSONFEED_APPEND_LINKS = True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[Core] | ||
Name = jsonfeed | ||
Module = jsonfeed | ||
|
||
[Nikola] | ||
PluginCategory = Task | ||
|
||
[Documentation] | ||
Author = Chris Warrick | ||
Version = 0.1.0 | ||
Website = https://jsonfeed.org/ | ||
Description = Generate JSON Feeds for a Nikola blog. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,343 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# Copyright © 2017, Chris Warrick and others. | ||
|
||
# Permission is hereby granted, free of charge, to any | ||
# person obtaining a copy of this software and associated | ||
# documentation files (the "Software"), to deal in the | ||
# Software without restriction, including without limitation | ||
# the rights to use, copy, modify, merge, publish, | ||
# distribute, sublicense, and/or sell copies of the | ||
# Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice | ||
# shall be included in all copies or substantial portions of | ||
# the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | ||
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | ||
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | ||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS | ||
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR | ||
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | ||
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
|
||
"""Generate JSON Feeds.""" | ||
|
||
from __future__ import unicode_literals | ||
import json | ||
import io | ||
import os | ||
import lxml | ||
|
||
from nikola.plugin_categories import Task | ||
from nikola import utils | ||
|
||
try: | ||
from urlparse import urljoin | ||
except ImportError: | ||
from urllib.parse import urljoin # NOQA | ||
|
||
|
||
class JSONFeed(Task):
    """Generate JSON Feed (version 1) files for a Nikola site.

    A feed is written for the main index of every language, and for every
    classification of each taxonomy listed in ``supported_taxonomies``.
    """

    name = "jsonfeed"
    # Taxonomy name (key into ``site.taxonomy_plugins``) -> name of the path
    # handler that locates the matching feed file.  Every handler ``X`` listed
    # here must have an ``X_path`` method on this class (see ``set_site``).
    supported_taxonomies = {
        'archive': 'archive_jsonfeed',
        'author': 'author_jsonfeed',
        'category': 'category_jsonfeed',
        'section_index': 'section_index_jsonfeed',
        'tag': 'tag_jsonfeed',
    }
    # Flipped to True after the "need Nikola >= 7.8.6" warning was logged,
    # so that the warning is emitted only once.
    _section_archive_link_warned = False

    def set_site(self, site):
        """Set site, which is a Nikola instance.

        Also snapshots the relevant configuration into ``self.kw``, registers
        the feed path handlers and (optionally) the ``extra_head`` hook that
        adds ``<link>`` elements to rendered pages.
        """
        super(JSONFeed, self).set_site(site)

        config = self.site.config
        # This dict is also what ``config_changed`` watches in
        # ``generate_feed_task`` to decide whether feeds are up to date.
        self.kw = {
            'feed_links_append_query': config['FEED_LINKS_APPEND_QUERY'],
            'feed_length': config['FEED_LENGTH'],
            'feed_plain': config['FEED_PLAIN'],
            'feed_previewimage': config['FEED_PREVIEWIMAGE'],
            'feed_read_more_link': config['FEED_READ_MORE_LINK'],
            'feed_teasers': config['FEED_TEASERS'],
            'jsonfeed_append_links': config.get('JSONFEED_APPEND_LINKS', True),
            'site_url': config['SITE_URL'],
            'blog_title': config['BLOG_TITLE'],
            'blog_description': config['BLOG_DESCRIPTION'],
            'blog_author': config['BLOG_AUTHOR'],
            'tag_pages_titles': config['TAG_PAGES_TITLES'],
            'category_pages_titles': config['CATEGORY_PAGES_TITLES'],
            'posts_section_title': config['POSTS_SECTION_TITLE'],
            'archives_are_indexes': config['ARCHIVES_ARE_INDEXES'],
        }

        self.site.register_path_handler("index_jsonfeed", self.index_jsonfeed_path)
        for handler_name in self.supported_taxonomies.values():
            self.site.register_path_handler(handler_name, getattr(self, handler_name + '_path'))

        # Add links if desired
        if self.kw['jsonfeed_append_links']:
            self.site.template_hooks['extra_head'].append(self.jsonfeed_html_link, True)

    def gen_tasks(self):
        """Generate JSON feeds.

        Yields the group task first, then, per language, one task generator
        for the main feed and one for each non-empty classification of the
        supported taxonomies.
        """
        self.site.scan_posts()
        yield self.group_task()

        output_folder = self.site.config['OUTPUT_FOLDER']
        feed_length = self.kw['feed_length']

        for lang in self.site.translations:
            # Main feed
            title = self.kw['blog_title'](lang)
            link = self.kw['site_url']
            description = self.kw['blog_description'](lang)
            timeline = self.site.posts[:feed_length]
            output_name = os.path.normpath(os.path.join(
                output_folder, self.site.path("index_jsonfeed", "", lang)))
            feed_url = self.get_link("index_jsonfeed", "", lang)

            yield self.generate_feed_task(lang, title, link, description,
                                          timeline, feed_url, output_name)

            for classification_name, path_handler in self.supported_taxonomies.items():
                taxonomy = self.site.taxonomy_plugins[classification_name]

                # Archive feeds are only generated when archives are indexes.
                if classification_name == "archive" and not self.kw['archives_are_indexes']:
                    continue

                # Collect classification -> posts for this language (and for
                # the other languages too, if the taxonomy asks for that).
                classification_timelines = {}
                per_language = self.site.posts_per_classification[taxonomy.classification_name]
                for tlang, posts_per_classification in per_language.items():
                    if lang != tlang and not taxonomy.also_create_classifications_from_other_languages:
                        continue
                    classification_timelines.update(posts_per_classification)

                for classification, posts in classification_timelines.items():
                    if not classification:
                        continue
                    if taxonomy.has_hierarchy:
                        node = self.site.hierarchy_lookup_per_classification[
                            taxonomy.classification_name][lang][classification]
                        taxo_context = taxonomy.provide_context_and_uptodate(classification, lang, node)[0]
                    else:
                        taxo_context = taxonomy.provide_context_and_uptodate(classification, lang)[0]
                    title = taxo_context.get('title', classification)
                    link = self.get_link(classification_name, classification, lang)
                    description = taxo_context.get('description', self.kw['blog_description'](lang))
                    timeline = posts[:feed_length]
                    output_name = os.path.normpath(os.path.join(
                        output_folder, self.site.path(path_handler, classification, lang)))
                    feed_url = self.get_link(path_handler, classification, lang)

                    # Special handling for author pages: the feed-level author
                    # is the author the page is about, not the blog author.
                    if classification_name == "author":
                        primary_author = {
                            'name': classification,
                            'url': link
                        }
                    else:
                        primary_author = None

                    yield self.generate_feed_task(lang, title, link, description,
                                                  timeline, feed_url, output_name, primary_author)

    def _feed_path(self, lang, *components):
        """Return the path list for ``lang``, dropping empty components."""
        parts = (self.site.config['TRANSLATIONS'][lang],) + components
        return [part for part in parts if part]

    def index_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to main JSON Feed."""
        return self._feed_path(lang, 'feed.json')

    def archive_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to archive JSON Feed."""
        return self._feed_path(lang, self.site.config['ARCHIVE_PATH'], name, 'feed.json')

    def author_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to author JSON Feed."""
        if self.site.config['SLUG_AUTHOR_PATH']:
            filename = utils.slugify(name, lang) + '-feed.json'
        else:
            filename = name + '-feed.json'
        return self._feed_path(lang, self.site.config['AUTHOR_PATH'](lang), filename)

    def category_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to category JSON Feed."""
        taxonomy = self.site.taxonomy_plugins['category']
        slug = taxonomy.slugify_category_name(taxonomy.extract_hierarchy(name), lang)[0]
        return self._feed_path(lang, self.site.config['CATEGORY_PATH'](lang), slug + '-feed.json')

    def section_index_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to section JSON Feed."""
        return self._feed_path(lang, self.site.config['SECTION_PATH'](lang), name, 'feed.json')

    def tag_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to tag JSON Feed."""
        taxonomy = self.site.taxonomy_plugins['tag']
        slug = taxonomy.slugify_tag_name(name, lang)
        return self._feed_path(lang, self.site.config['TAG_PATH'](lang), slug + '-feed.json')

    def get_link(self, path_handler, classification, lang):
        """Get link for a page, joined onto the site's ``BASE_URL``."""
        relative = self.site.link(path_handler, classification, lang).lstrip('/')
        return urljoin(self.site.config['BASE_URL'], relative)

    def _warn_section_archive_links(self):
        """Warn (once) that the context lacks archive/section names; return ''."""
        if not self._section_archive_link_warned:
            utils.LOGGER.warning("To create links for section and archive JSON feeds, you need Nikola >= 7.8.6.")
            self._section_archive_link_warned = True
        return ''

    def jsonfeed_html_link(self, site, context):
        """Generate HTML fragment with link to JSON feed.

        Returns an empty string for page kinds that have no matching feed.
        On multilingual sites one ``<link>`` per language is produced.
        """
        pagekind = context['pagekind']
        lang = context['lang']
        fragment = '<link rel="alternate" type="application/json" title="{title}" href="{url}">\n'

        if 'main_index' in pagekind:
            path_handler = "index_jsonfeed"
            name = ""
        elif 'author_page' in pagekind:
            path_handler = "author_jsonfeed"
            name = context["author"]
        elif 'tag_page' in pagekind:
            # context["kind"] names both the handler prefix and the context
            # key that holds the classification.
            path_handler = context["kind"] + "_jsonfeed"
            name = context[context["kind"]]
        elif 'archive_page' in pagekind:
            if "archive_name" not in context:
                return self._warn_section_archive_links()
            path_handler = "archive_jsonfeed"
            name = context["archive_name"]
        elif 'section_page' in pagekind:
            if "section" not in context:
                return self._warn_section_archive_links()
            path_handler = "section_index_jsonfeed"
            name = context["section"]
        else:
            return ''  # Do nothing on unsupported pages

        if len(self.site.translations) > 1:
            return "".join(
                fragment.format(title="JSON Feed ({0})".format(tlang),
                                url=self.site.link(path_handler, name, tlang))
                for tlang in self.site.translations)

        return fragment.format(title="JSON Feed",
                               url=self.site.link(path_handler, name, lang))

    def generate_feed_task(self, lang, title, link, description, timeline,
                           feed_url, output_name, primary_author=None):
        """Generate a task to create a feed.

        The task depends on every post in ``timeline`` and is re-run when
        ``self.kw`` changes.
        """
        # Build dependency list
        deps = []
        deps_uptodate = []
        for post in timeline:
            deps.extend(post.deps(lang))
            deps_uptodate.extend(post.deps_uptodate(lang))

        task = {
            'basename': str(self.name),
            'name': str(output_name),
            'targets': [output_name],
            'file_dep': deps,
            'task_dep': ['render_posts', 'render_taxonomies'],
            'actions': [(self.generate_feed, (lang, title, link, description,
                                              timeline, feed_url, output_name,
                                              primary_author))],
            'uptodate': [utils.config_changed(self.kw, 'jsonfeed:' + output_name)] + deps_uptodate,
            'clean': True
        }

        yield utils.apply_filters(task, self.site.config['FILTERS'])

    def _absolutize_links(self, data, post, lang):
        """Return the HTML fragment ``data`` with its links rewritten as absolute.

        Returns an empty string if the fragment parses to an empty document
        or has no body; other parser errors propagate.
        """
        # A bare ``import lxml`` does not load these submodules, so import
        # them explicitly instead of relying on another module having done so.
        import lxml.etree
        import lxml.html

        # FIXME: this is duplicated with code in Post.text()
        try:
            doc = lxml.html.document_fromstring(data)
            doc.rewrite_links(lambda dst: self.site.url_replacer(post.permalink(), dst, lang, 'absolute'))
            try:
                body = doc.body
                return (body.text or '') + ''.join(
                    lxml.html.tostring(child, encoding='unicode')
                    for child in body.iterchildren())
            except IndexError:  # No body there, it happens sometimes
                return ''
        except lxml.etree.ParserError as e:
            if str(e) == "Document is empty":
                return ""
            raise  # let other errors raise

    def _build_item(self, post, lang, feed_url):
        """Return the JSON Feed item dict describing ``post`` in ``lang``."""
        author_name = post.author(lang)
        item = {
            "id": post.guid(lang),
            # NOTE(review): this may be a relative URL; confirm whether
            # post.permalink() should be asked for an absolute one here.
            "url": post.permalink(lang),
            "title": post.title(lang),
            "date_published": post.date.replace(microsecond=0).isoformat(),
            "date_modified": post.updated.replace(microsecond=0).isoformat(),
            "author": {
                "name": author_name,
                # Absolute, like the feed-level author URL built in gen_tasks.
                "url": self.get_link("author", author_name, lang),
            },
            "tags": post.tags_for_language(lang),
        }

        # Only report a modification date when the post was actually updated.
        if post.updated == post.date:
            del item["date_modified"]

        external_link = post.meta[lang].get('link')
        if external_link:
            item['external_url'] = external_link

        previewimage = post.meta[lang].get('previewimage')
        if previewimage:
            item['image'] = self.site.url_replacer(post.permalink(), previewimage, lang, 'absolute')

        strip_html = bool(self.kw['feed_plain'])
        content_tag = "content_text" if strip_html else "content_html"

        data = post.text(lang, self.kw['feed_teasers'], strip_html, True, True,
                         self.kw['feed_links_append_query'])

        # Copied from nikola.py
        # Massage the post's HTML (unless plain)
        if feed_url is not None and data and not strip_html:
            if self.kw["feed_previewimage"] and previewimage and previewimage not in data:
                data = "<figure><img src=\"{}\"></figure> {}".format(previewimage, data)
            data = self._absolutize_links(data, post, lang)

        item[content_tag] = data
        return item

    def generate_feed(self, lang, title, link, description, timeline,
                      feed_url, output_name, primary_author=None):
        """Generate a feed and write it to file.

        ``primary_author`` overrides the feed-level author (used for author
        pages); when it is not given, the blog author is used.
        """
        utils.LocaleBorg().set_locale(lang)
        items = [self._build_item(post, lang, feed_url) for post in timeline]

        if not primary_author:
            # Not an author page: fall back to the blog author
            primary_author = {"name": self.kw['blog_author'](lang)}

        feed = {
            "version": "https://jsonfeed.org/version/1",
            "user_comment": ("This feed allows you to read the posts from this "
                             "site in any feed reader that supports the JSON "
                             "Feed format. To add this feed to your reader, "
                             "copy the following URL — " + feed_url +
                             " — and add it to your reader."),
            "title": title,
            # NOTE(review): ``link`` is passed in but unused; the home page is
            # always SITE_URL, even for taxonomy feeds -- confirm intent.
            "home_page_url": self.kw['site_url'],
            "feed_url": feed_url,
            "description": description,
            "author": primary_author,
            "items": items
        }

        utils.makedirs(os.path.dirname(output_name))

        with io.open(output_name, 'w', encoding='utf-8') as fh:
            json.dump(feed, fh, ensure_ascii=False, indent=4)