Skip to content

Commit 953e196

Browse files
authored May 22, 2017
Merge pull request #229 from getnikola/jsonfeed
Initial JSON Feed implementation
2 parents 54b2bec + f4f03a0 commit 953e196

File tree

4 files changed

+371
-0
lines changed

4 files changed

+371
-0
lines changed
 

‎v7/jsonfeed/README.md

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
An implementation of the [JSON Feed](https://jsonfeed.org/) specification (version 1).
2+
3+
Supported:
4+
5+
* archives (`/archives/2017/feed.json` — only if archives are indexes)
6+
* blog index (`/feed.json`)
7+
* author pages (`/authors/john-doe-feed.json`)
8+
* categories (`/categories/cat_foo-feed.json`)
9+
* sections (`/section/feed.json`)
10+
* tags (`/categories/bar-feed.json`)
11+
12+
Unsupported:
13+
14+
* galleries (requires some changes to Nikola core)

‎v7/jsonfeed/conf.py.sample

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Add links to JSON Feeds to page <head>s, where applicable.
2+
JSONFEED_APPEND_LINKS = True

‎v7/jsonfeed/jsonfeed.plugin

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[Core]
2+
Name = jsonfeed
3+
Module = jsonfeed
4+
5+
[Nikola]
6+
PluginCategory = Task
7+
8+
[Documentation]
9+
Author = Chris Warrick
10+
Version = 0.1.0
11+
Website = https://jsonfeed.org/
12+
Description = Generate JSON Feeds for a Nikola blog.

‎v7/jsonfeed/jsonfeed.py

+343
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright © 2017, Chris Warrick and others.
4+
5+
# Permission is hereby granted, free of charge, to any
6+
# person obtaining a copy of this software and associated
7+
# documentation files (the "Software"), to deal in the
8+
# Software without restriction, including without limitation
9+
# the rights to use, copy, modify, merge, publish,
10+
# distribute, sublicense, and/or sell copies of the
11+
# Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice
15+
# shall be included in all copies or substantial portions of
16+
# the Software.
17+
#
18+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
19+
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
20+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
21+
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
22+
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23+
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24+
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26+
27+
"""Generate JSON Feeds."""
28+
29+
from __future__ import unicode_literals

import io
import json
import os

import lxml
import lxml.etree
import lxml.html

from nikola import utils
from nikola.plugin_categories import Task

try:
    from urlparse import urljoin
except ImportError:
    from urllib.parse import urljoin  # NOQA
42+
43+
44+
class JSONFeed(Task):
    """Generate JSON Feeds (https://jsonfeed.org/, version 1).

    A feed is written for the blog index and for every classification of
    each taxonomy listed in ``supported_taxonomies``, once per language.
    """

    name = "jsonfeed"
    # Maps a taxonomy name (key into ``site.taxonomy_plugins``) to the name
    # of the path handler that locates the matching JSON Feed.  For every
    # value ``x`` a method called ``x_path`` must exist on this class.
    supported_taxonomies = {
        'archive': 'archive_jsonfeed',
        'author': 'author_jsonfeed',
        'category': 'category_jsonfeed',
        'section_index': 'section_index_jsonfeed',
        'tag': 'tag_jsonfeed',
    }
    # Flipped to True after the "Nikola >= 7.8.6" warning was logged, so the
    # warning is emitted at most once.
    _section_archive_link_warned = False

    def set_site(self, site):
        """Set site, which is a Nikola instance.

        Besides storing the site, this caches the configuration values the
        feeds depend on in ``self.kw``, registers the feed path handlers
        and, if ``JSONFEED_APPEND_LINKS`` is enabled (the default), hooks
        ``jsonfeed_html_link`` into the ``extra_head`` template hook.
        """
        super(JSONFeed, self).set_site(site)

        config = self.site.config
        # Also used as the ``config_changed`` fingerprint of every feed task.
        self.kw = {
            'feed_links_append_query': config['FEED_LINKS_APPEND_QUERY'],
            'feed_length': config['FEED_LENGTH'],
            'feed_plain': config['FEED_PLAIN'],
            'feed_previewimage': config['FEED_PREVIEWIMAGE'],
            'feed_read_more_link': config['FEED_READ_MORE_LINK'],
            'feed_teasers': config['FEED_TEASERS'],
            'jsonfeed_append_links': config.get('JSONFEED_APPEND_LINKS', True),
            'site_url': config['SITE_URL'],
            'blog_title': config['BLOG_TITLE'],
            'blog_description': config['BLOG_DESCRIPTION'],
            'blog_author': config['BLOG_AUTHOR'],
            'tag_pages_titles': config['TAG_PAGES_TITLES'],
            'category_pages_titles': config['CATEGORY_PAGES_TITLES'],
            'posts_section_title': config['POSTS_SECTION_TITLE'],
            'archives_are_indexes': config['ARCHIVES_ARE_INDEXES'],
        }

        self.site.register_path_handler("index_jsonfeed", self.index_jsonfeed_path)
        for handler_name in self.supported_taxonomies.values():
            self.site.register_path_handler(handler_name, getattr(self, handler_name + '_path'))

        # Add links if desired
        if self.kw['jsonfeed_append_links']:
            # NOTE(review): the second argument presumably asks Nikola to call
            # the hook with (site, context), which is the signature of
            # jsonfeed_html_link -- confirm against the TemplateHookRegistry.
            self.site.template_hooks['extra_head'].append(self.jsonfeed_html_link, True)

    def gen_tasks(self):
        """Generate JSON feeds.

        Yields the group task first, then for every language one task for
        the main feed followed by one task per non-empty classification of
        each supported taxonomy.
        """
        self.site.scan_posts()
        yield self.group_task()

        output_folder = self.site.config['OUTPUT_FOLDER']
        feed_length = self.kw['feed_length']

        for lang in self.site.translations:
            # Main feed
            title = self.kw['blog_title'](lang)
            link = self.kw['site_url']
            description = self.kw['blog_description'](lang)
            timeline = self.site.posts[:feed_length]
            output_name = os.path.normpath(os.path.join(
                output_folder, self.site.path("index_jsonfeed", "", lang)))
            feed_url = self.get_link("index_jsonfeed", "", lang)

            yield self.generate_feed_task(lang, title, link, description,
                                          timeline, feed_url, output_name)

            for classification_name, path_handler in self.supported_taxonomies.items():
                taxonomy = self.site.taxonomy_plugins[classification_name]

                # Archive feeds only make sense when archives are indexes.
                if classification_name == "archive" and not self.kw['archives_are_indexes']:
                    continue

                # Collect the posts of every classification visible in
                # ``lang``; other languages contribute only if the taxonomy
                # asks for it.
                classification_timelines = {}
                per_language = self.site.posts_per_classification[taxonomy.classification_name]
                for tlang, posts_per_classification in per_language.items():
                    if lang != tlang and not taxonomy.also_create_classifications_from_other_languages:
                        continue
                    classification_timelines.update(posts_per_classification)

                for classification, timeline in classification_timelines.items():
                    if not classification:
                        continue
                    if taxonomy.has_hierarchy:
                        node = self.site.hierarchy_lookup_per_classification[
                            taxonomy.classification_name][lang][classification]
                        taxo_context = taxonomy.provide_context_and_uptodate(classification, lang, node)[0]
                    else:
                        taxo_context = taxonomy.provide_context_and_uptodate(classification, lang)[0]
                    title = taxo_context.get('title', classification)
                    link = self.get_link(classification_name, classification, lang)
                    description = taxo_context.get('description', self.kw['blog_description'](lang))
                    timeline = timeline[:feed_length]
                    output_name = os.path.normpath(os.path.join(
                        output_folder, self.site.path(path_handler, classification, lang)))
                    feed_url = self.get_link(path_handler, classification, lang)

                    # Special handling for author pages: the feed-level
                    # author is the author the page is about.
                    if classification_name == "author":
                        primary_author = {
                            'name': classification,
                            'url': link,
                        }
                    else:
                        primary_author = None

                    yield self.generate_feed_task(lang, title, link, description,
                                                  timeline, feed_url, output_name, primary_author)

    def _feed_path(self, lang, *components):
        """Return the path components of a feed, dropping empty ones.

        The language prefix from ``TRANSLATIONS`` comes first, followed by
        ``components`` in the given order.
        """
        candidates = (self.site.config['TRANSLATIONS'][lang],) + components
        return [part for part in candidates if part]

    def index_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to main JSON Feed."""
        return self._feed_path(lang, 'feed.json')

    def archive_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to archive JSON Feed."""
        return self._feed_path(lang, self.site.config['ARCHIVE_PATH'], name, 'feed.json')

    def author_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to author JSON Feed.

        The author name is slugified when ``SLUG_AUTHOR_PATH`` is enabled.
        """
        if self.site.config['SLUG_AUTHOR_PATH']:
            filename = utils.slugify(name, lang) + '-feed.json'
        else:
            filename = name + '-feed.json'
        return self._feed_path(lang, self.site.config['AUTHOR_PATH'](lang), filename)

    def category_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to category JSON Feed."""
        taxonomy = self.site.taxonomy_plugins['category']
        # Only the first slugified element is used for the file name.
        slug = taxonomy.slugify_category_name(taxonomy.extract_hierarchy(name), lang)[0]
        return self._feed_path(lang, self.site.config['CATEGORY_PATH'](lang), slug + '-feed.json')

    def section_index_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to section JSON Feed."""
        return self._feed_path(lang, self.site.config['SECTION_PATH'](lang), name, 'feed.json')

    def tag_jsonfeed_path(self, name, lang, **kwargs):
        """Return path to tag JSON Feed."""
        taxonomy = self.site.taxonomy_plugins['tag']
        slug = taxonomy.slugify_tag_name(name, lang)
        return self._feed_path(lang, self.site.config['TAG_PATH'](lang), slug + '-feed.json')

    def get_link(self, path_handler, classification, lang):
        """Get link for a page, joined onto ``BASE_URL``."""
        relative = self.site.link(path_handler, classification, lang).lstrip('/')
        return urljoin(self.site.config['BASE_URL'], relative)

    def _warn_old_nikola_once(self):
        """Log, at most once, that section/archive links need a newer Nikola."""
        if not self._section_archive_link_warned:
            utils.LOGGER.warning("To create links for section and archive JSON feeds, you need Nikola >= 7.8.6.")
            self._section_archive_link_warned = True

    def jsonfeed_html_link(self, site, context):
        """Generate HTML fragment with link to JSON feed.

        Returns one ``<link rel="alternate">`` element per site language
        (a single one on monolingual sites), or an empty string for page
        kinds that have no JSON Feed or whose template context lacks the
        data needed to locate the feed.
        """
        pagekind = context['pagekind']
        lang = context['lang']
        fragment = '<link rel="alternate" type="application/json" title="{title}" href="{url}">\n'

        if 'main_index' in pagekind:
            path_handler = "index_jsonfeed"
            name = ""
        elif 'author_page' in pagekind:
            path_handler = "author_jsonfeed"
            name = context["author"]
        elif 'tag_page' in pagekind:
            # context["kind"] names both the handler prefix and the context
            # key holding the classification.
            path_handler = context["kind"] + "_jsonfeed"
            name = context[context["kind"]]
        elif 'archive_page' in pagekind:
            path_handler = "archive_jsonfeed"
            if "archive_name" not in context:
                self._warn_old_nikola_once()
                return ''
            name = context["archive_name"]
        elif 'section_page' in pagekind:
            path_handler = "section_index_jsonfeed"
            if "section" not in context:
                self._warn_old_nikola_once()
                return ''
            name = context["section"]
        else:
            return ''  # Do nothing on unsupported pages

        if len(self.site.translations) > 1:
            return "".join(
                fragment.format(title="JSON Feed ({0})".format(tlang),
                                url=self.site.link(path_handler, name, tlang))
                for tlang in self.site.translations)
        return fragment.format(title="JSON Feed",
                               url=self.site.link(path_handler, name, lang))

    def generate_feed_task(self, lang, title, link, description, timeline,
                           feed_url, output_name, primary_author=None):
        """Generate a task to create a feed.

        Yields a single doit task dict (after applying ``FILTERS``) whose
        action calls ``generate_feed`` with the same arguments.
        """
        # Build dependency list
        deps = []
        deps_uptodate = []
        for post in timeline:
            deps.extend(post.deps(lang))
            deps_uptodate.extend(post.deps_uptodate(lang))

        task = {
            'basename': str(self.name),
            'name': str(output_name),
            'targets': [output_name],
            'file_dep': deps,
            'task_dep': ['render_posts', 'render_taxonomies'],
            'actions': [(self.generate_feed, (lang, title, link, description,
                                              timeline, feed_url, output_name,
                                              primary_author))],
            'uptodate': [utils.config_changed(self.kw, 'jsonfeed:' + output_name)] + deps_uptodate,
            'clean': True,
        }

        yield utils.apply_filters(task, self.site.config['FILTERS'])

    def _absolutize_html(self, data, post, lang):
        """Return ``data`` (an HTML fragment) with all links made absolute.

        Links are resolved against the permalink of ``post`` in ``lang``.
        An empty string is returned when the fragment parses to an empty
        document or to a document without a body.
        """
        # FIXME: this is duplicated with code in Post.text()
        try:
            doc = lxml.html.document_fromstring(data)
            doc.rewrite_links(lambda dst: self.site.url_replacer(post.permalink(lang), dst, lang, 'absolute'))
            try:
                body = doc.body
                return (body.text or '') + ''.join(
                    lxml.html.tostring(child, encoding='unicode')
                    for child in body.iterchildren())
            except IndexError:  # No body there, it happens sometimes
                return ''
        except lxml.etree.ParserError as e:
            if str(e) == "Document is empty":
                return ""
            raise  # let other errors raise

    def _build_item(self, post, lang, feed_url):
        """Return the JSON Feed item dict describing ``post`` in ``lang``."""
        author_name = post.author(lang)
        item = {
            "id": post.guid(lang),
            # Feeds are read outside of the site, so URLs must be absolute.
            "url": post.permalink(lang, absolute=True),
            "title": post.title(lang),
            "date_published": post.date.replace(microsecond=0).isoformat(),
            "date_modified": post.updated.replace(microsecond=0).isoformat(),
            "author": {
                "name": author_name,
                "url": self.get_link("author", author_name, lang),
            },
            "tags": post.tags_for_language(lang),
        }

        # Only advertise a modification date when the post really changed.
        if post.updated == post.date:
            del item["date_modified"]

        external_link = post.meta[lang].get('link')
        if external_link:
            item['external_url'] = external_link

        previewimage = post.meta[lang].get('previewimage')
        if previewimage:
            item['image'] = self.site.url_replacer(post.permalink(lang), previewimage, lang, 'absolute')

        strip_html = bool(self.kw['feed_plain'])
        content_tag = "content_text" if strip_html else "content_html"

        # NOTE(review): the positional arguments are presumably teaser_only,
        # strip_html, show_read_more_link, the feed "read more" flag and the
        # query appended to links -- confirm against Post.text().
        data = post.text(lang, self.kw['feed_teasers'], strip_html, True, True,
                         self.kw['feed_links_append_query'])

        # Massage the post's HTML (unless plain); copied from nikola.py
        if feed_url is not None and data and not strip_html:
            if self.kw["feed_previewimage"] and previewimage and previewimage not in data:
                data = "<figure><img src=\"{}\"></figure> {}".format(previewimage, data)
            data = self._absolutize_html(data, post, lang)

        item[content_tag] = data
        return item

    def generate_feed(self, lang, title, link, description, timeline,
                      feed_url, output_name, primary_author=None):
        """Generate a feed and write it to file.

        ``timeline`` is the list of posts to include and ``output_name``
        the file to write (its directory is created if needed).  When
        ``primary_author`` is not given, the feed author defaults to
        ``BLOG_AUTHOR``.  ``link`` is accepted for signature compatibility
        with ``generate_feed_task`` and is currently not written to the
        feed; ``home_page_url`` is always ``SITE_URL``.
        """
        utils.LocaleBorg().set_locale(lang)
        items = [self._build_item(post, lang, feed_url) for post in timeline]

        if not primary_author:
            # No override (i.e. not an author page): use the blog author.
            primary_author = {"name": self.kw['blog_author'](lang)}

        feed = {
            "version": "https://jsonfeed.org/version/1",
            "user_comment": ("This feed allows you to read the posts from this "
                             "site in any feed reader that supports the JSON "
                             "Feed format. To add this feed to your reader, "
                             "copy the following URL — {0} — and add it to "
                             "your reader.").format(feed_url),
            "title": title,
            "home_page_url": self.kw['site_url'],
            "feed_url": feed_url,
            "description": description,
            "author": primary_author,
            "items": items,
        }

        utils.makedirs(os.path.dirname(output_name))

        with io.open(output_name, 'w', encoding='utf-8') as fh:
            json.dump(feed, fh, ensure_ascii=False, indent=4)

0 commit comments

Comments
 (0)
Please sign in to comment.