Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
make it fail
  • Loading branch information
ralsina committed Jun 4, 2015
1 parent d6a60d5 commit 8bd33e4
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Expand Up @@ -4,6 +4,7 @@ New in master
Features
--------

* New html5lib serializer creates better HTML (Issue #1768)
* New --get-path option for ``nikola install_theme`` (Issue #1762)
* New `nikola rst2html` command (Issue #1710)
* New `nikola status` command (Issue #1740)
Expand Down
10 changes: 3 additions & 7 deletions nikola/nikola.py
Expand Up @@ -974,9 +974,7 @@ def render_template(self, template_name, output_name, context):
parser = lxml.html.HTMLParser(remove_blank_text=True)
doc = lxml.html.document_fromstring(data, parser)
doc.rewrite_links(lambda dst: self.url_replacer(src, dst, context['lang']))
data = b'<!DOCTYPE html>\n' + lxml.html.tostring(doc, encoding='utf8', method='html', pretty_print=True)
with open(output_name, "wb+") as post_file:
post_file.write(data)
utils.save_doc(doc, output_name)

def url_replacer(self, src, dst, lang=None, url_type=None):
"""URL mangler.
Expand Down Expand Up @@ -1131,8 +1129,7 @@ def generic_rss_renderer(self, lang, title, link, description, timeline, output_
try:
body = doc.body
data = (body.text or '') + ''.join(
[lxml.html.tostring(child, encoding='unicode')
for child in body.iterchildren()])
[utils.doc_tostring(child) for child in body.iterchildren()])
except IndexError: # No body there, it happens sometimes
data = ''
except lxml.etree.ParserError as e:
Expand Down Expand Up @@ -1694,8 +1691,7 @@ def atom_link(link_rel, link_type, link_href):
try:
body = doc.body
data = (body.text or '') + ''.join(
[lxml.html.tostring(child, encoding='unicode')
for child in body.iterchildren()])
[utils.doc_tostring(child) for child in body.iterchildren()])
except IndexError: # No body there, it happens sometimes
data = ''
except lxml.etree.ParserError as e:
Expand Down
12 changes: 6 additions & 6 deletions nikola/post.py
Expand Up @@ -579,9 +579,9 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
hyphenate(document, lang)

try:
data = lxml.html.tostring(document.body, encoding='unicode')
data = utils.doc_tostring(document.body)
except:
data = lxml.html.tostring(document, encoding='unicode')
data = utils.doc_tostring(document)

if teaser_only:
teaser = TEASER_REGEXP.split(data)[0]
Expand All @@ -604,9 +604,9 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
# This closes all open tags and sanitizes the broken HTML
document = lxml.html.fromstring(teaser)
try:
data = lxml.html.tostring(document.body, encoding='unicode')
data = utils.doc_tostring(document.body)
except IndexError:
data = lxml.html.tostring(document, encoding='unicode')
data = utils.doc_tostring(document)

if data and strip_html:
try:
Expand All @@ -621,9 +621,9 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
try:
document = lxml.html.fromstring(data)
demote_headers(document, self.demote_headers)
data = lxml.html.tostring(document.body, encoding='unicode')
data = utils.doc_tostring(document.body)
except (lxml.etree.ParserError, IndexError):
data = lxml.html.tostring(document, encoding='unicode')
data = utils.doc_tostring(document)

return data

Expand Down
21 changes: 20 additions & 1 deletion nikola/utils.py
Expand Up @@ -28,6 +28,7 @@

from __future__ import print_function, unicode_literals, absolute_import
import calendar
import codecs
import datetime
import dateutil.tz
import hashlib
Expand All @@ -47,6 +48,8 @@
import warnings
import PyRSS2Gen as rss
from collections import defaultdict, Callable

import html5lib
from logbook.more import ExceptionHandler, ColorizedStderrHandler
from pygments.formatters import HtmlFormatter
from zipfile import ZipFile as zipf
Expand All @@ -69,7 +72,7 @@
'adjust_name_for_index_path', 'adjust_name_for_index_link',
'NikolaPygmentsHTML', 'create_redirect', 'TreeNode',
'flatten_tree_structure', 'parse_escaped_hierarchical_category_name',
'join_hierarchical_category_path']
'join_hierarchical_category_path', 'doc_tostring', 'save_doc']

# Are you looking for 'generic_rss_renderer'?
# It's defined in nikola.nikola.Nikola (the site object).
Expand Down Expand Up @@ -1617,3 +1620,19 @@ def escape(s):
return s.replace('\\', '\\\\').replace('/', '\\/')

return '/'.join([escape(p) for p in category_path])


def doc_tostring(doc, **opts):
    """Serialize an LXML document or element to HTML using html5lib.

    This is a thin wrapper around ``html5lib.serializer.serialize`` that
    selects the ``lxml`` tree walker.

    :param doc: the LXML document or element to serialize.
    :param opts: extra keyword arguments forwarded unchanged to
        ``html5lib.serializer.serialize``.
    :return: the value produced by html5lib's ``serialize``. This is
        expected to be unicode text when no ``encoding`` option is
        forwarded -- NOTE(review): confirm against the pinned html5lib
        version.
    """
    # No diagnostic output here: this helper runs once per element in the
    # feed renderers, so printing would flood stdout during a build.
    return html5lib.serializer.serialize(doc, tree="lxml", **opts)


def save_doc(doc, dst, **opts):
    """Write the serialized form of a LXML doc to the file at ``dst``.

    The parent directory of ``dst`` is handed to ``makedirs`` first, then
    the document is serialized through ``doc_tostring`` (with ``opts``
    forwarded as-is) and written out through a UTF-8 codec stream.

    :param doc: the LXML document to serialize.
    :param dst: destination file path.
    :param opts: keyword arguments passed through to ``doc_tostring``.
    """
    makedirs(os.path.dirname(dst))
    serialized = doc_tostring(doc, **opts)
    with codecs.open(dst, 'wb+', 'utf8') as out_stream:
        out_stream.write(serialized)
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -12,3 +12,4 @@ logbook>=0.7.0
blinker>=1.3
setuptools>=5.4.1
natsort>=3.5.2
html5lib>=0.999

0 comments on commit 8bd33e4

Please sign in to comment.