Skip to content

Commit 8bd33e4

Browse files
committed Jun 4, 2015
make it fail
1 parent d6a60d5 commit 8bd33e4

File tree

5 files changed

+31
-14
lines changed

5 files changed

+31
-14
lines changed
 

‎CHANGES.txt

+1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ New in master
44
Features
55
--------
66

7+
* New html5lib serializer creates better HTML (Issue #1768)
78
* New --get-path option for ``nikola install_theme`` (Issue #1762)
89
* New `nikola rst2html` command (Issue #1710)
910
* New `nikola status` command (Issue #1740)

‎nikola/nikola.py

+3-7
Original file line number | Diff line number | Diff line change
@@ -974,9 +974,7 @@ def render_template(self, template_name, output_name, context):
974974
parser = lxml.html.HTMLParser(remove_blank_text=True)
975975
doc = lxml.html.document_fromstring(data, parser)
976976
doc.rewrite_links(lambda dst: self.url_replacer(src, dst, context['lang']))
977-
data = b'<!DOCTYPE html>\n' + lxml.html.tostring(doc, encoding='utf8', method='html', pretty_print=True)
978-
with open(output_name, "wb+") as post_file:
979-
post_file.write(data)
977+
utils.save_doc(doc, output_name)
980978

981979
def url_replacer(self, src, dst, lang=None, url_type=None):
982980
"""URL mangler.
@@ -1131,8 +1129,7 @@ def generic_rss_renderer(self, lang, title, link, description, timeline, output_
11311129
try:
11321130
body = doc.body
11331131
data = (body.text or '') + ''.join(
1134-
[lxml.html.tostring(child, encoding='unicode')
1135-
for child in body.iterchildren()])
1132+
[utils.doc_tostring(child) for child in body.iterchildren()])
11361133
except IndexError: # No body there, it happens sometimes
11371134
data = ''
11381135
except lxml.etree.ParserError as e:
@@ -1694,8 +1691,7 @@ def atom_link(link_rel, link_type, link_href):
16941691
try:
16951692
body = doc.body
16961693
data = (body.text or '') + ''.join(
1697-
[lxml.html.tostring(child, encoding='unicode')
1698-
for child in body.iterchildren()])
1694+
[utils.doc_tostring(child) for child in body.iterchildren()])
16991695
except IndexError: # No body there, it happens sometimes
17001696
data = ''
17011697
except lxml.etree.ParserError as e:

‎nikola/post.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -579,9 +579,9 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
579579
hyphenate(document, lang)
580580

581581
try:
582-
data = lxml.html.tostring(document.body, encoding='unicode')
582+
data = utils.doc_tostring(document.body)
583583
except:
584-
data = lxml.html.tostring(document, encoding='unicode')
584+
data = utils.doc_tostring(document)
585585

586586
if teaser_only:
587587
teaser = TEASER_REGEXP.split(data)[0]
@@ -604,9 +604,9 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
604604
# This closes all open tags and sanitizes the broken HTML
605605
document = lxml.html.fromstring(teaser)
606606
try:
607-
data = lxml.html.tostring(document.body, encoding='unicode')
607+
data = utils.doc_tostring(document.body)
608608
except IndexError:
609-
data = lxml.html.tostring(document, encoding='unicode')
609+
data = utils.doc_tostring(document)
610610

611611
if data and strip_html:
612612
try:
@@ -621,9 +621,9 @@ def text(self, lang=None, teaser_only=False, strip_html=False, show_read_more_li
621621
try:
622622
document = lxml.html.fromstring(data)
623623
demote_headers(document, self.demote_headers)
624-
data = lxml.html.tostring(document.body, encoding='unicode')
624+
data = utils.doc_tostring(document.body)
625625
except (lxml.etree.ParserError, IndexError):
626-
data = lxml.html.tostring(document, encoding='unicode')
626+
data = utils.doc_tostring(document)
627627

628628
return data
629629

‎nikola/utils.py

+20-1
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828

2929
from __future__ import print_function, unicode_literals, absolute_import
3030
import calendar
31+
import codecs
3132
import datetime
3233
import dateutil.tz
3334
import hashlib
@@ -47,6 +48,8 @@
4748
import warnings
4849
import PyRSS2Gen as rss
4950
from collections import defaultdict, Callable
51+
52+
import html5lib
5053
from logbook.more import ExceptionHandler, ColorizedStderrHandler
5154
from pygments.formatters import HtmlFormatter
5255
from zipfile import ZipFile as zipf
@@ -69,7 +72,7 @@
6972
'adjust_name_for_index_path', 'adjust_name_for_index_link',
7073
'NikolaPygmentsHTML', 'create_redirect', 'TreeNode',
7174
'flatten_tree_structure', 'parse_escaped_hierarchical_category_name',
72-
'join_hierarchical_category_path']
75+
'join_hierarchical_category_path', 'doc_tostring', 'save_doc']
7376

7477
# Are you looking for 'generic_rss_renderer'?
7578
# It's defined in nikola.nikola.Nikola (the site object).
@@ -1617,3 +1620,19 @@ def escape(s):
16171620
return s.replace('\\', '\\\\').replace('/', '\\/')
16181621

16191622
return '/'.join([escape(p) for p in category_path])
1623+
1624+
1625+
def doc_tostring(doc, **opts):
    """Convert a LXML doc to a string.

    The document (or element) is serialized by html5lib's serializer,
    walking it as an "lxml" tree.

    :param doc: the lxml document or element to serialize.
    :param opts: extra keyword arguments, forwarded unchanged to
        ``html5lib.serializer.serialize``.
    :return: the serialized markup. This is unicode text as long as no
        ``encoding`` option is passed in ``opts``
        (NOTE(review): confirm against the html5lib serializer docs).
    """
    # Serialization must stay free of side effects (no tracing to stdout):
    # this helper runs for every rendered page and every feed entry.
    return html5lib.serializer.serialize(doc, tree="lxml", **opts)
1630+
1631+
1632+
def save_doc(doc, dst, **opts):
    """Write the serialized form of a LXML doc to the file at ``dst``.

    The folder that will contain ``dst`` is passed to ``makedirs`` first,
    then the document is serialized with ``doc_tostring`` (which receives
    ``opts`` unchanged) and the result is written out encoded as UTF-8.

    :param doc: the lxml document to serialize.
    :param dst: path of the output file.
    :param opts: extra keyword arguments forwarded to ``doc_tostring``.
    """
    makedirs(os.path.dirname(dst))
    serialized = doc_tostring(doc, **opts)
    # codecs.open encodes the text as UTF-8 while writing.
    with codecs.open(dst, 'wb+', 'utf8') as out_file:
        out_file.write(serialized)

‎requirements.txt

+1
Original file line number | Diff line number | Diff line change
@@ -12,3 +12,4 @@ logbook>=0.7.0
1212
blinker>=1.3
1313
setuptools>=5.4.1
1414
natsort>=3.5.2
15+
html5lib>=0.999

0 commit comments

Comments
 (0)
Failed to load comments.