Merge branch 'master' into fix-2059
ralsina committed Sep 10, 2015
2 parents 0117fa3 + 20c814b commit ba99000
Showing 8 changed files with 66 additions and 25 deletions.
4 changes: 3 additions & 1 deletion CHANGES.txt
@@ -3,7 +3,9 @@ New in master

Features
--------

+ * Support UTF-8 paths and encoded links when the ``USE_SLUGIFY`` option
+   is disabled. (Issue #2037)
* Per-document hyphenation using "hyphenate" metadata flag.
* New option USE_KATEX to switch from MathJax to KaTeX (Experimental).
* Support SVG in galleries (Issue #1605)
* Made TAG_PATH translatable (Issue #1914)
4 changes: 4 additions & 0 deletions docs/manual.txt
@@ -313,6 +313,10 @@ to your configuration:
Set "True" if you do not want to see the **page** title as a
heading of the output html file (does not work for posts).

+ hyphenate
+     Set "True" if you want this document to be hyphenated even if you have
+     hyphenation disabled by default.

nocomments
Set to "True" to disable comments. Example:

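For context, the new flag rides in the post's metadata block like any other field. A minimal reStructuredText sketch (title and slug values are illustrative):

    .. title: Justified text demo
    .. slug: justified-text-demo
    .. hyphenate: True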
19 changes: 12 additions & 7 deletions nikola/nikola.py
@@ -1252,7 +1252,7 @@ def generic_rss_renderer(self, lang, title, link, description, timeline, output_
"""Take all necessary data, and render a RSS feed in output_path."""
rss_obj = utils.ExtendedRSS2(
title=title,
- link=link,
+ link=utils.encodelink(link),
description=description,
lastBuildDate=datetime.datetime.utcnow(),
generator='https://getnikola.com/',
@@ -1447,7 +1447,9 @@ def register_path_handler(self, kind, f):

def link(self, *args):
"""Create a link."""
- return self.path(*args, is_link=True)
+ url = self.path(*args, is_link=True)
+ url = utils.encodelink(url)
+ return url

def abs_link(self, dst, protocol_relative=False):
"""Get an absolute link."""
@@ -1459,6 +1461,7 @@ def abs_link(self, dst, protocol_relative=False):
url = urlparse(dst).geturl()
if protocol_relative:
url = url.split(":", 1)[1]
+ url = utils.encodelink(url)
return url

def rel_link(self, src, dst):
@@ -1473,7 +1476,7 @@ def rel_link(self, src, dst):
parsed_src = urlsplit(src)
parsed_dst = urlsplit(dst)
if parsed_src[:2] != parsed_dst[:2]:
- return dst
+ return utils.encodelink(dst)
# Now both paths are on the same site and absolute
src_elems = parsed_src.path.split('/')[1:]
dst_elems = parsed_dst.path.split('/')[1:]
@@ -1484,7 +1487,9 @@
else:
i += 1
# Now i is the longest common prefix
- return '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
+ url = '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
+ url = utils.encodelink(url)
+ return url

def file_exists(self, path, not_empty=False):
"""Check if the file exists. If not_empty is True, it also must not be empty."""
@@ -1635,7 +1640,7 @@ def scan_posts(self, really=False, ignore_quit=False, quiet=False):
utils.LOGGER.error('Tag {0} is used in: {1}'.format(other_tag, ', '.join([p.source_path for p in self.posts_per_tag[other_tag]])))
quit = True
else:
- slugged_tags.add(utils.slugify(tag, force=True))
+ slugged_tags.add(utils.slugify(tag))
self.posts_per_tag[tag].append(post)
for lang in self.config['TRANSLATIONS'].keys():
self.tags_per_language[lang].extend(post.tags_for_language(lang))
@@ -1792,7 +1797,7 @@ def atom_link(link_rel, link_type, link_href):
link = lxml.etree.Element("link")
link.set("rel", link_rel)
link.set("type", link_type)
link.set("href", link_href)
link.set("href", utils.encodelink(link_href))
return link

deps = []
@@ -1828,7 +1833,7 @@ def atom_link(link_rel, link_type, link_href):
feed_root = lxml.etree.Element("feed", nsmap=nslist)
feed_root.addprevious(lxml.etree.ProcessingInstruction(
"xml-stylesheet",
'href="' + feed_xsl_link + '" type="text/xsl media="all"'))
'href="' + utils.encodelink(feed_xsl_link) + '" type="text/xsl media="all"'))
feed_root.set("{http://www.w3.org/XML/1998/namespace}lang", lang)
feed_root.set("xmlns", "http://www.w3.org/2005/Atom")
feed_title = lxml.etree.SubElement(feed_root, "title")
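Taken together, these hunks funnel every URL the Nikola site object hands out (link(), abs_link(), rel_link(), RSS links, Atom link elements) through utils.encodelink(). A sketch of the intended effect, assuming USE_SLUGIFY is disabled so path handlers can emit raw UTF-8 paths; the handler output shown is illustrative, not repository code:

    # Sketch of the expected round trip through the new encoding step.
    site.path('tag', 'café', is_link=True)  # e.g. '/categories/café.html'
    site.link('tag', 'café')                # '/categories/caf%C3%A9.html'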
1 change: 1 addition & 0 deletions nikola/plugins/command/auto/__init__.py
@@ -262,6 +262,7 @@ def do_rebuild(self, event):
fname = os.path.basename(event_path)
if (fname.endswith('~') or
fname.startswith('.') or
+ '__pycache__' in event_path or
os.path.isdir(event_path)): # Skip on folders, these are usually duplicates
return
self.logger.info('REBUILDING SITE (from {0})'.format(event_path))
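The added '__pycache__' guard keeps byte-code cache churn from retriggering builds in a loop. The watcher's skip predicate now amounts to the following (a standalone sketch, not the plugin's API):

    import os

    def should_skip(event_path):
        # Sketch of the guard above: editor backups, hidden files,
        # Python byte-code caches, and folders are all ignored.
        fname = os.path.basename(event_path)
        return (fname.endswith('~') or
                fname.startswith('.') or
                '__pycache__' in event_path or
                os.path.isdir(event_path))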
10 changes: 6 additions & 4 deletions nikola/plugins/command/check.py
@@ -212,7 +212,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
# Quietly ignore files that don’t exist; use `nikola check -f` instead (Issue #1831)
return False

- if '.html' == fname[-5:]:  # DISABLED
+ if '.html' == fname[-5:]:
d = lxml.html.fromstring(open(filename, 'rb').read())
extra_objs = lxml.html.fromstring('<html/>')

@@ -323,8 +323,9 @@ def analyze(self, fname, find_sources=False, check_remote=False):
target_filename = os.path.abspath(
os.path.join(self.site.config['OUTPUT_FOLDER'], unquote(target.lstrip('/'))))
else: # Relative path
+ unquoted_target = unquote(target).encode('utf-8') if sys.version_info.major >= 3 else unquote(target).decode('utf-8')
target_filename = os.path.abspath(
- os.path.join(os.path.dirname(filename), unquote(target)))
+ os.path.join(os.path.dirname(filename).encode('utf-8'), unquoted_target))

elif url_type in ('full_path', 'absolute'):
if url_type == 'absolute':
@@ -340,9 +341,10 @@ def analyze(self, fname, find_sources=False, check_remote=False):

if any(re.search(x, target_filename) for x in self.whitelist):
continue

elif target_filename not in self.existing_targets:
if os.path.exists(target_filename):
self.logger.notice("Good link {0} => {1}".format(target, target_filename))
self.logger.notice(u"Good link {0} => {1}".format(target, target_filename))
self.existing_targets.add(target_filename)
else:
rv = True
@@ -352,7 +354,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
self.logger.warn("\n".join(deps[filename]))
self.logger.warn("===============================\n")
except Exception as exc:
self.logger.error("Error with: {0} {1}".format(filename, exc))
self.logger.error(u"Error with: {0} {1}".format(filename, exc))
return rv

def scan_links(self, find_sources=False, check_remote=False):
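The sys.version_info branch exists because os.path.join() refuses to mix str and bytes on Python 3, so both the directory and the unquoted link target are normalized to UTF-8 bytes before joining. A sketch of the Python 3 path, with illustrative file names:

    import os
    from urllib.parse import unquote

    dirname = os.path.dirname('output/posts/index.html').encode('utf-8')
    target = unquote('caf%C3%A9.html').encode('utf-8')
    os.path.join(dirname, target)  # b'output/posts/caf\xc3\xa9.html'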
12 changes: 6 additions & 6 deletions nikola/plugins/task/sitemap/__init__.py
@@ -40,7 +40,7 @@
import urllib.robotparser as robotparser # NOQA

from nikola.plugin_categories import LateTask
- from nikola.utils import config_changed, apply_filters
+ from nikola.utils import apply_filters, config_changed, encodelink


urlset_header = """<?xml version="1.0" encoding="UTF-8"?>
@@ -158,10 +158,10 @@ def scan_locs():
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
for fname in files:
if kw['strip_indexes'] and fname == kw['index_file']:
continue # We already mapped the folder
@@ -201,7 +201,7 @@ def scan_locs():
path = path.replace(os.sep, '/')
lastmod = self.get_lastmod(real_path)
loc = urljoin(base_url, base_path + path)
- sitemapindex[loc] = sitemap_format.format(loc, lastmod)
+ sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
continue
else:
continue # ignores all XML files except those presumed to be RSS
@@ -215,10 +215,10 @@ def scan_locs():
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, '\n'.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))

def robot_fetch(path):
"""Check if robots can fetch a file."""
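The comparison changes because Post.permalink() now returns an already-encoded URL (see nikola/post.py below), while loc comes straight from urljoin() and may still carry raw UTF-8. Without encoding loc first, a non-ASCII permalink would never match its own alternate. Illustratively (the example.com URLs are made up):

    loc = 'https://example.com/posts/café/'           # raw, from urljoin()
    alt_url = 'https://example.com/posts/caf%C3%A9/'  # encoded by permalink()
    loc == alt_url               # False: the self-alternate would be kept
    encodelink(loc) == alt_url   # True: correctly skipped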
15 changes: 10 additions & 5 deletions nikola/post.py
@@ -129,7 +129,6 @@ def __init__(
self._template_name = template_name
self.is_two_file = True
self.newstylemeta = True
- self.hyphenate = self.config['HYPHENATE']
self._reading_time = None
self._remaining_reading_time = None
self._paragraph_count = None
@@ -231,6 +230,11 @@ def __init__(
# Register potential extra dependencies
self.compiler.register_extra_dependencies(self)

+ def _get_hyphenate(self):
+     return bool(self.config['HYPHENATE'] or self.meta('hyphenate'))
+
+ hyphenate = property(_get_hyphenate)

def __repr__(self):
"""Provide a representation of the post object."""
# Calculate a hash that represents most data about the post
@@ -707,10 +711,9 @@ def remaining_paragraph_count(self):
def source_link(self, lang=None):
"""Return absolute link to the post's source."""
ext = self.source_ext(True)
return "/" + self.destination_path(
lang=lang,
extension=ext,
sep='/')
link = "/" + self.destination_path(lang=lang, extension=ext, sep='/')
link = utils.encodelink(link)
return link

def destination_path(self, lang=None, extension='.html', sep=os.sep):
"""Destination path for this post, relative to output/.
@@ -747,6 +750,7 @@ def section_link(self, lang=None):
link = urljoin('/' + slug + '/', self.index_file)
else:
link = '/' + slug + '/'
+ link = utils.encodelink(link)
return link

def section_name(self, lang=None):
@@ -803,6 +807,7 @@ def permalink(self, lang=None, absolute=False, extension='.html', query=None):
link = link[:-index_len]
if query:
link = link + "?" + query
+ link = utils.encodelink(link)
return link

@property
Expand Down
26 changes: 24 additions & 2 deletions nikola/utils.py
@@ -45,15 +45,24 @@
import dateutil.parser
import dateutil.tz
import logbook
+ try:
+     from urllib import quote as urlquote
+     from urllib import unquote as urlunquote
+     from urlparse import urlparse, urlunparse
+ except ImportError:
+     from urllib.parse import quote as urlquote  # NOQA
+     from urllib.parse import unquote as urlunquote  # NOQA
+     from urllib.parse import urlparse, urlunparse  # NOQA
import warnings
import PyRSS2Gen as rss
- from collections import defaultdict, Callable
+ from collections import defaultdict, Callable, OrderedDict
from logbook.compat import redirect_logging
from logbook.more import ExceptionHandler, ColorizedStderrHandler
from pygments.formatters import HtmlFormatter
from zipfile import ZipFile as zipf
from doit import tools
from unidecode import unidecode
+ from unicodedata import normalize as unicodenormalize
from pkg_resources import resource_filename
from doit.cmdparse import CmdParse

@@ -725,7 +734,7 @@ def remove_file(source):
elif os.path.isfile(source) or os.path.islink(source):
os.remove(source)

- # slugify is copied from
+ # slugify is adopted from
# http://code.activestate.com/recipes/
# 577257-slugify-make-a-string-usable-in-a-url-or-filename/
_slugify_strip_re = re.compile(r'[^+\w\s-]')
@@ -783,9 +792,22 @@ def unslugify(value, discard_numbers=True):
return value


+ def encodelink(iri):
+     """Given an encoded or unencoded link string, return an encoded string suitable for use as a link in HTML and XML."""
+     iri = unicodenormalize('NFC', iri)
+     link = OrderedDict(urlparse(iri)._asdict())
+     link['path'] = urlquote(urlunquote(link['path']).encode('utf-8'))
+     try:
+         link['netloc'] = link['netloc'].encode('utf-8').decode('idna').encode('idna').decode('utf-8')
+     except UnicodeDecodeError:
+         link['netloc'] = link['netloc'].encode('idna').decode('utf-8')
+     encoded_link = urlunparse(link.values())
+     return encoded_link

# A very slightly safer version of zip.extractall that works on
# python < 2.6


class UnsafeZipException(Exception):

"""Exception for unsafe zip files."""
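encodelink() is effectively idempotent on the path: it NFC-normalizes the IRI, unquotes the path, and re-quotes it as UTF-8, so encoded and unencoded spellings of the same link converge, while non-ASCII hostnames round-trip through IDNA. Expected behavior on Python 3 (a sketch, not doctests from the repository):

    from nikola.utils import encodelink

    encodelink('/posts/café/')       # '/posts/caf%C3%A9/'
    encodelink('/posts/caf%C3%A9/')  # '/posts/caf%C3%A9/' (already-encoded input is stable)
    # IDN hosts should come back in ACE (punycode) form:
    encodelink('http://ドメイン.jp/')  # 'http://xn--eckwd4c7c.jp/'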
