Commit ef6af2b

Merge pull request #2037 from getnikola/encodelinks

Use encodelink() everywhere

da2x committed Sep 10, 2015
2 parents 078a8b8 + f15f871, commit ef6af2b

Showing 5 changed files with 53 additions and 23 deletions.
19 changes: 12 additions & 7 deletions nikola/nikola.py
@@ -1252,7 +1252,7 @@ def generic_rss_renderer(self, lang, title, link, description, timeline, output_
         """Take all necessary data, and render a RSS feed in output_path."""
         rss_obj = utils.ExtendedRSS2(
             title=title,
-            link=link,
+            link=utils.encodelink(link),
             description=description,
             lastBuildDate=datetime.datetime.utcnow(),
             generator='https://getnikola.com/',
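
Note: feed consumers expect the channel link to be a percent-encoded URI rather than a raw IRI, which is why the link is now passed through utils.encodelink(). A minimal standalone sketch (Python 3, not part of this commit) of the path handling encodelink() applies:

    from urllib.parse import quote, unquote

    def encode_path(path):
        # Unquote first so already-encoded input is not double-encoded,
        # then re-quote; this mirrors the path step inside encodelink().
        return quote(unquote(path))

    print(encode_path('/blog/føø bar/'))              # /blog/f%C3%B8%C3%B8%20bar/
    print(encode_path('/blog/f%C3%B8%C3%B8%20bar/'))  # same output: idempotent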
@@ -1447,7 +1449,9 @@ def register_path_handler(self, kind, f):
 
     def link(self, *args):
         """Create a link."""
-        return self.path(*args, is_link=True)
+        url = self.path(*args, is_link=True)
+        url = utils.encodelink(url)
+        return url
 
     def abs_link(self, dst, protocol_relative=False):
         """Get an absolute link."""
@@ -1459,6 +1461,7 @@ def abs_link(self, dst, protocol_relative=False):
         url = urlparse(dst).geturl()
         if protocol_relative:
             url = url.split(":", 1)[1]
+        url = utils.encodelink(url)
         return url
 
     def rel_link(self, src, dst):
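
Note: both link() and abs_link() now funnel their result through utils.encodelink(), so whatever templates receive is always a valid, percent-encoded URL. A rough Python 3 sketch of the abs_link() flow (base URL and destination assumed):

    from urllib.parse import quote, urljoin, urlparse

    base_url = 'https://example.com/'   # assumed BASE_URL setting
    dst = '/tägs/ö/'
    url = urlparse(urljoin(base_url, dst)).geturl()
    url = quote(url, safe='/:')         # stand-in for utils.encodelink(url)
    print(url)                          # https://example.com/t%C3%A4gs/%C3%B6/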
@@ -1473,7 +1476,7 @@ def rel_link(self, src, dst):
         parsed_src = urlsplit(src)
         parsed_dst = urlsplit(dst)
         if parsed_src[:2] != parsed_dst[:2]:
-            return dst
+            return utils.encodelink(dst)
         # Now both paths are on the same site and absolute
         src_elems = parsed_src.path.split('/')[1:]
         dst_elems = parsed_dst.path.split('/')[1:]
@@ -1484,7 +1487,9 @@ def rel_link(self, src, dst):
         else:
             i += 1
         # Now i is the longest common prefix
-        return '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
+        url = '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
+        url = utils.encodelink(url)
+        return url
 
     def file_exists(self, path, not_empty=False):
         """Check if the file exists. If not_empty is True, it also must not be empty."""
@@ -1635,7 +1640,7 @@ def scan_posts(self, really=False, ignore_quit=False, quiet=False):
                     utils.LOGGER.error('Tag {0} is used in: {1}'.format(other_tag, ', '.join([p.source_path for p in self.posts_per_tag[other_tag]])))
                     quit = True
                 else:
-                    slugged_tags.add(utils.slugify(tag, force=True))
+                    slugged_tags.add(utils.slugify(tag))
                 self.posts_per_tag[tag].append(post)
             for lang in self.config['TRANSLATIONS'].keys():
                 self.tags_per_language[lang].extend(post.tags_for_language(lang))
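
Note: the error above fires because two distinct tag names can reduce to the same slug. A rough, self-contained approximation of what slugify does (the real implementation lives in nikola/utils.py):

    import re
    from unidecode import unidecode

    def approx_slugify(value):
        # Transliterate to ASCII, lowercase, drop punctuation,
        # collapse runs of whitespace/hyphens into single hyphens.
        value = unidecode(value).lower()
        value = re.sub(r'[^\w\s-]', '', value).strip()
        return re.sub(r'[-\s]+', '-', value)

    print(approx_slugify('Tag #1'))  # tag-1
    print(approx_slugify('tag 1'))   # tag-1 -- collides with the tag above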
@@ -1792,7 +1797,7 @@ def atom_link(link_rel, link_type, link_href):
             link = lxml.etree.Element("link")
             link.set("rel", link_rel)
             link.set("type", link_type)
-            link.set("href", link_href)
+            link.set("href", utils.encodelink(link_href))
             return link
 
         deps = []
@@ -1828,7 +1833,7 @@ def atom_link(link_rel, link_type, link_href):
         feed_root = lxml.etree.Element("feed", nsmap=nslist)
         feed_root.addprevious(lxml.etree.ProcessingInstruction(
             "xml-stylesheet",
-            'href="' + feed_xsl_link + '" type="text/xsl media="all"'))
+            'href="' + utils.encodelink(feed_xsl_link) + '" type="text/xsl media="all"'))
         feed_root.set("{http://www.w3.org/XML/1998/namespace}lang", lang)
         feed_root.set("xmlns", "http://www.w3.org/2005/Atom")
         feed_title = lxml.etree.SubElement(feed_root, "title")
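
Note: the stylesheet href ends up inside an xml-stylesheet processing instruction, so it must already be a valid URI when the PI text is assembled. A standalone lxml sketch (simplified to well-formed pseudo-attributes; the xsl path is an assumed value):

    import lxml.etree

    feed_xsl_link = '/assets/xml/atom.xsl'  # assumed path
    feed_root = lxml.etree.Element('feed')
    feed_root.addprevious(lxml.etree.ProcessingInstruction(
        'xml-stylesheet', 'href="' + feed_xsl_link + '" type="text/xsl" media="all"'))
    print(lxml.etree.tostring(feed_root.getroottree(),
                              xml_declaration=True, encoding='UTF-8').decode('utf-8'))
    # The serialized tree starts with the declaration, then the PI:
    # <?xml-stylesheet href="/assets/xml/atom.xsl" type="text/xsl" media="all"?>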
10 changes: 6 additions & 4 deletions nikola/plugins/command/check.py
@@ -212,7 +212,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
                # Quietly ignore files that don’t exist; use `nikola check -f` instead (Issue #1831)
                return False
 
-            if '.html' == fname[-5:]:  # DISABLED
+            if '.html' == fname[-5:]:
                d = lxml.html.fromstring(open(filename, 'rb').read())
                extra_objs = lxml.html.fromstring('<html/>')
@@ -323,8 +323,9 @@ def analyze(self, fname, find_sources=False, check_remote=False):
                    target_filename = os.path.abspath(
                        os.path.join(self.site.config['OUTPUT_FOLDER'], unquote(target.lstrip('/'))))
                else:  # Relative path
+                    unquoted_target = unquote(target).encode('utf-8') if sys.version_info.major >= 3 else unquote(target).decode('utf-8')
                    target_filename = os.path.abspath(
-                        os.path.join(os.path.dirname(filename), unquote(target)))
+                        os.path.join(os.path.dirname(filename).encode('utf-8'), unquoted_target))
 
            elif url_type in ('full_path', 'absolute'):
                if url_type == 'absolute':
@@ -340,9 +341,10 @@ def analyze(self, fname, find_sources=False, check_remote=False):
 
                if any(re.search(x, target_filename) for x in self.whitelist):
                    continue
+
                elif target_filename not in self.existing_targets:
                    if os.path.exists(target_filename):
-                        self.logger.notice("Good link {0} => {1}".format(target, target_filename))
+                        self.logger.notice(u"Good link {0} => {1}".format(target, target_filename))
                        self.existing_targets.add(target_filename)
                    else:
                        rv = True
@@ -352,7 +354,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
                    self.logger.warn("\n".join(deps[filename]))
                    self.logger.warn("===============================\n")
        except Exception as exc:
-            self.logger.error("Error with: {0} {1}".format(filename, exc))
+            self.logger.error(u"Error with: {0} {1}".format(filename, exc))
        return rv
 
    def scan_links(self, find_sources=False, check_remote=False):
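
Note: link targets scraped from HTML are percent-encoded, so they must be unquoted before being joined with on-disk paths; the extra encode()/decode() handling in the hunk above keeps Python 2 and 3 byte/text semantics aligned. A Python 3 sketch of the underlying mismatch (file names assumed):

    import os
    from urllib.parse import unquote

    target = 'f%C3%B8%C3%B8.html'                   # href as it appears in the page
    print(os.path.join('output', target))           # output/f%C3%B8%C3%B8.html (no such file)
    print(os.path.join('output', unquote(target)))  # output/føø.html (the real file)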
12 changes: 6 additions & 6 deletions nikola/plugins/task/sitemap/__init__.py
@@ -40,7 +40,7 @@
     import urllib.robotparser as robotparser  # NOQA
 
 from nikola.plugin_categories import LateTask
-from nikola.utils import config_changed, apply_filters
+from nikola.utils import apply_filters, config_changed, encodelink
 
 
 urlset_header = """<?xml version="1.0" encoding="UTF-8"?>
@@ -158,10 +158,10 @@ def scan_locs():
                    if post:
                        for lang in kw['translations']:
                            alt_url = post.permalink(lang=lang, absolute=True)
-                            if loc == alt_url:
+                            if encodelink(loc) == alt_url:
                                continue
                            alternates.append(alternates_format.format(lang, alt_url))
-                    urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates))
+                    urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
                    for fname in files:
                        if kw['strip_indexes'] and fname == kw['index_file']:
                            continue  # We already mapped the folder
@@ -201,7 +201,7 @@ def scan_locs():
                        path = path.replace(os.sep, '/')
                        lastmod = self.get_lastmod(real_path)
                        loc = urljoin(base_url, base_path + path)
-                        sitemapindex[loc] = sitemap_format.format(loc, lastmod)
+                        sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
                        continue
                    else:
                        continue  # ignores all XML files except those presumed to be RSS
@@ -215,10 +215,10 @@ def scan_locs():
                if post:
                    for lang in kw['translations']:
                        alt_url = post.permalink(lang=lang, absolute=True)
-                        if loc == alt_url:
+                        if encodelink(loc) == alt_url:
                            continue
                        alternates.append(alternates_format.format(lang, alt_url))
-                urlset[loc] = loc_format.format(loc, lastmod, '\n'.join(alternates))
+                urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))
 
        def robot_fetch(path):
            """Check if robots can fetch a file."""
9 changes: 5 additions & 4 deletions nikola/post.py
@@ -711,10 +711,9 @@ def remaining_paragraph_count(self):
     def source_link(self, lang=None):
         """Return absolute link to the post's source."""
         ext = self.source_ext(True)
-        return "/" + self.destination_path(
-            lang=lang,
-            extension=ext,
-            sep='/')
+        link = "/" + self.destination_path(lang=lang, extension=ext, sep='/')
+        link = utils.encodelink(link)
+        return link
 
     def destination_path(self, lang=None, extension='.html', sep=os.sep):
         """Destination path for this post, relative to output/.
@@ -751,6 +750,7 @@ def section_link(self, lang=None):
             link = urljoin('/' + slug + '/', self.index_file)
         else:
             link = '/' + slug + '/'
+        link = utils.encodelink(link)
         return link
 
     def section_name(self, lang=None):
@@ -807,6 +807,7 @@ def permalink(self, lang=None, absolute=False, extension='.html', query=None):
             link = link[:-index_len]
         if query:
             link = link + "?" + query
+        link = utils.encodelink(link)
         return link
 
     @property
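
Note: encoding happens after the query string is appended, and encodelink() percent-encodes only the path component, so the query survives intact. A rough Python 3 sketch of that final step (values assumed):

    from urllib.parse import quote, urlsplit, urlunsplit

    link = '/posts/økonomi/'      # assumed permalink path
    query = 'comments=1'
    if query:
        link = link + '?' + query
    # Stand-in for the new utils.encodelink(link) step: quote the path only.
    parts = urlsplit(link)
    print(urlunsplit(parts._replace(path=quote(parts.path))))
    # /posts/%C3%B8konomi/?comments=1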
26 changes: 24 additions & 2 deletions nikola/utils.py
@@ -45,15 +45,24 @@
 import dateutil.parser
 import dateutil.tz
 import logbook
+try:
+    from urllib import quote as urlquote
+    from urllib import unquote as urlunquote
+    from urlparse import urlparse, urlunparse
+except ImportError:
+    from urllib.parse import quote as urlquote  # NOQA
+    from urllib.parse import unquote as urlunquote  # NOQA
+    from urllib.parse import urlparse, urlunparse  # NOQA
 import warnings
 import PyRSS2Gen as rss
-from collections import defaultdict, Callable
+from collections import defaultdict, Callable, OrderedDict
 from logbook.compat import redirect_logging
 from logbook.more import ExceptionHandler, ColorizedStderrHandler
 from pygments.formatters import HtmlFormatter
 from zipfile import ZipFile as zipf
 from doit import tools
 from unidecode import unidecode
+from unicodedata import normalize as unicodenormalize
 from pkg_resources import resource_filename
 from doit.cmdparse import CmdParse
 
@@ -725,7 +734,7 @@ def remove_file(source):
     elif os.path.isfile(source) or os.path.islink(source):
         os.remove(source)
 
-# slugify is copied from
+# slugify is adopted from
 # http://code.activestate.com/recipes/
 # 577257-slugify-make-a-string-usable-in-a-url-or-filename/
 _slugify_strip_re = re.compile(r'[^+\w\s-]')
@@ -783,9 +792,22 @@ def unslugify(value, discard_numbers=True):
     return value
 
 
+def encodelink(iri):
+    """Given an encoded or unencoded link string, return an encoded string suitable for use as a link in HTML and XML."""
+    iri = unicodenormalize('NFC', iri)
+    link = OrderedDict(urlparse(iri)._asdict())
+    link['path'] = urlquote(urlunquote(link['path']).encode('utf-8'))
+    try:
+        link['netloc'] = link['netloc'].encode('utf-8').decode('idna').encode('idna').decode('utf-8')
+    except UnicodeDecodeError:
+        link['netloc'] = link['netloc'].encode('idna').decode('utf-8')
+    encoded_link = urlunparse(link.values())
+    return encoded_link
+
 # A very slightly safer version of zip.extractall that works on
 # python < 2.6
+
 
 class UnsafeZipException(Exception):
 
     """Exception for unsafe zip files."""
