Skip to content

Commit

Permalink
fix #1644 -- work around issues with IDNs
Browse files Browse the repository at this point in the history
Encode IDNs to Punycode in ``nikola init`` and in links;
show an error if the site URL is not Punycode.

Signed-off-by: Chris Warrick <kwpolska@gmail.com>
  • Loading branch information
Kwpolska committed Apr 24, 2015
1 parent a0fba78 commit ea4ff18
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 21 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Expand Up @@ -17,6 +17,8 @@ Features
Bugfixes
--------

* Encode IDNs to Punycode in ``nikola init`` and in links;
show an error if the site URL contains a non-ASCII (Unicode) IDN
instead of its Punycode form (Issue #1644)
* Make ``images`` the default output directory for IMAGE_FOLDERS
(Issue #1663)
* Don't default to any swatch in bootswatch_theme (Issue #1656)
Expand Down
34 changes: 17 additions & 17 deletions nikola/conf.py.in
Expand Up @@ -492,28 +492,28 @@ IMAGE_FOLDERS = {'images': 'images'}
# algol
# algol_nu
# arduino
# autumn
# borland
# bw
# colorful
# default
# emacs
# friendly
# fruity
# autumn
# borland
# bw
# colorful
# default
# emacs
# friendly
# fruity
# igor
# lovelace
# manni
# monokai
# murphy
# native
# monokai
# murphy
# native
# paraiso_dark
# paraiso_light
# pastie
# perldoc
# rrt
# tango
# trac
# vim
# pastie
# perldoc
# rrt
# tango
# trac
# vim
# vs
# xcode
# This list MAY be incomplete since pygments adds styles every now and then.
Expand Down
31 changes: 29 additions & 2 deletions nikola/nikola.py
Expand Up @@ -37,9 +37,9 @@
import sys
import mimetypes
try:
from urlparse import urlparse, urlsplit, urljoin
from urlparse import urlparse, urlsplit, urlunsplit, urljoin, unquote
except ImportError:
from urllib.parse import urlparse, urlsplit, urljoin # NOQA
from urllib.parse import urlparse, urlsplit, urlunsplit, urljoin, unquote # NOQA

try:
import pyphen
Expand Down Expand Up @@ -625,6 +625,15 @@ def __init__(self, **config):
utils.LOGGER.warn("Your BASE_URL doesn't end in / -- adding it, but please fix it in your config file!")
self.config['BASE_URL'] += '/'

try:
_bnl = urlsplit(self.config['BASE_URL']).netloc
_bnl.encode('ascii')
urlsplit(self.config['SITE_URL']).netloc.encode('ascii')
except (UnicodeEncodeError, UnicodeDecodeError):
utils.LOGGER.error("Your BASE_URL or SITE_URL contains an IDN expressed in Unicode. Please convert it to Punycode.")
utils.LOGGER.error("Punycode of {}: {}".format(_bnl, _bnl.encode('idna')))
sys.exit(1)

# todo: remove in v8
if not isinstance(self.config['DEPLOY_COMMANDS'], dict):
utils.LOGGER.warn("A single list as DEPLOY_COMMANDS is deprecated. DEPLOY_COMMANDS should be a dict, with deploy preset names as keys and lists of commands as values.")
Expand Down Expand Up @@ -984,6 +993,24 @@ def url_replacer(self, src, dst, lang=None, url_type=None):
if dst_url.scheme == 'link': # Magic link
dst = self.link(dst_url.netloc, dst_url.path.lstrip('/'), lang)
else:
print(dst)
if '%' in dst_url.netloc:
# convert lxml percent-encoded garbage to punycode
nl = unquote(dst_url.netloc)
try:
nl = nl.decode('utf-8')
except AttributeError:
# python 3: already unicode
pass

nl = nl.encode('idna')

dst = urlunsplit((dst_url.scheme,
nl,
dst_url.path,
dst_url.query,
dst_url.fragment))
print(dst)
return dst
elif dst_url.scheme == 'link': # Magic absolute path link:
dst = dst_url.path
Expand Down
34 changes: 32 additions & 2 deletions nikola/plugins/command/init.py
Expand Up @@ -39,7 +39,7 @@
import tarfile

import nikola
from nikola.nikola import DEFAULT_TRANSLATIONS_PATTERN, DEFAULT_INDEX_READ_MORE_LINK, DEFAULT_RSS_READ_MORE_LINK, LEGAL_VALUES
from nikola.nikola import DEFAULT_TRANSLATIONS_PATTERN, DEFAULT_INDEX_READ_MORE_LINK, DEFAULT_RSS_READ_MORE_LINK, LEGAL_VALUES, urlsplit, urlunsplit
from nikola.plugin_categories import Command
from nikola.utils import ask, ask_yesno, get_logger, makedirs, STDERR_HANDLER, load_messages
from nikola.packages.tzlocal import get_localzone
Expand Down Expand Up @@ -261,6 +261,36 @@ def create_empty_site(cls, target):
@staticmethod
def ask_questions(target):
"""Ask some questions about Nikola."""
def urlhandler(default, toconf):
    """Ask for the site URL and store it in ``SAMPLE_CONF['SITE_URL']``.

    The prompt is repeated until the answer starts with ``http``.  A
    trailing slash is appended when missing, and a host name containing
    non-ASCII characters (an IDN) is converted to Punycode (Issue #1644).

    ``default`` and ``toconf`` are accepted for signature compatibility
    with the other question handlers; this handler does not read them.
    """
    # Loop instead of recursing so repeated bad answers do not grow the stack.
    while True:
        answer = ask('Site URL', 'http://getnikola.com/')
        try:
            # Byte strings (Python 2) are decoded so the URL is handled as
            # text from here on.
            answer = answer.decode('utf-8')
        except (AttributeError, UnicodeDecodeError):
            # AttributeError: Python 3 ``str`` has no ``decode``.
            # UnicodeDecodeError: keep the raw answer unchanged.
            pass
        if answer.startswith(u'http'):
            break
        print(" ERROR: You must specify a protocol (http or https).")

    if not answer.endswith('/'):
        print(" The URL does not end in '/' -- adding it.")
        answer += '/'

    dst_url = urlsplit(answer)
    try:
        dst_url.netloc.encode('ascii')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # The IDN contains characters beyond ASCII. We must convert it
        # to Punycode. (Issue #1644)
        # The 'idna' codec yields bytes; decode them back to text, because
        # urlunsplit() on Python 3 raises TypeError when str and bytes
        # components are mixed.
        nl = dst_url.netloc.encode('idna').decode('ascii')
        answer = urlunsplit((dst_url.scheme,
                             nl,
                             dst_url.path,
                             dst_url.query,
                             dst_url.fragment))
        print(" Converting to Punycode:", answer)

    SAMPLE_CONF['SITE_URL'] = answer

def lhandler(default, toconf, show_header=True):
if show_header:
print("We will now ask you to provide the list of languages you want to use.")
Expand Down Expand Up @@ -375,7 +405,7 @@ def chandler(default, toconf):
('Site author', 'Nikola Tesla', True, 'BLOG_AUTHOR'),
('Site author\'s e-mail', 'n.tesla@example.com', True, 'BLOG_EMAIL'),
('Site description', 'This is a demo site for Nikola.', True, 'BLOG_DESCRIPTION'),
('Site URL', 'http://getnikola.com/', True, 'SITE_URL'),
(urlhandler, None, True, True),
('Questions about languages and locales', None, None, None),
(lhandler, None, True, True),
(tzhandler, None, True, True),
Expand Down

0 comments on commit ea4ff18

Please sign in to comment.