Skip to content

Commit ea4ff18

Browse files
committed Apr 24, 2015
fix #1644 -- work around issues with IDNs
Encode IDNs to Punycode in ``nikola init`` and in links; show an error if the site URL is not Punycode. Signed-off-by: Chris Warrick <kwpolska@gmail.com>
1 parent a0fba78 commit ea4ff18

File tree

4 files changed

+80
-21
lines changed

4 files changed

+80
-21
lines changed
 

‎CHANGES.txt

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ Features
1717
Bugfixes
1818
--------
1919

20+
* Encode IDNs to Punycode in ``nikola init`` and in links;
21+
show an error if the site URL is not Punycode (Issue #1644)
2022
* Make ``images`` the default output directory for IMAGE_FOLDERS
2123
(Issue #1663)
2224
* Don't default to any swatch in bootswatch_theme (Issue #1656)

‎nikola/conf.py.in

+17-17
Original file line numberDiff line numberDiff line change
@@ -492,28 +492,28 @@ IMAGE_FOLDERS = {'images': 'images'}
492492
# algol
493493
# algol_nu
494494
# arduino
495-
# autumn
496-
# borland
497-
# bw
498-
# colorful
499-
# default
500-
# emacs
501-
# friendly
502-
# fruity
495+
# autumn
496+
# borland
497+
# bw
498+
# colorful
499+
# default
500+
# emacs
501+
# friendly
502+
# fruity
503503
# igor
504504
# lovelace
505505
# manni
506-
# monokai
507-
# murphy
508-
# native
506+
# monokai
507+
# murphy
508+
# native
509509
# paraiso_dark
510510
# paraiso_light
511-
# pastie
512-
# perldoc
513-
# rrt
514-
# tango
515-
# trac
516-
# vim
511+
# pastie
512+
# perldoc
513+
# rrt
514+
# tango
515+
# trac
516+
# vim
517517
# vs
518518
# xcode
519519
# This list MAY be incomplete since pygments adds styles every now and then.

‎nikola/nikola.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@
3737
import sys
3838
import mimetypes
3939
try:
40-
from urlparse import urlparse, urlsplit, urljoin
40+
from urlparse import urlparse, urlsplit, urlunsplit, urljoin, unquote
4141
except ImportError:
42-
from urllib.parse import urlparse, urlsplit, urljoin # NOQA
42+
from urllib.parse import urlparse, urlsplit, urlunsplit, urljoin, unquote # NOQA
4343

4444
try:
4545
import pyphen
@@ -625,6 +625,15 @@ def __init__(self, **config):
625625
utils.LOGGER.warn("Your BASE_URL doesn't end in / -- adding it, but please fix it in your config file!")
626626
self.config['BASE_URL'] += '/'
627627

628+
try:
629+
_bnl = urlsplit(self.config['BASE_URL']).netloc
630+
_bnl.encode('ascii')
631+
urlsplit(self.config['SITE_URL']).netloc.encode('ascii')
632+
except (UnicodeEncodeError, UnicodeDecodeError):
633+
utils.LOGGER.error("Your BASE_URL or SITE_URL contains an IDN expressed in Unicode. Please convert it to Punycode.")
634+
utils.LOGGER.error("Punycode of {}: {}".format(_bnl, _bnl.encode('idna')))
635+
sys.exit(1)
636+
628637
# todo: remove in v8
629638
if not isinstance(self.config['DEPLOY_COMMANDS'], dict):
630639
utils.LOGGER.warn("A single list as DEPLOY_COMMANDS is deprecated. DEPLOY_COMMANDS should be a dict, with deploy preset names as keys and lists of commands as values.")
@@ -984,6 +993,24 @@ def url_replacer(self, src, dst, lang=None, url_type=None):
984993
if dst_url.scheme == 'link': # Magic link
985994
dst = self.link(dst_url.netloc, dst_url.path.lstrip('/'), lang)
986995
else:
996+
print(dst)
997+
if '%' in dst_url.netloc:
998+
# convert lxml percent-encoded garbage to punycode
999+
nl = unquote(dst_url.netloc)
1000+
try:
1001+
nl = nl.decode('utf-8')
1002+
except AttributeError:
1003+
# python 3: already unicode
1004+
pass
1005+
1006+
nl = nl.encode('idna')
1007+
1008+
dst = urlunsplit((dst_url.scheme,
1009+
nl,
1010+
dst_url.path,
1011+
dst_url.query,
1012+
dst_url.fragment))
1013+
print(dst)
9871014
return dst
9881015
elif dst_url.scheme == 'link': # Magic absolute path link:
9891016
dst = dst_url.path

‎nikola/plugins/command/init.py

+32-2
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
import tarfile
4040

4141
import nikola
42-
from nikola.nikola import DEFAULT_TRANSLATIONS_PATTERN, DEFAULT_INDEX_READ_MORE_LINK, DEFAULT_RSS_READ_MORE_LINK, LEGAL_VALUES
42+
from nikola.nikola import DEFAULT_TRANSLATIONS_PATTERN, DEFAULT_INDEX_READ_MORE_LINK, DEFAULT_RSS_READ_MORE_LINK, LEGAL_VALUES, urlsplit, urlunsplit
4343
from nikola.plugin_categories import Command
4444
from nikola.utils import ask, ask_yesno, get_logger, makedirs, STDERR_HANDLER, load_messages
4545
from nikola.packages.tzlocal import get_localzone
@@ -261,6 +261,36 @@ def create_empty_site(cls, target):
261261
@staticmethod
262262
def ask_questions(target):
263263
"""Ask some questions about Nikola."""
264+
def urlhandler(default, toconf):
    """Ask for the site URL, normalize it, and store it as SITE_URL.

    The user is re-prompted until the answer starts with ``http://`` or
    ``https://``.  A trailing slash is appended when missing, and a host
    name containing non-ASCII characters (an IDN) is converted to
    Punycode (Issue #1644).  The final URL is written to
    ``SAMPLE_CONF['SITE_URL']``; nothing is returned.

    ``default`` and ``toconf`` are not used by this handler.
    NOTE(review): presumably they are kept because the caller invokes
    every handler with the same two arguments -- confirm against caller.
    """
    # Loop instead of recursing so repeated invalid input cannot grow
    # the call stack.
    while True:
        answer = ask('Site URL', 'http://getnikola.com/')
        try:
            # Byte strings are decoded to text; text objects without a
            # ``decode`` method (Python 3 str) are left untouched.
            answer = answer.decode('utf-8')
        except (AttributeError, UnicodeDecodeError):
            pass
        # Require a real scheme separator: a bare ``http`` prefix would
        # also accept answers such as ``httpfoo.example``.
        if answer.startswith((u'http://', u'https://')):
            break
        print(" ERROR: You must specify a protocol (http or https).")

    if not answer.endswith('/'):
        print(" The URL does not end in '/' -- adding it.")
        answer += '/'

    dst_url = urlsplit(answer)
    try:
        dst_url.netloc.encode('ascii')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # The IDN contains characters beyond ASCII. We must convert it
        # to Punycode. (Issue #1644)
        # The 'idna' codec yields bytes; decode back to text so that
        # urlunsplit() is not handed a mix of str and bytes components
        # (which raises TypeError on Python 3).
        nl = dst_url.netloc.encode('idna').decode('ascii')
        answer = urlunsplit((dst_url.scheme,
                             nl,
                             dst_url.path,
                             dst_url.query,
                             dst_url.fragment))
        print(" Converting to Punycode:", answer)

    SAMPLE_CONF['SITE_URL'] = answer
293+
264294
def lhandler(default, toconf, show_header=True):
265295
if show_header:
266296
print("We will now ask you to provide the list of languages you want to use.")
@@ -375,7 +405,7 @@ def chandler(default, toconf):
375405
('Site author', 'Nikola Tesla', True, 'BLOG_AUTHOR'),
376406
('Site author\'s e-mail', 'n.tesla@example.com', True, 'BLOG_EMAIL'),
377407
('Site description', 'This is a demo site for Nikola.', True, 'BLOG_DESCRIPTION'),
378-
('Site URL', 'http://getnikola.com/', True, 'SITE_URL'),
408+
(urlhandler, None, True, True),
379409
('Questions about languages and locales', None, None, None),
380410
(lhandler, None, True, True),
381411
(tzhandler, None, True, True),

0 commit comments

Comments
 (0)
Please sign in to comment.