Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #1886 from getnikola/fix-1885
Fix #1885 -- always return unicode in slugify
  • Loading branch information
Kwpolska committed Jul 11, 2015
2 parents 695c98b + 6de860b commit f390f36
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Expand Up @@ -4,6 +4,7 @@ New in master
Features
--------

* Always return unicode in slugify (Issue #1885)
* Remove logging handlers (Issue #1797)
* Add ``-d``, ``--detach`` option to ``nikola serve`` (Issue #1871)
* Use provided teaser format (``*_READ_MORE_LINK``) with custom teaser text
Expand Down
8 changes: 4 additions & 4 deletions nikola/utils.py
Expand Up @@ -729,9 +729,9 @@ def slugify(value, force=False):
if USE_SLUGIFY or force:
# This is the standard state of slugify, which actually does some work.
# It is the preferred style, especially for Western languages.
value = unidecode(value)
value = str(_slugify_strip_re.sub('', value).strip().lower())
return _slugify_hyphenate_re.sub('-', value)
value = unicode_str(unidecode(value))
value = _slugify_strip_re.sub('', value, re.UNICODE).strip().lower()
return _slugify_hyphenate_re.sub('-', value, re.UNICODE)
else:
# This is the “disarmed” state of slugify, which lets the user
# have any character they please (be it regular ASCII with spaces,
Expand All @@ -741,7 +741,7 @@ def slugify(value, force=False):
# We still replace some characters, though. In particular, we need
# to replace ? and #, which should not appear in URLs, and some
# Windows-unsafe characters. This list might be even longer.
rc = '/\\?#"\'\r\n\t*:<>|"'
rc = '/\\?#"\'\r\n\t*:<>|'

for c in rc:
value = value.replace(c, '-')
Expand Down
65 changes: 65 additions & 0 deletions tests/test_slugify.py
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-

u"""Test slugify."""

from __future__ import unicode_literals
import nikola.utils


def test_ascii():
    """Slugify a plain lowercase ASCII word.

    The word is expected to come back unchanged, as a unicode string.
    """
    source = u'hello'
    slug = nikola.utils.slugify(source)
    assert slug == u'hello'
    # The return type matters as much as the value: it must be unicode.
    assert isinstance(slug, nikola.utils.unicode_str)


def test_ascii_dash():
    """Slugify an ASCII string that already contains a dash.

    The existing dash is expected to be kept as-is.
    """
    source = u'hello-world'
    slug = nikola.utils.slugify(source)
    assert slug == u'hello-world'
    # The result must be a unicode string.
    assert isinstance(slug, nikola.utils.unicode_str)


def test_ascii_fancy():
    """Slugify an ASCII sentence with capitals, spaces and punctuation.

    The expected slug is lowercase, has hyphens in place of the spaces,
    and has lost the ``!`` and ``.`` characters.
    """
    source = u'The quick brown fox jumps over the lazy dog!-123.456'
    expected = u'the-quick-brown-fox-jumps-over-the-lazy-dog-123456'
    slug = nikola.utils.slugify(source)
    assert slug == expected
    # The result must be a unicode string.
    assert isinstance(slug, nikola.utils.unicode_str)


def test_pl():
    """Slugify a word made of Polish diacritical characters.

    Each accented letter is expected to be replaced by its plain ASCII
    counterpart.
    """
    source = u'zażółćgęśląjaźń'
    slug = nikola.utils.slugify(source)
    assert slug == u'zazolcgeslajazn'
    # The result must be a unicode string.
    assert isinstance(slug, nikola.utils.unicode_str)


def test_pl_dash():
    """Test a string with Polish diacritical characters and dashes.

    The diacritics are expected to be transliterated to ASCII while the
    dashes between the words are preserved.
    """
    o = nikola.utils.slugify(u'zażółć-gęślą-jaźń')
    assert o == u'zazolc-gesla-jazn'
    # Check the return type too, like every other test in this module:
    # slugify must hand back a unicode string.
    assert isinstance(o, nikola.utils.unicode_str)


def test_pl_fancy():
    """Slugify a Polish phrase with capitals, spaces and punctuation.

    The expected slug is lowercase ASCII, with hyphens in place of the
    spaces and without the ``!`` and ``.`` characters.
    """
    source = u'Zażółć gęślą jaźń!-123.456'
    expected = u'zazolc-gesla-jazn-123456'
    slug = nikola.utils.slugify(source)
    assert slug == expected
    # The result must be a unicode string.
    assert isinstance(slug, nikola.utils.unicode_str)


def test_disarmed():
    """Test disarmed slugify.

    With ``USE_SLUGIFY`` switched off, a string that contains none of the
    banned characters is expected to come back unchanged, as unicode.
    """
    # USE_SLUGIFY is module-level state shared by every test, so remember
    # its current value and restore it even when an assertion fails.
    previous = nikola.utils.USE_SLUGIFY
    nikola.utils.USE_SLUGIFY = False
    try:
        o = nikola.utils.slugify(u'Zażółć gęślą jaźń!-123.456')
        assert o == u'Zażółć gęślą jaźń!-123.456'
        assert isinstance(o, nikola.utils.unicode_str)
    finally:
        nikola.utils.USE_SLUGIFY = previous


def test_disarmed_weird():
    """Test disarmed slugify with banned characters.

    With ``USE_SLUGIFY`` switched off, the input is expected to keep its
    case, spaces and diacritics, while each banned character (quotes,
    ``?``, ``#``, ``<``, ``>``, ``/``, backslash, ``:``, ``*``, ``|``, CR,
    LF and TAB) is replaced by a single ``-``.
    """
    # USE_SLUGIFY is module-level state shared by every test, so remember
    # its current value and restore it even when an assertion fails.
    previous = nikola.utils.USE_SLUGIFY
    nikola.utils.USE_SLUGIFY = False
    try:
        o = nikola.utils.slugify(u'Zażółć gęślą jaźń!-123.456 "Hello World"?#H<e>l/l\\o:W\'o\rr*l\td|!\n')
        assert o == u'Zażółć gęślą jaźń!-123.456 -Hello World---H-e-l-l-o-W-o-r-l-d-!-'
        assert isinstance(o, nikola.utils.unicode_str)
    finally:
        nikola.utils.USE_SLUGIFY = previous

0 comments on commit f390f36

Please sign in to comment.