Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #2194 from getnikola/shortcode-parser
Use a custom shortcode parser
  • Loading branch information
ralsina committed Dec 24, 2015
2 parents 19cd8dd + 4bbdd98 commit 5bc23e3
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 35 deletions.
114 changes: 82 additions & 32 deletions nikola/shortcodes.py
Expand Up @@ -26,11 +26,6 @@

"""Support for Hugo-style shortcodes."""

try:
from html.parser import HTMLParser
except ImportError:
from HTMLParser import HTMLParser

from .utils import LOGGER


Expand Down Expand Up @@ -84,15 +79,14 @@ def _find_shortcodes(data):
"""
# FIXME: this is really space-intolerant

parser = SCParser()
pos = 0
while True:
start = data.find('{{%', pos)
if start == -1:
break
# Get the whole shortcode tag
end = data.find('%}}', start + 1)
name, args = parser.parse_sc('<{}>'.format(data[start + 3:end].strip()))
name, args = parse_sc(data[start + 3:end].strip())
# Check if this start has a matching close
close_tag = '{{% /{} %}}'.format(name)
close = data.find(close_tag, end + 3)
Expand All @@ -106,28 +100,84 @@ def _find_shortcodes(data):
yield [name, args, start, end]


class SCParser(HTMLParser):
    """Read a shortcode's name and arguments by way of the HTML parser."""

    # Shortcode attributes look like HTML attributes, so the HTML parser is
    # (ab)used to read them.
    # TODO replace with self-contained parser
    # FIXME should be able to take quoted positional arguments!

    def parse_sc(self, data):
        """Feed ``data`` to the parser and return ``(name, (args, kwargs))``.

        Attributes without a value become positional arguments, attributes
        with a value become keyword arguments.
        """
        # Reset the state left behind by a previous call.
        self.name = None
        self.attrs = {}
        self.feed(data)
        pairs = list(self.attrs)
        positional = [key for key, value in pairs if value is None]
        keyword = dict((key, value) for key, value in pairs if value is not None)
        return self.name, (positional, keyword)

    def handle_starttag(self, tag, attrs):
        """Remember the tag name and its attribute list on the parser."""
        self.name, self.attrs = tag, attrs
def _store_argument(token, key, args, kwargs):
    """Save a finished argument as positional (no key) or keyword."""
    text = ''.join(token)
    if key is None:
        args.append(text)
    else:
        kwargs[key] = text


def parse_sc(data):
    """Parse a shortcode tag body into its name and arguments.

    ``data`` is the text of the tag, e.g. ``'arg 1 foo="bar baz"'``.  The
    first space-delimited word is the shortcode name; the rest is a list of
    space-separated arguments:

    * ``name=value`` becomes a keyword argument (an empty value is kept);
    * anything else becomes a positional argument;
    * single or double quotes group text that contains spaces; only the
      same quote character that opened the group closes it, so
      ``"it's"`` yields ``it's``;
    * a backslash makes the next character literal, also inside quotes;
    * runs of several spaces do not produce empty arguments.

    Returns a tuple ``(name, (args, kwargs))`` with ``args`` a list and
    ``kwargs`` a dict.
    """
    name, _, arg_string = data.partition(' ')
    args = []
    kwargs = {}

    token = []       # characters of the part (name or value) being read
    key = None       # keyword name once an unquoted '=' was seen, else None
    quote = ''       # quote character that is currently open, '' if none
    escaped = False  # previous character was a backslash
    started = False  # current argument has consumed at least one character

    for char in arg_string:
        if escaped:
            # Escaped character: take it literally, no matter what it is.
            token.append(char)
            escaped = False
        elif char == '\\':
            escaped = True
            started = True
        elif quote:
            # Inside quotes everything is literal except the closing quote.
            if char == quote:
                quote = ''
            else:
                token.append(char)
        elif char == ' ':
            # Unquoted space ends the argument; ignore repeated separators.
            if started:
                _store_argument(token, key, args, kwargs)
            token = []
            key = None
            started = False
        else:
            started = True
            if char == '"' or char == "'":
                quote = char
            elif char == '=' and key is None:
                # First unquoted '=': what was read so far is the keyword name.
                key = ''.join(token)
                token = []
            else:
                token.append(char)

    # Handle the last argument (there is no trailing separator).
    if started:
        _store_argument(token, key, args, kwargs)

    return name, (args, kwargs)
12 changes: 9 additions & 3 deletions tests/test_shortcodes.py
Expand Up @@ -7,6 +7,7 @@
import pytest
from nikola import shortcodes
from .base import FakeSite
import sys

def noargs(site, data=''):
    """Shortcode handler taking no arguments; echoes the enclosed data."""
    template = "noargs {0} success!"
    return template.format(data)
Expand All @@ -15,6 +16,10 @@ def arg(*args, **kwargs):
# don’t clutter the kwargs dict
_ = kwargs.pop('site')
data = kwargs.pop('data')
# TODO hack for Python 2.7 -- remove when possible
if sys.version_info[0] == 2:
args = tuple(i.encode('utf-8') for i in args)
kwargs = {k.encode('utf-8'): v.encode('utf-8') for k, v in kwargs.items()}
return "arg {0}/{1}/{2}".format(args, sorted(kwargs.items()), data)


Expand All @@ -32,15 +37,16 @@ def test_noargs(fakesite):
def test_arg_pos(fakesite):
    """Positional arguments: plain, quoted, and with a backslash-escaped space."""
    assert shortcodes.apply_shortcodes('test({{% arg 1 %}})', fakesite.shortcode_registry) == "test(arg ('1',)/[]/)"
    assert shortcodes.apply_shortcodes('test({{% arg 1 2aa %}})', fakesite.shortcode_registry) == "test(arg ('1', '2aa')/[]/)"
    assert shortcodes.apply_shortcodes('test({{% arg "hello world" %}})', fakesite.shortcode_registry) == "test(arg ('hello world',)/[]/)"
    # The backslash is doubled so the literal contains one real backslash;
    # a bare "\ " is an invalid escape sequence in a non-raw string.
    assert shortcodes.apply_shortcodes('test({{% arg back\\ slash arg2 %}})', fakesite.shortcode_registry) == "test(arg ('back slash', 'arg2')/[]/)"

def test_arg_keyword(fakesite):
    """Keyword arguments: plain, quoted, and with a backslash-escaped space."""
    assert shortcodes.apply_shortcodes('test({{% arg 1a=2b %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b')]/)"
    assert shortcodes.apply_shortcodes('test({{% arg 1a="2b 3c" 4d=5f %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b 3c'), ('4d', '5f')]/)"
    # The backslash is doubled so the literal contains one real backslash;
    # a bare "\ " is an invalid escape sequence in a non-raw string.
    assert shortcodes.apply_shortcodes('test({{% arg 1a="2b 3c" 4d=5f back=slash\\ slash %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b 3c'), ('4d', '5f'), ('back', 'slash slash')]/)"

def test_data(fakesite):
    """Shortcodes with enclosed data receive it together with their arguments."""
    cases = [
        ('test({{% arg 123 %}}Hello!{{% /arg %}})',
         "test(arg ('123',)/[]/Hello!)"),
        ('test({{% arg 123 456 foo=bar %}}Hello world!{{% /arg %}})',
         "test(arg ('123', '456')/[('foo', 'bar')]/Hello world!)"),
        ('test({{% arg 123 456 foo=bar baz="quotes rock." %}}Hello test suite!{{% /arg %}})',
         "test(arg ('123', '456')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!)"),
        ('test({{% arg "123 foo" foobar foo=bar baz="quotes rock." %}}Hello test suite!!{{% /arg %}})',
         "test(arg ('123 foo', 'foobar')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!!)"),
    ]
    for source, expected in cases:
        assert shortcodes.apply_shortcodes(source, fakesite.shortcode_registry) == expected

0 comments on commit 5bc23e3

Please sign in to comment.