Skip to content

Commit 5bc23e3

Browse files
committed Dec 24, 2015
Merge pull request #2194 from getnikola/shortcode-parser
Use a custom shortcode parser
2 parents 19cd8dd + 4bbdd98 commit 5bc23e3

File tree

2 files changed

+91
-35
lines changed

2 files changed

+91
-35
lines changed
 

‎nikola/shortcodes.py

+82-32
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,6 @@
2626

2727
"""Support for Hugo-style shortcodes."""
2828

29-
try:
30-
from html.parser import HTMLParser
31-
except ImportError:
32-
from HTMLParser import HTMLParser
33-
3429
from .utils import LOGGER
3530

3631

@@ -84,15 +79,14 @@ def _find_shortcodes(data):
8479
"""
8580
# FIXME: this is really space-intolerant
8681

87-
parser = SCParser()
8882
pos = 0
8983
while True:
9084
start = data.find('{{%', pos)
9185
if start == -1:
9286
break
9387
# Get the whole shortcode tag
9488
end = data.find('%}}', start + 1)
95-
name, args = parser.parse_sc('<{}>'.format(data[start + 3:end].strip()))
89+
name, args = parse_sc(data[start + 3:end].strip())
9690
# Check if this start has a matching close
9791
close_tag = '{{% /{} %}}'.format(name)
9892
close = data.find(close_tag, end + 3)
@@ -106,28 +100,84 @@ def _find_shortcodes(data):
106100
yield [name, args, start, end]
107101

108102

109-
class SCParser(HTMLParser):
110-
"""Parser for shortcode arguments."""
111-
112-
# Because shortcode attributes are HTML-like, we are abusing the HTML parser.
113-
# TODO replace with self-contained parser
114-
# FIXME should be able to take quoted positional arguments!
115-
116-
def parse_sc(self, data):
117-
"""Parse shortcode arguments into a tuple."""
118-
self.name = None
119-
self.attrs = {}
120-
self.feed(data)
121-
args = []
122-
kwargs = {}
123-
for a, b in self.attrs:
124-
if b is None:
125-
args.append(a)
126-
else:
127-
kwargs[a] = b
128-
return self.name, (args, kwargs)
129-
130-
def handle_starttag(self, tag, attrs):
131-
"""Set start tag information on parser object."""
132-
self.name = tag
133-
self.attrs = attrs
103+
def _store_sc_argument(args, kwargs, key, value, is_keyword):
    """Record one finished argument as keyword (key=value) or positional."""
    if is_keyword:
        kwargs[key] = value
    else:
        args.append(key)


def parse_sc(data):
    """Parse the inside of a shortcode tag into its name and arguments.

    ``data`` is the text between ``{{%`` and ``%}}``: the shortcode name,
    optionally followed by one space and space-separated arguments.

    Argument syntax:

    * ``word`` is a positional argument.
    * ``key=value`` is a keyword argument; only the first unquoted ``=``
      separates key from value, later ones belong to the value.
    * Single or double quotes group text containing spaces. Only the quote
      character that opened the group closes it, so ``"it's"`` yields
      ``it's``. An unterminated quote runs to the end of the input.
    * A backslash makes the next character literal, inside or outside quotes.

    Returns a tuple ``(name, (args, kwargs))`` where ``args`` is a list of
    positional values and ``kwargs`` a dict of keyword values.
    """
    name, separator, rest = data.partition(' ')
    args = []
    kwargs = {}
    if not separator:
        # No arguments
        return name, (args, kwargs)

    key = []          # characters of the current name / positional argument
    value = []        # characters of the current keyword value
    in_value = False  # an unquoted '=' was seen in the current argument
    quote = ''        # the quote character that opened the current group
    escaped = False   # the previous character was an unconsumed backslash
    started = False   # the current argument has consumed at least one char

    for char in rest:
        if char == ' ' and not escaped and not quote:
            # Unquoted space ends the argument. Runs of spaces must not
            # produce empty positional arguments, hence the `started` check;
            # an explicit "" still counts because the quote sets `started`.
            if started:
                _store_sc_argument(args, kwargs, ''.join(key),
                                   ''.join(value), in_value)
                key = []
                value = []
                in_value = started = False
            continue

        started = True
        target = value if in_value else key
        if escaped:
            # Escaped character: take it literally, no matter what
            target.append(char)
            escaped = False
        elif char == '\\':
            escaped = True
        elif quote:
            if char == quote:
                quote = ''
            else:
                # Includes the *other* quote character, kept literally
                target.append(char)
        elif char == '"' or char == "'":
            quote = char
        elif char == '=' and not in_value:
            in_value = True
        else:
            target.append(char)

    # Handle last argument. Decide by whether '=' was seen, not by whether
    # the value is non-empty, so that a trailing key="" stays a keyword.
    if started:
        _store_sc_argument(args, kwargs, ''.join(key), ''.join(value),
                           in_value)

    return name, (args, kwargs)

‎tests/test_shortcodes.py

+9-3
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import pytest
88
from nikola import shortcodes
99
from .base import FakeSite
10+
import sys
1011

1112
def noargs(site, data=''):
    """Format *data* into the fixed "noargs ... success!" message.

    *site* is accepted but not used.
    """
    template = "noargs {0} success!"
    return template.format(data)
@@ -15,6 +16,10 @@ def arg(*args, **kwargs):
1516
# don’t clutter the kwargs dict
1617
_ = kwargs.pop('site')
1718
data = kwargs.pop('data')
19+
# TODO hack for Python 2.7 -- remove when possible
20+
if sys.version_info[0] == 2:
21+
args = tuple(i.encode('utf-8') for i in args)
22+
kwargs = {k.encode('utf-8'): v.encode('utf-8') for k, v in kwargs.items()}
1823
return "arg {0}/{1}/{2}".format(args, sorted(kwargs.items()), data)
1924

2025

@@ -32,15 +37,16 @@ def test_noargs(fakesite):
3237
def test_arg_pos(fakesite):
    """Check positional arguments: bare, quoted, and backslash-escaped."""
    assert shortcodes.apply_shortcodes('test({{% arg 1 %}})', fakesite.shortcode_registry) == "test(arg ('1',)/[]/)"
    assert shortcodes.apply_shortcodes('test({{% arg 1 2aa %}})', fakesite.shortcode_registry) == "test(arg ('1', '2aa')/[]/)"
    assert shortcodes.apply_shortcodes('test({{% arg "hello world" %}})', fakesite.shortcode_registry) == "test(arg ('hello world',)/[]/)"
    # Raw string: a bare "\ " is an invalid escape sequence in a normal literal.
    assert shortcodes.apply_shortcodes(r'test({{% arg back\ slash arg2 %}})', fakesite.shortcode_registry) == "test(arg ('back slash', 'arg2')/[]/)"
3742

3843
def test_arg_keyword(fakesite):
    """Check keyword arguments: bare, quoted, and backslash-escaped values."""
    assert shortcodes.apply_shortcodes('test({{% arg 1a=2b %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b')]/)"
    assert shortcodes.apply_shortcodes('test({{% arg 1a="2b 3c" 4d=5f %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b 3c'), ('4d', '5f')]/)"
    # Raw string: a bare "\ " is an invalid escape sequence in a normal literal.
    assert shortcodes.apply_shortcodes(r'test({{% arg 1a="2b 3c" 4d=5f back=slash\ slash %}})', fakesite.shortcode_registry) == "test(arg ()/[('1a', '2b 3c'), ('4d', '5f'), ('back', 'slash slash')]/)"
4147

4248
def test_data(fakesite):
    """Check shortcodes that wrap data between an opening and a closing tag."""
    cases = (
        ('test({{% arg 123 %}}Hello!{{% /arg %}})',
         "test(arg ('123',)/[]/Hello!)"),
        ('test({{% arg 123 456 foo=bar %}}Hello world!{{% /arg %}})',
         "test(arg ('123', '456')/[('foo', 'bar')]/Hello world!)"),
        ('test({{% arg 123 456 foo=bar baz="quotes rock." %}}Hello test suite!{{% /arg %}})',
         "test(arg ('123', '456')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!)"),
        ('test({{% arg "123 foo" foobar foo=bar baz="quotes rock." %}}Hello test suite!!{{% /arg %}})',
         "test(arg ('123 foo', 'foobar')/[('baz', 'quotes rock.'), ('foo', 'bar')]/Hello test suite!!)"),
    )
    for source, expected in cases:
        rendered = shortcodes.apply_shortcodes(source, fakesite.shortcode_registry)
        assert rendered == expected

0 commit comments

Comments
 (0)
Please sign in to comment.