Skip to content

Commit 9a27a2c

Browse files
committed Dec 24, 2015
Use a custom shortcode parser
Signed-off-by: Chris Warrick <kwpolska@gmail.com>
1 parent 19cd8dd commit 9a27a2c

File tree

1 file changed

+83
-32
lines changed

1 file changed

+83
-32
lines changed
 

‎nikola/shortcodes.py

+83-32
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,6 @@
2626

2727
"""Support for Hugo-style shortcodes."""
2828

29-
try:
30-
from html.parser import HTMLParser
31-
except ImportError:
32-
from HTMLParser import HTMLParser
33-
3429
from .utils import LOGGER
3530

3631

@@ -84,15 +79,14 @@ def _find_shortcodes(data):
8479
"""
8580
# FIXME: this is really space-intolerant
8681

87-
parser = SCParser()
8882
pos = 0
8983
while True:
9084
start = data.find('{{%', pos)
9185
if start == -1:
9286
break
9387
# Get the whole shortcode tag
9488
end = data.find('%}}', start + 1)
95-
name, args = parser.parse_sc('<{}>'.format(data[start + 3:end].strip()))
89+
name, args = parse_sc(data[start + 3:end].strip())
9690
# Check if this start has a matching close
9791
close_tag = '{{% /{} %}}'.format(name)
9892
close = data.find(close_tag, end + 3)
@@ -106,28 +100,85 @@ def _find_shortcodes(data):
106100
yield [name, args, start, end]
107101

108102

109-
class SCParser(HTMLParser):
    """Parser for shortcode arguments.

    Shortcode attributes are HTML-like, so the HTML parser is (ab)used to
    split a shortcode, wrapped as ``<name arg key="value">``, into its name
    and arguments.
    """

    # TODO replace with self-contained parser
    # FIXME should be able to take quoted positional arguments!

    def parse_sc(self, data):
        """Parse shortcode arguments into a tuple.

        ``data`` is a single HTML-style start tag.  Returns
        ``(name, (args, kwargs))``: attributes without a value become
        positional arguments, attributes with a value become keyword
        arguments.  ``name`` is ``None`` if no start tag was recognised.
        """
        # The same instance is reused for many shortcodes.  Drop anything a
        # previous (incomplete) tag left in the parser's buffer so it cannot
        # leak into this parse.
        self.reset()
        self.name = None
        # List of (name, value) pairs, same shape as handle_starttag's attrs.
        self.attrs = []
        self.feed(data)
        args = []
        kwargs = {}
        for a, b in self.attrs:
            if b is None:
                # Attribute without "=value": positional argument.
                args.append(a)
            else:
                kwargs[a] = b
        return self.name, (args, kwargs)

    def handle_starttag(self, tag, attrs):
        """Set start tag information on parser object."""
        self.name = tag
        self.attrs = attrs
103+
def parse_sc(data):
    """Parse shortcode arguments into a tuple.

    ``data`` is the text of a shortcode tag without its delimiters: the
    shortcode name, optionally followed by a space and space-separated
    arguments.  An argument is either positional (``value``) or a keyword
    (``name=value``).  Single or double quotes group text that contains
    spaces (the other quote character is literal inside them), and a
    backslash makes the next character literal.  Runs of spaces between
    arguments are ignored.

    Returns ``(name, (args, kwargs))`` where ``args`` is the list of
    positional arguments and ``kwargs`` the dict of keyword arguments.
    All names and values are strings.
    """
    elements = data.split(' ', 1)
    name = elements[0]
    if len(elements) == 1:
        # No arguments
        return name, ([], {})
    args = []
    kwargs = {}

    cname = ''        # positional value, or the name part of a keyword
    cvalue = ''       # value part of a keyword argument
    qc = ''           # quote character that opened the current quotes
    in_value = False  # True once an unquoted '=' has been seen
    escaped = False   # True when the previous character was a backslash
    started = False   # True once the current argument has any content

    for char in elements[1]:
        if escaped:
            # Backslash: take this character literally, no matter what
            if in_value:
                cvalue += char
            else:
                cname += char
            escaped = False
            started = True
        elif char == '\\':
            escaped = True
        elif qc:
            # Inside quotes: only the matching quote character closes them
            if char == qc:
                qc = ''
            elif in_value:
                cvalue += char
            else:
                cname += char
        elif char == '"' or char == "'":
            # Opening quote; mark as started so that "" is a valid argument
            qc = char
            started = True
        elif char == ' ':
            # Unquoted space: end of the current argument (if there is one)
            if in_value:
                kwargs[cname] = cvalue
            elif started:
                args.append(cname)
            cname = cvalue = ''
            in_value = started = False
        elif char == '=' and not in_value:
            # First unquoted equals sign: switch from name to value
            in_value = True
            started = True
        else:
            if in_value:
                cvalue += char
            else:
                cname += char
            started = True

    # Handle last argument.  Decide by whether '=' was seen, not by whether
    # the value is non-empty, so that a trailing ``key=""`` stays a keyword.
    if in_value:
        kwargs[cname] = cvalue
    elif started:
        args.append(cname)

    return name, (args, kwargs)

0 commit comments

Comments
 (0)
Please sign in to comment.