26
26
27
27
"""Support for Hugo-style shortcodes."""
28
28
29
- try :
30
- from html .parser import HTMLParser
31
- except ImportError :
32
- from HTMLParser import HTMLParser
33
-
34
29
from .utils import LOGGER
35
30
36
31
@@ -84,15 +79,14 @@ def _find_shortcodes(data):
84
79
"""
85
80
# FIXME: this is really space-intolerant
86
81
87
- parser = SCParser ()
88
82
pos = 0
89
83
while True :
90
84
start = data .find ('{{%' , pos )
91
85
if start == - 1 :
92
86
break
93
87
# Get the whole shortcode tag
94
88
end = data .find ('%}}' , start + 1 )
95
- name , args = parser . parse_sc ('<{}>' . format ( data [start + 3 :end ].strip () ))
89
+ name , args = parse_sc (data [start + 3 :end ].strip ())
96
90
# Check if this start has a matching close
97
91
close_tag = '{{% /{} %}}' .format (name )
98
92
close = data .find (close_tag , end + 3 )
@@ -106,28 +100,84 @@ def _find_shortcodes(data):
106
100
yield [name , args , start , end ]
107
101
108
102
109
class SCParser(HTMLParser):
    """Parser for shortcode arguments.

    Shortcode attributes are HTML-like, so the shortcode text is fed through
    the HTML parser as if it were a single start tag (the caller is expected
    to wrap it in ``<`` and ``>``).
    """

    # TODO replace with self-contained parser
    # FIXME should be able to take quoted positional arguments!

    def parse_sc(self, data):
        """Parse a tag-wrapped shortcode into ``(name, (args, kwargs))``.

        ``data`` is the shortcode text wrapped as ``<name arg key=value>``.
        Attributes without a value become positional arguments, attributes
        with a value become keyword arguments.  If no start tag could be
        parsed, the name is ``None`` and both argument containers are empty.
        """
        # The same parser instance is reused for every shortcode.  Without a
        # reset, an unterminated tag from an earlier call (for example one
        # with an unbalanced quote) stays in the HTMLParser buffer and is
        # glued in front of this call's data.
        self.reset()
        self.name = None
        # handle_starttag() stores a list of (name, value) pairs here, so
        # the "nothing parsed" default is an empty list as well.
        self.attrs = []
        self.feed(data)
        args = []
        kwargs = {}
        for key, value in self.attrs:
            if value is None:
                # Attribute given without "=value": positional argument
                args.append(key)
            else:
                kwargs[key] = value
        return self.name, (args, kwargs)

    def handle_starttag(self, tag, attrs):
        """Set start tag information on parser object."""
        self.name = tag
        self.attrs = attrs
103
def parse_sc(data):
    """Parse shortcode arguments into a tuple.

    ``data`` is the inner text of a shortcode tag: the shortcode name,
    optionally followed by a space and space-separated arguments.

    Returns ``(name, (args, kwargs))`` where ``args`` is the list of
    positional arguments and ``kwargs`` maps keyword names to values.

    Argument syntax:

    * ``foo``             positional argument
    * ``foo=bar``         keyword argument
    * ``"foo bar"``       quotes (single or double) protect spaces and ``=``;
                          only the quote character that opened a quoted run
                          closes it, the other one is kept literally
    * ``foo\\ bar``        a backslash makes the next character literal
    * ``""`` / ``foo=""`` explicitly empty argument / empty value

    Runs of spaces between arguments are treated as a single separator.
    Unterminated quotes are tolerated: the text up to the end of the input
    is used as the argument.
    """
    name, separator, rest = data.partition(' ')
    args = []
    kwargs = {}
    if not separator:
        # No arguments
        return name, (args, kwargs)

    # Parser state for the argument currently being read.
    cname = ''        # positional text, or the key of a keyword argument
    cvalue = ''       # value of a keyword argument
    in_value = False  # an unquoted "=" was seen: characters go to cvalue
    quote = ''        # quote character that opened the current quoted run
    escaped = False   # previous character was a backslash
    started = False   # at least one character of an argument was consumed

    for char in rest:
        if char == ' ' and not escaped and not quote:
            # Unprotected space: end of the current argument.  `started`
            # keeps repeated separators from producing empty arguments while
            # still allowing an explicit "" argument.
            if started:
                if in_value:
                    kwargs[cname] = cvalue
                else:
                    args.append(cname)
            cname = cvalue = ''
            in_value = started = False
            continue

        started = True
        if escaped:
            # Escaped character: take it literally, no matter what
            if in_value:
                cvalue += char
            else:
                cname += char
            escaped = False
        elif char == '\\':
            # Backslash: next character will be escaped
            escaped = True
        elif quote:
            if char == quote:
                # Matching quote closes the quoted run
                quote = ''
            elif in_value:
                cvalue += char
            else:
                cname += char
        elif char == '"' or char == "'":
            # Opening quote: remember which character has to close it
            quote = char
        elif char == '=' and not in_value:
            # Equals sign inside unquoted name: switch to value
            in_value = True
        elif in_value:
            cvalue += char
        else:
            cname += char

    # Handle last argument.  Decide by parser state rather than by whether
    # the value is non-empty, so that a trailing `key=""` stays a keyword.
    if started:
        if in_value:
            kwargs[cname] = cvalue
        else:
            args.append(cname)

    return name, (args, kwargs)
0 commit comments