Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit bff18eb

Browse files
committedJan 14, 2017
Added LaTeX page compiler plugin.
1 parent 2fc1fb6 commit bff18eb

13 files changed

+3654
-0
lines changed
 

‎v7/latex/README.md

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
This plugin allows you to write posts and pages in a LaTeX-like syntax.
2+
3+
For Python before 3.4, you need to install the [`enum34` library](https://pypi.python.org/pypi/enum34). From Python 3.4 on, `enum` is part of the standard library.
4+
5+
6+
Formulae
7+
--------
8+
9+
There are two available formulae backends:
10+
11+
* one based on the [`latex_formula_renderer` plugin](https://plugins.getnikola.com/v7/latex_formula_renderer/);
12+
* one based on [MathJax](https://www.mathjax.org/).
13+
14+
The first backend supports special features the second doesn't:
15+
16+
* `align` environments (see the [AMSMath documentation](ftp://ftp.ams.org/ams/doc/amsmath/amsldoc.pdf));
17+
* XY-pic diagrams (see the [XY-Pic user guide](http://texdoc.net/texmf-dist/doc/generic/xypic/xyguide.pdf));
18+
* PSTricks graphics (see [here](https://en.wikipedia.org/wiki/PSTricks) for more information);
19+
* TikZ pictures (see [here](https://en.wikibooks.org/wiki/LaTeX/PGF/TikZ) for more information).
20+
21+
You need an installed LaTeX distribution for this to work, with some extra tools. See the `latex_formula_renderer` plugin for details.
22+
23+
24+
Required Translations
25+
---------------------
26+
27+
You need to add the following translations to your theme if you use theorem environments:
28+
``` .py
29+
MESSAGES = {
30+
'math_thm_name': 'Theorem',
31+
'math_prop_name': 'Proposition',
32+
'math_cor_name': 'Corollary',
33+
'math_lemma_name': 'Lemma',
34+
'math_def_name': 'Definition',
35+
'math_defs_name': 'Definitions',
36+
'math_proof_name': 'Proof',
37+
'math_example_name': 'Example',
38+
'math_examples_name': 'Examples',
39+
'math_remark_name': 'Remark',
40+
'math_remarks_name': 'Remarks',
41+
}
42+
```

‎v7/latex/conf.py.sample

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Determines how the formulae are rendered. Possibilities:
2+
# - "latex_formula_image_renderer": renders formulae as graphics and includes them.
3+
# - "latex_formula_mathjax": inserts MathJax code.
4+
LATEX_FORMULA_RENDERER = "latex_formula_image_renderer"
5+
6+
# When "latex_formula_image_renderer" is selected as the formula renderer,
7+
# the formulae colors and scale can be set here:
8+
#
9+
# The color must be given as an RGB triple with components in range [0, 1].
10+
# Here, (0, 0, 0) is black and (1, 1, 1) is white.
11+
LATEX_FORMULA_COLOR = (0., 0., 0.)
12+
#
13+
# The formula scale determines the effective size of the formulae.
14+
# Check what looks good with your theme's main font.
15+
LATEX_FORMULA_SCALE = 1.25
16+

‎v7/latex/latex.plugin

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[Core]
2+
Name = latex
3+
Module = latex
4+
5+
[Nikola]
6+
PluginCategory = PageCompiler
7+
8+
[Documentation]
9+
Author = Felix Fontein
10+
Version = 0.1
11+
Website = https://felix.fontein.de
12+
Description = Compile LaTeX-similar text to HTML

‎v7/latex/latex/__init__.py

+351
Large diffs are not rendered by default.

‎v7/latex/latex/htmlify.py

+577
Large diffs are not rendered by default.

‎v7/latex/latex/parser.py

+1,074
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[Core]
2+
Name = latex_formula_image_renderer
3+
Module = latex_formula_image_renderer
4+
5+
[Nikola]
6+
Compiler = latex
7+
PluginCategory = CompilerExtension
8+
9+
[Documentation]
10+
Author = Felix Fontein
11+
Version = 0.1
12+
Description = Provides LaTeX image formula rendering
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright © 2014-2017 Felix Fontein
4+
#
5+
# Permission is hereby granted, free of charge, to any
6+
# person obtaining a copy of this software and associated
7+
# documentation files (the "Software"), to deal in the
8+
# Software without restriction, including without limitation
9+
# the rights to use, copy, modify, merge, publish,
10+
# distribute, sublicense, and/or sell copies of the
11+
# Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice
15+
# shall be included in all copies or substantial portions of
16+
# the Software.
17+
#
18+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
19+
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
20+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
21+
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
22+
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23+
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24+
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26+
27+
"""Render formulas for LaTeX post compiler as images using the latex_formula_renderer plugin."""
28+
29+
from __future__ import unicode_literals
30+
31+
import nikola.plugin_categories
32+
import nikola.utils
33+
34+
import json
35+
import os.path
36+
import sys
37+
38+
LOGGER = nikola.utils.get_logger('compile_latex.formula.image', nikola.utils.STDERR_HANDLER)
39+
40+
41+
class FormulaContext(object):
    """Hold the settings the image formula renderer needs for one formula.

    The two settings kept here are the formula scale and the formula color.
    """

    def __init__(self, scale, color):
        """Remember ``scale`` and ``color`` on the new context."""
        self.color = color
        self.scale = scale

    def clone(self):
        """Return a fresh FormulaContext carrying the same scale and color."""
        return FormulaContext(scale=self.scale, color=self.color)
55+
56+
57+
def _escape_html_argument(text):
58+
"""Escape a string to be usable as an HTML tag argument."""
59+
result = ""
60+
for c in text:
61+
if c == "<":
62+
result += "&lt;"
63+
elif c == ">":
64+
result += "&gt;"
65+
elif c == "&":
66+
result += "&amp;"
67+
elif c == '"':
68+
result += "&quot;"
69+
elif c == "'":
70+
result += "&#39;"
71+
elif c == " ":
72+
result += " "
73+
elif '0' <= c <= '9' or 'A' <= c <= 'Z' or 'a' <= c <= 'z' or c in {'/', ':', '.', '@', '-', '_'}:
74+
result += c
75+
else:
76+
result += '&#x{0};'.format(hex(ord(c))[2:])
77+
return result
78+
79+
80+
class LatexImageFormulaRenderer(nikola.plugin_categories.CompilerExtension):
    """Render LaTeX formulae as image files using the latex_formula_renderer plugin."""

    name = 'latex_formula_image_renderer'
    compiler_name = 'latex'
    latex_plugin_type = 'formula_renderer'

    def __init__(self):
        """Initialize plugin with the default formula scale and color."""
        super(LatexImageFormulaRenderer, self).__init__()
        self.__formula_scale = 1.25
        self.__formula_color = (0., 0., 0.)

    def _get_formulae_filename(self, post, lang):
        """Get filename for post and language to store LaTeX formulae in."""
        return post.translated_base_path(lang) + '.ltxfor'

    def _collect_formulas(self):
        """Collect LaTeX formulae used in posts.

        Candidate files are the per-post formula files of all posts compiled
        with the ``latex`` compiler (for every configured translation) and
        every ``*.texfor`` file below the extra formula sources folder.
        Each existing candidate is read as UTF-8 encoded JSON.

        Returns a list with one tuple per stored formula entry.
        """
        # Look for candidates from posts
        candidates = set()
        for post in self.site.timeline:
            if post.compiler.name != 'latex':
                continue
            for lang in self.site.config['TRANSLATIONS']:
                candidates.add(self._get_formulae_filename(post, lang))
        # Look for candidates from extra formula sources
        for dirpath, _, filenames in os.walk(self.__extra_formula_sources, followlinks=True):
            for filename in filenames:
                if filename.endswith('.texfor'):
                    candidates.add(os.path.join(dirpath, filename))
        # Check the candidates; files which were never written are skipped
        formulae = []
        for fn in candidates:
            if not os.path.isfile(fn):
                continue
            with open(fn, "rb") as handle:
                entries = json.loads(handle.read().decode('utf-8'))
            # JSON turns the stored tuples into lists; convert them back
            formulae.extend(tuple(entry) for entry in entries)
        return formulae

    def set_site(self, site):
        """Set Nikola site object.

        Reads ``LATEX_FORMULA_COLOR`` and ``LATEX_FORMULA_SCALE`` from the
        site configuration (keeping the defaults when absent), derives the
        extra formula sources folder from ``CACHE_FOLDER``, and registers
        the formula collector on ``site.latex_formula_collectors``.
        """
        super(LatexImageFormulaRenderer, self).set_site(site)
        self.__formula_color = site.config.get('LATEX_FORMULA_COLOR', self.__formula_color)
        self.__formula_scale = site.config.get('LATEX_FORMULA_SCALE', self.__formula_scale)
        self.__extra_formula_sources = os.path.join(site.config['CACHE_FOLDER'], 'extra-formula-sources')

        # The collector list is created here if it does not exist yet
        if not hasattr(site, 'latex_formula_collectors'):
            site.latex_formula_collectors = []
        site.latex_formula_collectors.append(self._collect_formulas)

    def create_context(self):
        """Create a FormulaContext object."""
        return FormulaContext(self.__formula_scale, self.__formula_color)

    def get_extra_targets(self, post, lang, dest):
        """Return a list of extra formula-related targets."""
        return [self._get_formulae_filename(post, lang)]

    def add_extra_deps(self, post, lang, what, where):
        """Return a list of extra dependencies for given post and language.

        Only for ``what == 'uptodate'`` and ``where == 'fragment'`` a
        dependency on the configured scale and color is returned; otherwise
        the list is empty.
        """
        if what == 'uptodate' and where == 'fragment':
            return [nikola.utils.config_changed({
                'scale': self.__formula_scale,
                'color': list(self.__formula_color),
            }, 'latex_formula_image_renderer:config')]
        return []

    def _write_formulae(self, latex_context, filename):
        """Write used LaTeX formulae into JSON-encoded file."""
        formulae = sorted(latex_context.get_plugin_data(self.name, 'formulae', []))
        with open(filename, "wb") as f:
            f.write(json.dumps(formulae, sort_keys=True).encode('utf-8'))

    def write_extra_targets(self, post, lang, dest, latex_context):
        """Write extra formula-related targets."""
        self._write_formulae(latex_context, self._get_formulae_filename(post, lang))

    def before_processing(self, latex_context, source_path=None, post=None):
        """Add information to context before post is processed."""
        latex_context.store_plugin_data(self.name, 'formulae', [])

    def after_processing(self, latex_context, source_path=None, post=None):
        """Retrieve information from context after post is processed.

        When no post is given but a source path is, the collected formulae
        are written to a ``.texfor`` file below the extra formula sources
        folder.
        """
        if post is None and source_path is not None:
            fn = os.path.join(self.__extra_formula_sources, source_path, '.texfor')
            nikola.utils.makedirs(os.path.split(fn)[0])
            self._write_formulae(latex_context, fn)

    def modify_result(self, output, latex_context):
        """Modify generated HTML output."""
        return output

    def render(self, formula, formula_context, formula_type, latex_context):
        """Produce HTML code which displays the formula.

        formula: LaTeX code for the formula (excluding environment/delimiters)
        formula_context: a FormulaContext object created by this object (or a clone of it)
        formula_type: one of 'inline', 'display', 'align', 'pstricks', 'tikzpicture'
        latex_context: the LaTeX context object

        Returns an ``<img>`` tag. Logs an error and exits the process with
        status 1 when the site has no ``latex_formula_renderer`` attribute.
        """
        try:
            lfr = self.site.latex_formula_renderer
        except AttributeError:
            # Only a missing renderer is handled here; anything else
            # (including KeyboardInterrupt/SystemExit) must propagate.
            LOGGER.error("Cannot find latex formula rendering plugin!")
            sys.exit(1)
        src, width, height = lfr.compile(formula, formula_context.color, formula_context.scale, formula_type)
        # Record the formula so that it ends up in the per-post formula file
        latex_context.get_plugin_data(self.name, 'formulae', []).append((formula, formula_context.color, formula_context.scale, formula_type))
        alt_text = _escape_html_argument(formula).strip()
        css_type = formula_type
        return "<img class='img-{0}-formula img-formula' width='{1}' height='{2}' src='{3}' alt='{4}' title='{4}' />".format(css_type, width, height, src, alt_text)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[Core]
2+
Name = latex_formula_mathjax
3+
Module = latex_formula_mathjax
4+
5+
[Nikola]
6+
Compiler = latex
7+
PluginCategory = CompilerExtension
8+
9+
[Documentation]
10+
Author = Felix Fontein
11+
Version = 0.1
12+
Description = Provides mathjax-based formula rendering
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright © 2014-2017 Felix Fontein
4+
#
5+
# Permission is hereby granted, free of charge, to any
6+
# person obtaining a copy of this software and associated
7+
# documentation files (the "Software"), to deal in the
8+
# Software without restriction, including without limitation
9+
# the rights to use, copy, modify, merge, publish,
10+
# distribute, sublicense, and/or sell copies of the
11+
# Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice
15+
# shall be included in all copies or substantial portions of
16+
# the Software.
17+
#
18+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
19+
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
20+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
21+
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
22+
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23+
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24+
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26+
27+
"""Display formulas for LaTeX post compiler via MathJax."""
28+
29+
from __future__ import unicode_literals
30+
31+
import nikola.plugin_categories
32+
import nikola.utils
33+
34+
LOGGER = nikola.utils.get_logger('compile_latex.formula.mathjax', nikola.utils.STDERR_HANDLER)
35+
36+
37+
class FormulaContext(object):
    """Formula renderer context for the MathJax backend; it carries no settings."""

    def clone(self):
        """Return a new, independent FormulaContext."""
        duplicate = FormulaContext()
        return duplicate
43+
44+
45+
class MathJaxFormulaRenderer(nikola.plugin_categories.CompilerExtension):
    """Show LaTeX formulae via MathJax. Supports only inline and display-style formulae."""

    name = 'latex_formula_mathjax'
    compiler_name = 'latex'
    latex_plugin_type = 'formula_renderer'

    def __init__(self):
        """Initialize plugin with the default script origin and delimiters."""
        super(MathJaxFormulaRenderer, self).__init__()
        # NOTE(review): cdn.mathjax.org appears to have been retired in 2017;
        # confirm this default still resolves or set LATEX_MATHJAX_SCRIPT_ORIGIN.
        self.__script_origin = '//cdn.mathjax.org/mathjax/latest/MathJax.js'
        # Templates into which the formula is inserted, keyed by formula type
        self.__delimiters = {
            'inline': r'\({0}\)',
            'display': r'$${0}$$'
        }

    @staticmethod
    def _escape_html(text):
        """Escape ``&``, ``<`` and ``>`` so that ``text`` survives HTML parsing unchanged."""
        # '&' must be replaced first, otherwise the other entities get re-escaped
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    def set_site(self, site):
        """Set Nikola site object and read ``LATEX_MATHJAX_SCRIPT_ORIGIN`` from its config."""
        super(MathJaxFormulaRenderer, self).set_site(site)
        self.__script_origin = site.config.get('LATEX_MATHJAX_SCRIPT_ORIGIN', self.__script_origin)

    def create_context(self):
        """Create a FormulaContext object."""
        return FormulaContext()

    def get_extra_targets(self, post, lang, dest):
        """Return a list of extra formula-related targets."""
        return []

    def add_extra_deps(self, post, lang, what, where):
        """Return a list of extra dependencies for given post and language.

        Only for ``what == 'uptodate'`` and ``where == 'fragment'`` a
        dependency on the script origin and delimiters is returned;
        otherwise the list is empty.
        """
        if what == 'uptodate' and where == 'fragment':
            return [nikola.utils.config_changed({
                'script_origin': self.__script_origin,
                'delimiters': self.__delimiters,
            }, 'latex_formula_mathjax:config')]
        return []

    def write_extra_targets(self, post, lang, dest, latex_context):
        """Write extra formula-related targets."""
        pass

    def before_processing(self, latex_context, source_path=None, post=None):
        """Add information to context before post is processed."""
        pass

    def after_processing(self, latex_context, source_path=None, post=None):
        """Retrieve information from context after post is processed."""
        pass

    def modify_result(self, output, latex_context):
        """Modify generated HTML output by prepending the MathJax configuration and script tags."""
        prefix = '''<script type="text/x-mathjax-config">MathJax.Hub.Config({tex2jax: {inlineMath: [['\\\\(','\\\\)']]}});</script>'''
        prefix += '''<script type="application/javascript" src="''' + self.__script_origin + '''?config=TeX-AMS_HTML-full"></script>'''
        return prefix + output

    def render(self, formula, formula_context, formula_type, latex_context):
        """Produce HTML code which displays the formula.

        formula: LaTeX code for the formula (excluding environment/delimiters)
        formula_context: a FormulaContext object created by this object (or a clone of it)
        formula_type: one of 'inline', 'display', 'align', 'pstricks', 'tikzpicture'
        latex_context: the LaTeX context object

        Raises NotImplementedError for formula types other than 'inline' and 'display'.
        """
        if formula_type not in self.__delimiters:
            raise NotImplementedError("Formula type '{}' is not supported by MathJax formula rendering backend!".format(formula_type))

        # The result is embedded into HTML as-is, so characters like '<' and '&'
        # in the LaTeX source must be escaped or the browser parses them as markup.
        # MathJax works on the parsed text and therefore sees the original characters.
        return self.__delimiters[formula_type].format(self._escape_html(formula))

‎v7/latex/latex/tokenizer.py

+378
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,378 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright © 2014-2017 Felix Fontein
4+
#
5+
# Permission is hereby granted, free of charge, to any
6+
# person obtaining a copy of this software and associated
7+
# documentation files (the "Software"), to deal in the
8+
# Software without restriction, including without limitation
9+
# the rights to use, copy, modify, merge, publish,
10+
# distribute, sublicense, and/or sell copies of the
11+
# Software, and to permit persons to whom the Software is
12+
# furnished to do so, subject to the following conditions:
13+
#
14+
# The above copyright notice and this permission notice
15+
# shall be included in all copies or substantial portions of
16+
# the Software.
17+
#
18+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
19+
# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
20+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
21+
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
22+
# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23+
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
24+
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26+
27+
"""A basic LaTeX tokenizer."""
28+
29+
from __future__ import unicode_literals
30+
31+
import nikola.utils
32+
33+
from enum import Enum
34+
35+
LOGGER = nikola.utils.get_logger('compile_latex.tokenizer', nikola.utils.STDERR_HANDLER)
36+
37+
38+
class Token(Enum):
    """Enumerate the kinds of tokens produced by the tokenizer."""

    # Spacing
    Whitespace = 1
    NonbreakableWhitespace = 2  # '~'

    # Textual content
    Text = 3
    EscapedText = 4  # a single character following '\'
    Command = 5  # '\' followed by text

    # Formula delimiters (the alternatives '\(' and '\[' are Commands)
    InlineFormulaDelimiter = 6  # just '$'
    DisplayFormulaDelimiter = 7  # just '$$'

    # Brackets
    CurlyBraketOpen = 8  # '{'
    CurlyBraketClose = 9  # '}'
    SquareBraketOpen = 10  # '['
    SquareBraketClose = 11  # ']'

    # Structure
    DoubleNewLine = 12  # whitespace containing more than one line break
    Comment = 13  # '%'
    ForcedLineBreak = 14  # '\\'
    TableColumnDelimiter = 15  # '&'
56+
57+
58+
def _compute_position(input, index):
59+
"""Compute line/column position given an index in a string."""
60+
line = 1
61+
col = 1
62+
eol = None # last end of line character
63+
for c in input[:index]:
64+
if c == '\n' or c == '\r':
65+
if eol is None or eol == c:
66+
eol = c
67+
line += 1
68+
col = 1
69+
else:
70+
# ignore second of '\n\r' and '\r\n' sequences
71+
eol = None
72+
else:
73+
col += 1
74+
return (line, col)
75+
76+
77+
class Tokenizer(object):
    """A simple tokenizer.

    The tokenizer always holds at most one current token; ``next()`` advances
    to the following one. Token types are members of ``Token``.
    """

    def _is_whitespace(self, char):
        """Check for whitespace (any character with code point up to 32)."""
        return ord(char) <= 32

    def _is_line_break(self, char):
        """Check for line breaks (line feed or carriage return)."""
        return ord(char) == 10 or ord(char) == 13

    def _is_command_char(self, char):
        """Check for a command character (ASCII letter or '@')."""
        return 'A' <= char <= 'Z' or 'a' <= char <= 'z' or char == '@'

    def _eat_whitespace(self):
        """Skip whitespace and return number of contained line breaks.

        A line break character following a different line break character
        is treated as the second half of a two-character line break and is
        not counted.
        """
        number_of_line_breaks = 0
        last_line_break = None
        while self._position < len(self._input):
            char = self._input[self._position]
            if not self._is_whitespace(char):
                break
            if self._is_line_break(char):
                if last_line_break is None or last_line_break == char:
                    number_of_line_breaks += 1
                    last_line_break = char
                else:
                    last_line_break = None
            self._position += 1
        return number_of_line_breaks

    def _eat_comment(self):
        """Skip comment's content and return the skipped text.

        The comment extends over the first line break and the whitespace
        following it, but stops in front of a second line break so that
        the latter is left for the next token.
        """
        start = self._position
        last_line_break = None
        had_line_break = False
        while self._position < len(self._input):
            char = self._input[self._position]
            if had_line_break and not self._is_whitespace(char):
                break
            if self._is_line_break(char):
                if last_line_break is None or last_line_break == char:
                    if had_line_break:
                        break
                    last_line_break = char
                    had_line_break = True
                else:
                    last_line_break = None
            self._position += 1
        return self._input[start:self._position]

    def _read_text(self, strict):
        """Read text up to the next whitespace or special character.

        strict: when true, additionally stop at the first character which
        is not a command character.

        Returns the text read, which may be empty.
        """
        start = self._position
        while self._position < len(self._input):
            char = self._input[self._position]
            if self._is_whitespace(char):
                break
            if char in ("~", "{", "}", "$", "[", "]", "\\", "&"):
                break
            if strict and not self._is_command_char(char):
                break
            self._position += 1
        return self._input[start:self._position]

    def _find_next(self):
        """Find next token.

        Raises ValueError when the input ends directly after a backslash.
        """
        self._token = None
        self._token_value = None
        self._token_begin_index = None
        self._token_end_index = None
        if self._position >= len(self._input):
            return
        self._token_begin_index = self._position
        char = self._input[self._position]
        if self._is_whitespace(char):
            number_of_line_breaks = self._eat_whitespace()
            if number_of_line_breaks > 1:
                self._token = Token.DoubleNewLine
            else:
                self._token = Token.Whitespace
        elif char == "~":
            self._token = Token.NonbreakableWhitespace
            self._position += 1
        elif char == '&':
            self._token = Token.TableColumnDelimiter
            self._position += 1
        elif char == "{":
            self._token = Token.CurlyBraketOpen
            self._position += 1
        elif char == "}":
            self._token = Token.CurlyBraketClose
            self._position += 1
        elif char == "[":
            self._token = Token.SquareBraketOpen
            self._position += 1
        elif char == "]":
            self._token = Token.SquareBraketClose
            self._position += 1
        elif char == "$":
            self._token = Token.InlineFormulaDelimiter
            self._position += 1
            # A second '$' directly behind turns this into a display formula delimiter
            if self._position < len(self._input) and self._input[self._position] == "$":
                self._token = Token.DisplayFormulaDelimiter
                self._position += 1
        elif char == "\\":
            self._position += 1
            if self._position == len(self._input):
                # Raising a plain string is itself a TypeError; use a real exception
                raise ValueError("Reached end of text after '\\'")
            self._token = Token.Command
            cmd = self._read_text(True)
            if len(cmd) == 0:
                # Backslash followed by a single non-command character
                ch = self._input[self._position]
                if ch == '(' or ch == ')' or ch == '[' or ch == ']':
                    self._token_value = ch
                elif ch == '\\':
                    self._token = Token.ForcedLineBreak
                else:
                    self._token = Token.EscapedText
                    self._token_value = ch
                self._position += 1
            else:
                self._token_value = cmd
        elif char == '%':
            self._token = Token.Comment
            self._position += 1
            self._token_value = self._eat_comment()
        else:
            self._token = Token.Text
            self._token_value = self._read_text(False)
        self._token_end_index = self._position

    def __init__(self, input):
        """Initialize tokenizer with input unicode string ``input``."""
        self._input = input
        self._position = 0
        self._find_next()

    def has_token(self):
        """Whether a token is available."""
        return self._token is not None

    def token_type(self):
        """Return type of current token."""
        return self._token

    def token_value(self):
        """Return value of current token."""
        # Set for Text, EscapedText, Command and Comment tokens; None for all other types
        return self._token_value

    def token_begin_index(self):
        """Return beginning of token in input string."""
        return self._token_begin_index

    def token_end_index(self):
        """Return end of token in input string."""
        return self._token_end_index

    def next(self):
        """Proceed to next token."""
        if self._token is not None:
            self._find_next()

    def get_substring(self, start_index, end_index):
        """Return substring of input string."""
        return self._input[start_index:end_index]

    def get_position(self, index):
        """Retrieve position as (line, column) pair in input string."""
        return _compute_position(self._input, index)
247+
248+
249+
class TokenStream:
    """Present the output of a Tokenizer as a stream of tokens which can be peeked into."""

    def _fill_ahead(self, count):
        """Buffer tokens until at least ``count`` entries are available.

        Once the tokenizer is exhausted, the buffer is padded with
        ``(None, None)`` entries.
        """
        missing = count - len(self.__ahead)
        while missing > 0:
            if self.__tokenizer.has_token():
                self.__ahead.append((self.__tokenizer.token_type(), self.__tokenizer.token_value()))
                self.__ahead_indices.append((self.__tokenizer.token_begin_index(), self.__tokenizer.token_end_index()))
                self.__tokenizer.next()
            else:
                self.__ahead.append((None, None))
                self.__ahead_indices.append((None, None))
            missing -= 1

    def __init__(self, input):
        """Create TokenStream from input unicode string. Creates Tokenizer."""
        self.__tokenizer = Tokenizer(input)
        self.__ahead = []
        self.__ahead_indices = []

    def current(self):
        """Get current token. Return pair (type, value)."""
        self._fill_ahead(1)
        return self.__ahead[0]

    def current_indices(self):
        """Get current token indices in input string."""
        self._fill_ahead(1)
        return self.__ahead_indices[0]

    def current_type(self):
        """Get current token type."""
        self._fill_ahead(1)
        token_type, _ = self.__ahead[0]
        return token_type

    def current_value(self):
        """Get current token value."""
        self._fill_ahead(1)
        _, token_value = self.__ahead[0]
        return token_value

    def has_current(self):
        """Return True if current token is available."""
        self._fill_ahead(1)
        token_type, _ = self.__ahead[0]
        return token_type is not None

    def skip_current(self, count=1):
        """Skip number of tokens."""
        assert count >= 0
        self._fill_ahead(count)
        del self.__ahead[:count]
        del self.__ahead_indices[:count]

    def peek(self, index):
        """Peek ahead in token stream. Return pair (type, value)."""
        assert index >= 0
        self._fill_ahead(index + 1)
        return self.__ahead[index]

    def peek_indices(self, index):
        """Peek ahead in token stream. Return indices of token in input string."""
        assert index >= 0
        self._fill_ahead(index + 1)
        return self.__ahead_indices[index]

    def peek_type(self, index):
        """Peek ahead in token stream. Return token's type."""
        assert index >= 0
        self._fill_ahead(index + 1)
        token_type, _ = self.__ahead[index]
        return token_type

    def peek_value(self, index):
        """Peek ahead in token stream. Return token's value."""
        assert index >= 0
        self._fill_ahead(index + 1)
        _, token_value = self.__ahead[index]
        return token_value

    def can_peek(self, index):
        """Check whether the token at current index + ``index`` exists and can be peeked at."""
        assert index >= 0
        self._fill_ahead(index + 1)
        token_type, _ = self.__ahead[index]
        return token_type is not None

    def get_substring(self, start_index, end_index):
        """Return substring of input string."""
        return self.__tokenizer.get_substring(start_index, end_index)

    def get_position(self, index):
        """Retrieve position as (line, column) pair in input string."""
        return self.__tokenizer.get_position(index)

    def set_value(self, index, new_value):
        """Set value of token at current index + ``index`` to ``new_value``.

        The token's type is kept. Use with care!
        """
        assert index >= 0
        self._fill_ahead(index + 1)
        token_type, _ = self.__ahead[index]
        self.__ahead[index] = (token_type, new_value)
348+
349+
350+
def recombine_tokens(tokens):
    """Recombine list of tokens as string.

    tokens: iterable of ``(type, value)`` pairs as produced by the tokenizer

    Whitespace is normalized to a single space and double new lines to
    ``"\\n\\n"``. Comment tokens and entries of unknown type (such as the
    ``(None, None)`` end markers of a token stream) contribute nothing.

    Returns the recombined string.
    """
    parts = []
    for kind, value in tokens:
        if kind == Token.Whitespace:
            parts.append(" ")
        elif kind == Token.NonbreakableWhitespace:
            parts.append("~")
        elif kind == Token.Text:
            parts.append(value)
        elif kind == Token.EscapedText or kind == Token.Command:
            parts.append("\\{}".format(value))
        elif kind == Token.InlineFormulaDelimiter:
            parts.append("$")
        elif kind == Token.DisplayFormulaDelimiter:
            parts.append("$$")
        elif kind == Token.CurlyBraketOpen:
            parts.append("{")
        elif kind == Token.CurlyBraketClose:
            parts.append("}")
        elif kind == Token.SquareBraketOpen:
            parts.append("[")
        elif kind == Token.SquareBraketClose:
            parts.append("]")
        elif kind == Token.DoubleNewLine:
            parts.append("\n\n")
        elif kind == Token.ForcedLineBreak:
            # Previously dropped silently, losing '\\' from the recombined text
            parts.append("\\\\")
        elif kind == Token.TableColumnDelimiter:
            # Previously dropped silently, losing '&' from the recombined text
            parts.append("&")
    return "".join(parts)

‎v7/latex/latex/tree.py

+876
Large diffs are not rendered by default.

‎v7/latex/requirements-plugins.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
latex_formula_renderer

0 commit comments

Comments
 (0)
Please sign in to comment.