Diffstat (limited to 'module/lib/jinja2/lexer.py')
-rw-r--r-- | module/lib/jinja2/lexer.py | 96
1 file changed, 74 insertions, 22 deletions
diff --git a/module/lib/jinja2/lexer.py b/module/lib/jinja2/lexer.py
index 0d3f69617..a50128507 100644
--- a/module/lib/jinja2/lexer.py
+++ b/module/lib/jinja2/lexer.py
@@ -15,10 +15,13 @@
     :license: BSD, see LICENSE for more details.
 """
 import re
+
 from operator import itemgetter
 from collections import deque
 from jinja2.exceptions import TemplateSyntaxError
-from jinja2.utils import LRUCache, next
+from jinja2.utils import LRUCache
+from jinja2._compat import next, iteritems, implements_iterator, text_type, \
+     intern
 
 
 # cache for the lexers. Exists in order to be able to have multiple
@@ -126,7 +129,7 @@ operators = {
     ';':            TOKEN_SEMICOLON
 }
 
-reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
+reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
 assert len(operators) == len(reverse_operators), 'operators dropped'
 operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                          sorted(operators, key=lambda x: -len(x))))
@@ -197,7 +200,7 @@ def compile_rules(environment):
 
     if environment.line_statement_prefix is not None:
         rules.append((len(environment.line_statement_prefix), 'linestatement',
-                      r'^\s*' + e(environment.line_statement_prefix)))
+                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
     if environment.line_comment_prefix is not None:
         rules.append((len(environment.line_comment_prefix), 'linecomment',
                       r'(?:^|(?<=\S))[^\S\r\n]*' +
@@ -262,6 +265,7 @@ class Token(tuple):
         )
 
 
+@implements_iterator
 class TokenStreamIterator(object):
     """The iterator for tokenstreams.  Iterate over the stream
     until the eof token is reached.
@@ -273,7 +277,7 @@ class TokenStreamIterator(object):
     def __iter__(self):
         return self
 
-    def next(self):
+    def __next__(self):
         token = self.stream.current
         if token.type is TOKEN_EOF:
             self.stream.close()
@@ -282,6 +286,7 @@ class TokenStreamIterator(object):
         return token
 
 
+@implements_iterator
 class TokenStream(object):
     """A token stream is an iterable that yields :class:`Token`\s.  The
     parser however does not iterate over it but calls :meth:`next` to go
@@ -289,7 +294,7 @@ class TokenStream(object):
     """
 
     def __init__(self, generator, name, filename):
-        self._next = iter(generator).next
+        self._iter = iter(generator)
         self._pushed = deque()
         self.name = name
         self.filename = filename
@@ -300,8 +305,9 @@ class TokenStream(object):
 
     def __iter__(self):
         return TokenStreamIterator(self)
 
-    def __nonzero__(self):
+    def __bool__(self):
         return bool(self._pushed) or self.current.type is not TOKEN_EOF
+    __nonzero__ = __bool__  # py2
 
     eos = property(lambda x: not x, doc="Are we at the end of the stream?")
@@ -319,7 +325,7 @@ class TokenStream(object):
 
     def skip(self, n=1):
         """Got n tokens ahead."""
-        for x in xrange(n):
+        for x in range(n):
             next(self)
 
     def next_if(self, expr):
@@ -333,14 +339,14 @@ class TokenStream(object):
         """Like :meth:`next_if` but only returns `True` or `False`."""
         return self.next_if(expr) is not None
 
-    def next(self):
+    def __next__(self):
         """Go one token ahead and return the old one"""
         rv = self.current
         if self._pushed:
             self.current = self._pushed.popleft()
         elif self.current.type is not TOKEN_EOF:
             try:
-                self.current = self._next()
+                self.current = next(self._iter)
             except StopIteration:
                 self.close()
         return rv
@@ -348,7 +354,7 @@ class TokenStream(object):
     def close(self):
         """Close the stream."""
         self.current = Token(self.current.lineno, TOKEN_EOF, '')
-        self._next = None
+        self._iter = None
         self.closed = True
 
     def expect(self, expr):
@@ -383,7 +389,9 @@ def get_lexer(environment):
            environment.line_statement_prefix,
            environment.line_comment_prefix,
            environment.trim_blocks,
-           environment.newline_sequence)
+           environment.lstrip_blocks,
+           environment.newline_sequence,
+           environment.keep_trailing_newline)
     lexer = _lexer_cache.get(key)
     if lexer is None:
         lexer = Lexer(environment)
@@ -414,7 +422,7 @@ class Lexer(object):
             (operator_re, TOKEN_OPERATOR, None)
         ]
 
-        # assamble the root lexing rule. because "|" is ungreedy
+        # assemble the root lexing rule. because "|" is ungreedy
         # we have to sort by length so that the lexer continues working
         # as expected when we have parsing rules like <% for block and
         # <%= for variables. (if someone wants asp like syntax)
@@ -425,7 +433,44 @@ class Lexer(object):
         # block suffix if trimming is enabled
         block_suffix_re = environment.trim_blocks and '\\n?' or ''
 
+        # strip leading spaces if lstrip_blocks is enabled
+        prefix_re = {}
+        if environment.lstrip_blocks:
+            # use '{%+' to manually disable lstrip_blocks behavior
+            no_lstrip_re = e('+')
+            # detect overlap between block and variable or comment strings
+            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
+            # make sure we don't mistake a block for a variable or a comment
+            m = block_diff.match(environment.comment_start_string)
+            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
+            m = block_diff.match(environment.variable_start_string)
+            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
+
+            # detect overlap between comment and variable strings
+            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
+            m = comment_diff.match(environment.variable_start_string)
+            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''
+
+            lstrip_re = r'^[ \t]*'
+            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
+                    lstrip_re,
+                    e(environment.block_start_string),
+                    no_lstrip_re,
+                    e(environment.block_start_string),
+                    )
+            comment_prefix_re = r'%s%s%s|%s\+?' % (
+                    lstrip_re,
+                    e(environment.comment_start_string),
+                    no_variable_re,
+                    e(environment.comment_start_string),
+                    )
+            prefix_re['block'] = block_prefix_re
+            prefix_re['comment'] = comment_prefix_re
+        else:
+            block_prefix_re = '%s' % e(environment.block_start_string)
+
         self.newline_sequence = environment.newline_sequence
+        self.keep_trailing_newline = environment.keep_trailing_newline
 
         # global lexing rules
         self.rules = {
@@ -434,11 +479,11 @@ class Lexer(object):
                 (c('(.*?)(?:%s)' % '|'.join(
                     [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                         e(environment.block_start_string),
-                        e(environment.block_start_string),
+                        block_prefix_re,
                         e(environment.block_end_string),
                         e(environment.block_end_string)
                     )] + [
-                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
+                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
                         for n, r in root_tag_rules
                     ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                 # data
@@ -472,7 +517,7 @@ class Lexer(object):
             TOKEN_RAW_BEGIN: [
                 (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                     e(environment.block_start_string),
-                    e(environment.block_start_string),
+                    block_prefix_re,
                     e(environment.block_end_string),
                     e(environment.block_end_string),
                     block_suffix_re
@@ -491,7 +536,7 @@ class Lexer(object):
         }
 
     def _normalize_newlines(self, value):
-        """Called for strings and template data to normlize it to unicode."""
+        """Called for strings and template data to normalize it to unicode."""
         return newline_re.sub(self.newline_sequence, value)
 
     def tokenize(self, source, name=None, filename=None, state=None):
@@ -526,7 +571,7 @@ class Lexer(object):
                     value = self._normalize_newlines(value[1:-1]) \
                         .encode('ascii', 'backslashreplace') \
                         .decode('unicode-escape')
-                except Exception, e:
+                except Exception as e:
                     msg = str(e).split(':')[-1].strip()
                     raise TemplateSyntaxError(msg, lineno, name, filename)
                 # if we can express it as bytestring (ascii only)
@@ -549,7 +594,14 @@ class Lexer(object):
         """This method tokenizes the text and returns the tokens in a
         generator. Use this method if you just want to tokenize a template.
         """
-        source = '\n'.join(unicode(source).splitlines())
+        source = text_type(source)
+        lines = source.splitlines()
+        if self.keep_trailing_newline and source:
+            for newline in ('\r\n', '\r', '\n'):
+                if source.endswith(newline):
+                    lines.append('')
+                    break
+        source = '\n'.join(lines)
         pos = 0
         lineno = 1
         stack = ['root']
@@ -571,7 +623,7 @@ class Lexer(object):
                     if m is None:
                         continue
 
-                    # we only match blocks and variables if brances / parentheses
+                    # we only match blocks and variables if braces / parentheses
                     # are balanced. continue parsing with the lower rule which
                     # is the operator rule. do this only if the end tags look
                     # like operators
@@ -590,7 +642,7 @@ class Lexer(object):
                     # yield for the current token the first named
                     # group that matched
                     elif token == '#bygroup':
-                        for key, value in m.groupdict().iteritems():
+                        for key, value in iteritems(m.groupdict()):
                             if value is not None:
                                 yield lineno, key, value
                                 lineno += value.count('\n')
@@ -647,7 +699,7 @@ class Lexer(object):
                         stack.pop()
                     # resolve the new state by group checking
                     elif new_state == '#bygroup':
-                        for key, value in m.groupdict().iteritems():
+                        for key, value in iteritems(m.groupdict()):
                             if value is not None:
                                 stack.append(key)
                                 break
@@ -669,7 +721,7 @@ class Lexer(object):
                     # publish new function and start again
                     pos = pos2
                     break
-            # if loop terminated without break we havn't found a single match
+            # if loop terminated without break we haven't found a single match
             # either we are at the end of the file or we have a problem
             else:
                 # end of text
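For context on the Python 2/3 compat changes above: the iterator classes now define the Python 3 protocol names __next__ and __bool__ and are wrapped with implements_iterator from jinja2._compat. A minimal sketch of what such a decorator typically does, assuming the usual aliasing approach rather than quoting the actual _compat module:

import sys

PY2 = sys.version_info[0] == 2

if PY2:
    def implements_iterator(cls):
        # Python 2 looks for .next(), so alias the Python 3 style method
        cls.next = cls.__next__
        del cls.__next__
        return cls
else:
    def implements_iterator(cls):
        # Python 3 already uses __next__; nothing to change
        return cls


@implements_iterator
class CountDown(object):
    # toy iterator written Python 3 style, usable on both interpreters
    def __init__(self, n):
        self.n = n

    def __iter__(self):
        return self

    def __next__(self):
        if self.n <= 0:
            raise StopIteration()
        self.n -= 1
        return self.n


print(list(CountDown(3)))  # [2, 1, 0]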
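The two new entries in the lexer cache key, lstrip_blocks and keep_trailing_newline, correspond to Environment options introduced in Jinja2 2.7. A usage sketch of their effect, assuming a Jinja2 2.7+ install and the other whitespace options left at their defaults:

from jinja2 import Environment

# lstrip_blocks strips whitespace from the start of a line up to a block
# tag; trim_blocks removes the newline right after the tag.
env = Environment(lstrip_blocks=True, trim_blocks=True)
tmpl = env.from_string("    {% if True %}\n    value\n    {% endif %}\n")
print(repr(tmpl.render()))   # the tag lines no longer leave indentation behind

# '{%+' opts a single tag out of lstrip_blocks (see no_lstrip_re in the diff)
tmpl = env.from_string("    {%+ if True %}x{% endif %}")
print(repr(tmpl.render()))   # the leading spaces survive here

# by default the lexer drops a single trailing newline from the source;
# keep_trailing_newline=True preserves it
print(repr(Environment().from_string("hello\n").render()))
print(repr(Environment(keep_trailing_newline=True).from_string("hello\n").render()))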