ThirdParty/Pygments/pygments/lexer.py

changeset 808:8f85926125ef
parent    684:2f29a0b6e1c7
child     1705:b0fbc9300f2b

comparing 805:83ca4d1ff648 with 808:8f85926125ef
@@ -13,11 +13,10 @@
 from pygments.filter import apply_filters, Filter
 from pygments.filters import get_filter_by_name
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
     make_analysator
-import collections
 
 
 __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
            'LexerContext', 'include', 'bygroups', 'using', 'this']
 
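The dropped `import collections` above is tied to the second hunk below: the callable check moves into the new `_process_token` helper and switches from `isinstance(tdef[1], collections.Callable)` to `hasattr(token, '__call__')`, so the import is no longer needed. A minimal sketch of the two predicates agreeing on typical token callbacks (illustrative only, not part of the changeset):

    # Token actions may be plain token types or anything callable:
    # functions, lambdas, bound methods, objects defining __call__.
    class TokenCallback:
        def __call__(self, lexer, match):
            return iter(())

    for candidate in (len, TokenCallback(), lambda lexer, match: iter(())):
        # new check, no import required:
        assert hasattr(candidate, '__call__')
        # old check, which needed the collections import:
        # assert isinstance(candidate, collections.Callable)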
@@ -346,82 +345,100 @@
     """
     Metaclass for RegexLexer, creates the self._tokens attribute from
     self.tokens on the first instantiation.
     """
 
+    def _process_regex(cls, regex, rflags):
+        """Preprocess the regular expression component of a token definition."""
+        return re.compile(regex, rflags).match
+
+    def _process_token(cls, token):
+        """Preprocess the token component of a token definition."""
+        assert type(token) is _TokenType or hasattr(token, '__call__'), \
+            'token type must be simple type or callable, not %r' % (token,)
+        return token
+
+    def _process_new_state(cls, new_state, unprocessed, processed):
+        """Preprocess the state transition action of a token definition."""
+        if isinstance(new_state, str):
+            # an existing state
+            if new_state == '#pop':
+                return -1
+            elif new_state in unprocessed:
+                return (new_state,)
+            elif new_state == '#push':
+                return new_state
+            elif new_state[:5] == '#pop:':
+                return -int(new_state[5:])
+            else:
+                assert False, 'unknown new state %r' % new_state
+        elif isinstance(new_state, combined):
+            # combine a new state from existing ones
+            tmp_state = '_tmp_%d' % cls._tmpname
+            cls._tmpname += 1
+            itokens = []
+            for istate in new_state:
+                assert istate != new_state, 'circular state ref %r' % istate
+                itokens.extend(cls._process_state(unprocessed,
+                                                  processed, istate))
+            processed[tmp_state] = itokens
+            return (tmp_state,)
+        elif isinstance(new_state, tuple):
+            # push more than one state
+            for istate in new_state:
+                assert (istate in unprocessed or
+                        istate in ('#pop', '#push')), \
+                    'unknown new state ' + istate
+            return new_state
+        else:
+            assert False, 'unknown new state def %r' % new_state
+
     def _process_state(cls, unprocessed, processed, state):
+        """Preprocess a single state definition."""
         assert type(state) is str, "wrong state name %r" % state
         assert state[0] != '#', "invalid state name %r" % state
         if state in processed:
             return processed[state]
         tokens = processed[state] = []
         rflags = cls.flags
         for tdef in unprocessed[state]:
             if isinstance(tdef, include):
                 # it's a state reference
                 assert tdef != state, "circular state reference %r" % state
-                tokens.extend(cls._process_state(unprocessed, processed, str(tdef)))
+                tokens.extend(cls._process_state(unprocessed, processed,
+                                                 str(tdef)))
                 continue
 
             assert type(tdef) is tuple, "wrong rule def %r" % tdef
 
             try:
-                rex = re.compile(tdef[0], rflags).match
+                rex = cls._process_regex(tdef[0], rflags)
             except Exception as err:
                 raise ValueError("uncompilable regex %r in state %r of %r: %s" %
                                  (tdef[0], state, cls, err))
 
-            assert type(tdef[1]) is _TokenType or isinstance(tdef[1], collections.Callable), \
-                'token type must be simple type or callable, not %r' % (tdef[1],)
+            token = cls._process_token(tdef[1])
 
             if len(tdef) == 2:
                 new_state = None
             else:
-                tdef2 = tdef[2]
-                if isinstance(tdef2, str):
-                    # an existing state
-                    if tdef2 == '#pop':
-                        new_state = -1
-                    elif tdef2 in unprocessed:
-                        new_state = (tdef2,)
-                    elif tdef2 == '#push':
-                        new_state = tdef2
-                    elif tdef2[:5] == '#pop:':
-                        new_state = -int(tdef2[5:])
-                    else:
-                        assert False, 'unknown new state %r' % tdef2
-                elif isinstance(tdef2, combined):
-                    # combine a new state from existing ones
-                    new_state = '_tmp_%d' % cls._tmpname
-                    cls._tmpname += 1
-                    itokens = []
-                    for istate in tdef2:
-                        assert istate != state, 'circular state ref %r' % istate
-                        itokens.extend(cls._process_state(unprocessed,
-                                                          processed, istate))
-                    processed[new_state] = itokens
-                    new_state = (new_state,)
-                elif isinstance(tdef2, tuple):
-                    # push more than one state
-                    for state in tdef2:
-                        assert (state in unprocessed or
-                                state in ('#pop', '#push')), \
-                            'unknown new state ' + state
-                    new_state = tdef2
-                else:
-                    assert False, 'unknown new state def %r' % tdef2
-            tokens.append((rex, tdef[1], new_state))
+                new_state = cls._process_new_state(tdef[2],
+                                                   unprocessed, processed)
+
+            tokens.append((rex, token, new_state))
         return tokens
 
     def process_tokendef(cls, name, tokendefs=None):
+        """Preprocess a dictionary of token definitions."""
         processed = cls._all_tokens[name] = {}
         tokendefs = tokendefs or cls.tokens[name]
-        for state in list(list(tokendefs.keys())):
+        for state in list(tokendefs.keys()):
             cls._process_state(tokendefs, processed, state)
         return processed
 
     def __call__(cls, *args, **kwds):
+        """Instantiate cls after preprocessing its token definitions."""
         if not hasattr(cls, '_tokens'):
             cls._all_tokens = {}
             cls._tmpname = 0
             if hasattr(cls, 'token_variants') and cls.token_variants:
                 # don't process yet
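Taken together, the refactoring splits rule preprocessing into three helpers (`_process_regex`, `_process_token`, `_process_new_state`) without changing what ends up in `processed`: each compiled rule is still a `(match_function, token, new_state)` triple. For reference, a hypothetical lexer whose rules exercise every state-transition form that `_process_new_state` classifies (all state names and regexes here are made up for illustration):

    from pygments.lexer import RegexLexer, combined
    from pygments.token import Comment, Punctuation, String, Text

    class DemoLexer(RegexLexer):
        """Illustrative only: one rule per new-state form."""
        tokens = {
            'root': [
                (r'/\*', Comment.Multiline, 'comment'),   # state name -> ('comment',)
                (r'\(', Punctuation, ('args', 'args')),   # tuple: push several states
                (r'\{', Punctuation, '#push'),            # '#push' kept verbatim
                (r'=', Punctuation,
                 combined('args', 'string')),             # combined -> synthetic '_tmp_N'
                (r'\s+', Text),                           # two-tuple -> new_state is None
            ],
            'comment': [
                (r'[^*]+', Comment.Multiline),
                (r'\*/', Comment.Multiline, '#pop'),      # '#pop'   -> -1
                (r'\*', Comment.Multiline),
            ],
            'args': [
                (r'\)\)', Punctuation, '#pop:2'),         # '#pop:N' -> -N
                (r'[^)"=\s]+', Text),
            ],
            'string': [
                (r'"[^"]*"', String, '#pop'),
            ],
        }

The translation happens once, on first instantiation: `RegexLexerMeta.__call__` builds `cls._tokens` via `process_tokendef`, so the tokenizer loop only ever sees compiled match functions, token types or callables, and the processed transition values (`None`, a tuple of state names, `'#push'`, or a negative pop count).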
