346 """ |
345 """ |
347 Metaclass for RegexLexer, creates the self._tokens attribute from |
346 Metaclass for RegexLexer, creates the self._tokens attribute from |
348 self.tokens on the first instantiation. |
347 self.tokens on the first instantiation. |
349 """ |
348 """ |
350 |
349 |
|
    def _process_regex(cls, regex, rflags):
        """Preprocess the regular expression component of a token definition."""
        return re.compile(regex, rflags).match
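
    # For example, _process_regex(r'\d+', re.MULTILINE) returns the bound
    # `match` method of the compiled pattern; the lexer later invokes it
    # as rex(text, pos).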
|
    def _process_token(cls, token):
        """Preprocess the token component of a token definition."""
        assert type(token) is _TokenType or callable(token), \
            'token type must be simple type or callable, not %r' % (token,)
        return token
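
    # Both bare token types and callbacks satisfy the assertion above: a rule
    # may name a token such as Name.Builtin directly, or pass a callable such
    # as one returned by bygroups(...), which emits tokens itself.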
|
    def _process_new_state(cls, new_state, unprocessed, processed):
        """Preprocess the state transition action of a token definition."""
        if isinstance(new_state, str):
            # an existing state
            if new_state == '#pop':
                return -1
            elif new_state in unprocessed:
                return (new_state,)
            elif new_state == '#push':
                return new_state
            elif new_state[:5] == '#pop:':
                return -int(new_state[5:])
            else:
                assert False, 'unknown new state %r' % new_state
        elif isinstance(new_state, combined):
            # combine a new state from existing ones
            tmp_state = '_tmp_%d' % cls._tmpname
            cls._tmpname += 1
            itokens = []
            for istate in new_state:
                assert istate != new_state, 'circular state ref %r' % istate
                itokens.extend(cls._process_state(unprocessed,
                                                  processed, istate))
            processed[tmp_state] = itokens
            return (tmp_state,)
        elif isinstance(new_state, tuple):
            # push more than one state
            for istate in new_state:
                assert (istate in unprocessed or
                        istate in ('#pop', '#push')), \
                    'unknown new state ' + istate
            return new_state
        else:
            assert False, 'unknown new state def %r' % new_state
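
    # A sketch of the mapping above, assuming 'root', 'string', 'a' and 'b'
    # are states defined in `unprocessed`:
    #
    #   '#pop'              -> -1                  (pop one state)
    #   '#pop:2'            -> -2                  (pop two states)
    #   '#push'             -> '#push'             (re-push the current state)
    #   'string'            -> ('string',)         (push an existing state)
    #   ('root', 'string')  -> ('root', 'string')  (push several states)
    #   combined('a', 'b')  -> ('_tmp_N',)         (push a freshly combined state)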
|
    def _process_state(cls, unprocessed, processed, state):
        """Preprocess a single state definition."""
        assert type(state) is str, "wrong state name %r" % state
        assert state[0] != '#', "invalid state name %r" % state
        if state in processed:
            return processed[state]
        tokens = processed[state] = []
        rflags = cls.flags
        for tdef in unprocessed[state]:
            if isinstance(tdef, include):
                # it's a state reference
                assert tdef != state, "circular state reference %r" % state
                tokens.extend(cls._process_state(unprocessed, processed,
                                                 str(tdef)))
                continue

            assert type(tdef) is tuple, "wrong rule def %r" % tdef

            try:
                rex = cls._process_regex(tdef[0], rflags)
            except Exception as err:
                raise ValueError("uncompilable regex %r in state %r of %r: %s" %
                                 (tdef[0], state, cls, err))

            token = cls._process_token(tdef[1])

            if len(tdef) == 2:
                new_state = None
            else:
                new_state = cls._process_new_state(tdef[2],
                                                   unprocessed, processed)

            tokens.append((rex, token, new_state))
        return tokens
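
    # For illustration (hypothetical rule and state names): a rule such as
    # (r'"', String, 'string') in `unprocessed` is compiled here into
    # (<match function>, String, ('string',)), and include('comment')
    # splices the already-processed rules of the 'comment' state into the
    # current one.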

    def process_tokendef(cls, name, tokendefs=None):
        """Preprocess a dictionary of token definitions."""
        processed = cls._all_tokens[name] = {}
        tokendefs = tokendefs or cls.tokens[name]
        for state in list(tokendefs.keys()):
            cls._process_state(tokendefs, processed, state)
        return processed
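
    # Hypothetical example: given tokendefs = {'root': [(r'\d+', Number)]},
    # the result maps 'root' to [(<match function>, Number, None)] and is
    # cached on the class in cls._all_tokens under `name`.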

    def __call__(cls, *args, **kwds):
        """Instantiate cls after preprocessing its token definitions."""
        if not hasattr(cls, '_tokens'):
            cls._all_tokens = {}
            cls._tmpname = 0
            if hasattr(cls, 'token_variants') and cls.token_variants:
                # don't process yet