eric6/ThirdParty/Pygments/pygments/lexers/asm.py

changeset 7547
21b0534faebc
parent 6942
2602857055c5
child 7701
25f42e208e08
equal deleted inserted replaced
7546:bf5f777260a6 7547:21b0534faebc
3 pygments.lexers.asm 3 pygments.lexers.asm
4 ~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 Lexers for assembly languages. 6 Lexers for assembly languages.
7 7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re 12 import re
13 13
14 from pygments.lexer import RegexLexer, include, bygroups, using, words, \ 14 from pygments.lexer import RegexLexer, include, bygroups, using, words, \
15 DelegatingLexer 15 DelegatingLexer, default
16 from pygments.lexers.c_cpp import CppLexer, CLexer 16 from pygments.lexers.c_cpp import CppLexer, CLexer
17 from pygments.lexers.d import DLexer 17 from pygments.lexers.d import DLexer
18 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ 18 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
19 Other, Keyword, Operator 19 Other, Keyword, Operator, Literal
20 20
21 __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer', 21 __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
22 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'NasmLexer', 22 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer',
23 'NasmObjdumpLexer', 'TasmLexer', 'Ca65Lexer'] 23 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer',
24 'Ca65Lexer', 'Dasm16Lexer']
24 25
25 26
26 class GasLexer(RegexLexer): 27 class GasLexer(RegexLexer):
27 """ 28 """
28 For Gas (AT&T) assembly code. 29 For Gas (AT&T) assembly code.
35 #: optional Comment or Whitespace 36 #: optional Comment or Whitespace
36 string = r'"(\\"|[^"])*"' 37 string = r'"(\\"|[^"])*"'
37 char = r'[\w$.@-]' 38 char = r'[\w$.@-]'
38 identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)' 39 identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
39 number = r'(?:0[xX][a-zA-Z0-9]+|\d+)' 40 number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'
41 register = '%' + identifier
40 42
41 tokens = { 43 tokens = {
42 'root': [ 44 'root': [
43 include('whitespace'), 45 include('whitespace'),
44 (identifier + ':', Name.Label), 46 (identifier + ':', Name.Label),
50 'directive-args': [ 52 'directive-args': [
51 (identifier, Name.Constant), 53 (identifier, Name.Constant),
52 (string, String), 54 (string, String),
53 ('@' + identifier, Name.Attribute), 55 ('@' + identifier, Name.Attribute),
54 (number, Number.Integer), 56 (number, Number.Integer),
57 (register, Name.Variable),
55 (r'[\r\n]+', Text, '#pop'), 58 (r'[\r\n]+', Text, '#pop'),
56 (r'[;#].*?\n', Comment, '#pop'), 59 (r'([;#]|//).*?\n', Comment.Single, '#pop'),
60 (r'/[*].*?[*]/', Comment.Multiline),
61 (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
57 62
58 include('punctuation'), 63 include('punctuation'),
59 include('whitespace') 64 include('whitespace')
60 ], 65 ],
61 'instruction-args': [ 66 'instruction-args': [
70 75
71 # Address constants 76 # Address constants
72 (identifier, Name.Constant), 77 (identifier, Name.Constant),
73 (number, Number.Integer), 78 (number, Number.Integer),
74 # Registers 79 # Registers
75 ('%' + identifier, Name.Variable), 80 (register, Name.Variable),
76 # Numeric constants 81 # Numeric constants
77 ('$'+number, Number.Integer), 82 ('$'+number, Number.Integer),
78 (r"$'(.|\\')'", String.Char), 83 (r"$'(.|\\')'", String.Char),
79 (r'[\r\n]+', Text, '#pop'), 84 (r'[\r\n]+', Text, '#pop'),
80 (r'[;#].*?\n', Comment, '#pop'), 85 (r'([;#]|//).*?\n', Comment.Single, '#pop'),
86 (r'/[*].*?[*]/', Comment.Multiline),
87 (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
81 88
82 include('punctuation'), 89 include('punctuation'),
83 include('whitespace') 90 include('whitespace')
84 ], 91 ],
85 'whitespace': [ 92 'whitespace': [
86 (r'\n', Text), 93 (r'\n', Text),
87 (r'\s+', Text), 94 (r'\s+', Text),
88 (r'[;#].*?\n', Comment) 95 (r'([;#]|//).*?\n', Comment.Single),
96 (r'/[*][\w\W]*?[*]/', Comment.Multiline)
89 ], 97 ],
90 'punctuation': [ 98 'punctuation': [
91 (r'[-*,.()\[\]!:]+', Punctuation) 99 (r'[-*,.()\[\]!:]+', Punctuation)
92 ] 100 ]
93 } 101 }
94 102
95 def analyse_text(text): 103 def analyse_text(text):
96 if re.match(r'^\.(text|data|section)', text, re.M): 104 if re.search(r'^\.(text|data|section)', text, re.M):
97 return True 105 return True
98 elif re.match(r'^\.\w+', text, re.M): 106 elif re.search(r'^\.\w+', text, re.M):
99 return 0.1 107 return 0.1
100 108
101 109
102 def _objdump_lexer_tokens(asm_lexer): 110 def _objdump_lexer_tokens(asm_lexer):
103 """ 111 """
375 (r';.*?\n', Comment) 383 (r';.*?\n', Comment)
376 ], 384 ],
377 'keyword': [ 385 'keyword': [
378 # Regular keywords 386 # Regular keywords
379 (words(( 387 (words((
380 'begin', 'end', 'true', 'false', 'declare', 'define', 'global', 388 'acq_rel', 'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
381 'constant', 'private', 'linker_private', 'internal', 389 'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca', 'allocsize', 'allOnes',
382 'available_externally', 'linkonce', 'linkonce_odr', 'weak', 390 'alwaysinline', 'amdgpu_cs', 'amdgpu_es', 'amdgpu_gs', 'amdgpu_hs',
383 'weak_odr', 'appending', 'dllimport', 'dllexport', 'common', 391 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps', 'amdgpu_vs', 'and', 'any',
384 'default', 'hidden', 'protected', 'extern_weak', 'external', 392 'anyregcc', 'appending', 'arcp', 'argmemonly', 'args', 'arm_aapcs_vfpcc',
385 'thread_local', 'zeroinitializer', 'undef', 'null', 'to', 'tail', 393 'arm_aapcscc', 'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw', 'attributes',
386 'target', 'triple', 'datalayout', 'volatile', 'nuw', 'nsw', 'nnan', 394 'available_externally', 'avr_intrcc', 'avr_signalcc', 'bit', 'bitcast',
387 'ninf', 'nsz', 'arcp', 'fast', 'exact', 'inbounds', 'align', 395 'bitMask', 'blockaddress', 'br', 'branchFunnel', 'builtin', 'byArg', 'byte',
388 'addrspace', 'section', 'alias', 'module', 'asm', 'sideeffect', 396 'byteArray', 'byval', 'c', 'call', 'callee', 'caller', 'calls', 'catch',
389 'gc', 'dbg', 'linker_private_weak', 'attributes', 'blockaddress', 397 'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc', 'cleanup', 'cleanuppad',
390 'initialexec', 'localdynamic', 'localexec', 'prefix', 'unnamed_addr', 398 'cleanupret', 'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
391 'ccc', 'fastcc', 'coldcc', 'x86_stdcallcc', 'x86_fastcallcc', 399 'contract', 'convergent', 'critical', 'cxx_fast_tlscc', 'datalayout', 'declare',
392 'arm_apcscc', 'arm_aapcscc', 'arm_aapcs_vfpcc', 'ptx_device', 400 'default', 'define', 'deplibs', 'dereferenceable', 'dereferenceable_or_null',
393 'ptx_kernel', 'intel_ocl_bicc', 'msp430_intrcc', 'spir_func', 401 'distinct', 'dllexport', 'dllimport', 'double', 'dso_local', 'dso_preemptable',
394 'spir_kernel', 'x86_64_sysvcc', 'x86_64_win64cc', 'x86_thiscallcc', 402 'dsoLocal', 'eq', 'exact', 'exactmatch', 'extern_weak', 'external',
395 'cc', 'c', 'signext', 'zeroext', 'inreg', 'sret', 'nounwind', 403 'externally_initialized', 'extractelement', 'extractvalue', 'fadd', 'false',
396 'noreturn', 'noalias', 'nocapture', 'byval', 'nest', 'readnone', 404 'fast', 'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'float', 'fmul',
397 'readonly', 'inlinehint', 'noinline', 'alwaysinline', 'optsize', 'ssp', 405 'fp128', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'frem', 'from', 'fsub',
398 'sspreq', 'noredzone', 'noimplicitfloat', 'naked', 'builtin', 'cold', 406 'funcFlags', 'function', 'gc', 'getelementptr', 'ghccc', 'global', 'guid', 'gv',
399 'nobuiltin', 'noduplicate', 'nonlazybind', 'optnone', 'returns_twice', 407 'half', 'hash', 'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
400 'sanitize_address', 'sanitize_memory', 'sanitize_thread', 'sspstrong', 408 'ifunc', 'inaccessiblemem_or_argmemonly', 'inaccessiblememonly', 'inalloca',
401 'uwtable', 'returned', 'type', 'opaque', 'eq', 'ne', 'slt', 'sgt', 409 'inbounds', 'indir', 'indirectbr', 'info', 'initialexec', 'inline',
402 'sle', 'sge', 'ult', 'ugt', 'ule', 'uge', 'oeq', 'one', 'olt', 'ogt', 410 'inlineBits', 'inlinehint', 'inrange', 'inreg', 'insertelement', 'insertvalue',
403 'ole', 'oge', 'ord', 'uno', 'ueq', 'une', 'x', 'acq_rel', 'acquire', 411 'insts', 'intel_ocl_bicc', 'inteldialect', 'internal', 'inttoptr', 'invoke',
404 'alignstack', 'atomic', 'catch', 'cleanup', 'filter', 'inteldialect', 412 'jumptable', 'kind', 'label', 'landingpad', 'largest', 'linkage', 'linkonce',
405 'max', 'min', 'monotonic', 'nand', 'personality', 'release', 'seq_cst', 413 'linkonce_odr', 'live', 'load', 'local_unnamed_addr', 'localdynamic',
406 'singlethread', 'umax', 'umin', 'unordered', 'xchg', 'add', 'fadd', 414 'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize', 'module', 'monotonic',
407 'sub', 'fsub', 'mul', 'fmul', 'udiv', 'sdiv', 'fdiv', 'urem', 'srem', 415 'msp430_intrcc', 'mul', 'musttail', 'naked', 'name', 'nand', 'ne', 'nest',
408 'frem', 'shl', 'lshr', 'ashr', 'and', 'or', 'xor', 'icmp', 'fcmp', 416 'ninf', 'nnan', 'noalias', 'nobuiltin', 'nocapture', 'nocf_check',
409 'phi', 'call', 'trunc', 'zext', 'sext', 'fptrunc', 'fpext', 'uitofp', 417 'noduplicate', 'noduplicates', 'noimplicitfloat', 'noinline', 'none',
410 'sitofp', 'fptoui', 'fptosi', 'inttoptr', 'ptrtoint', 'bitcast', 418 'nonlazybind', 'nonnull', 'norecurse', 'noRecurse', 'noredzone', 'noreturn',
411 'addrspacecast', 'select', 'va_arg', 'ret', 'br', 'switch', 'invoke', 419 'notail', 'notEligibleToImport', 'nounwind', 'nsw', 'nsz', 'null', 'nuw', 'oeq',
412 'unwind', 'unreachable', 'indirectbr', 'landingpad', 'resume', 420 'offset', 'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
413 'malloc', 'alloca', 'free', 'load', 'store', 'getelementptr', 421 'optnone', 'optsize', 'or', 'ord', 'path', 'personality', 'phi', 'ppc_fp128',
414 'extractelement', 'insertelement', 'shufflevector', 'getresult', 422 'prefix', 'preserve_allcc', 'preserve_mostcc', 'private', 'prologue',
415 'extractvalue', 'insertvalue', 'atomicrmw', 'cmpxchg', 'fence', 423 'protected', 'ptrtoint', 'ptx_device', 'ptx_kernel', 'readnone', 'readNone',
416 'allocsize', 'amdgpu_cs', 'amdgpu_gs', 'amdgpu_kernel', 'amdgpu_ps', 424 'readonly', 'readOnly', 'reassoc', 'refs', 'relbf', 'release', 'resByArg',
417 'amdgpu_vs', 'any', 'anyregcc', 'argmemonly', 'avr_intrcc', 425 'resume', 'ret', 'returnDoesNotAlias', 'returned', 'returns_twice', 'safestack',
418 'avr_signalcc', 'caller', 'catchpad', 'catchret', 'catchswitch', 426 'samesize', 'sanitize_address', 'sanitize_hwaddress', 'sanitize_memory',
419 'cleanuppad', 'cleanupret', 'comdat', 'convergent', 'cxx_fast_tlscc', 427 'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst', 'sext', 'sge', 'sgt',
420 'deplibs', 'dereferenceable', 'dereferenceable_or_null', 'distinct', 428 'shadowcallstack', 'shl', 'shufflevector', 'sideeffect', 'signext', 'single',
421 'exactmatch', 'externally_initialized', 'from', 'ghccc', 'hhvm_ccc', 429 'singleImpl', 'singleImplName', 'sitofp', 'sizeM1', 'sizeM1BitWidth', 'sle',
422 'hhvmcc', 'ifunc', 'inaccessiblemem_or_argmemonly', 'inaccessiblememonly', 430 'slt', 'source_filename', 'speculatable', 'spir_func', 'spir_kernel', 'srem',
423 'inalloca', 'jumptable', 'largest', 'local_unnamed_addr', 'minsize', 431 'sret', 'ssp', 'sspreq', 'sspstrong', 'store', 'strictfp', 'sub', 'summaries',
424 'musttail', 'noduplicates', 'none', 'nonnull', 'norecurse', 'notail', 432 'summary', 'swiftcc', 'swifterror', 'swiftself', 'switch', 'syncscope', 'tail',
425 'preserve_allcc', 'preserve_mostcc', 'prologue', 'safestack', 'samesize', 433 'target', 'thread_local', 'to', 'token', 'triple', 'true', 'trunc', 'type',
426 'source_filename', 'swiftcc', 'swifterror', 'swiftself', 'webkit_jscc', 434 'typeCheckedLoadConstVCalls', 'typeCheckedLoadVCalls', 'typeid', 'typeIdInfo',
427 'within', 'writeonly', 'x86_intrcc', 'x86_vectorcallcc'), 435 'typeTestAssumeConstVCalls', 'typeTestAssumeVCalls', 'typeTestRes', 'typeTests',
436 'udiv', 'ueq', 'uge', 'ugt', 'uitofp', 'ule', 'ult', 'umax', 'umin', 'undef',
437 'une', 'uniformRetVal', 'uniqueRetVal', 'unknown', 'unnamed_addr', 'uno',
438 'unordered', 'unreachable', 'unsat', 'unwind', 'urem', 'uselistorder',
439 'uselistorder_bb', 'uwtable', 'va_arg', 'variable', 'vFuncId',
440 'virtualConstProp', 'void', 'volatile', 'weak', 'weak_odr', 'webkit_jscc',
441 'win64cc', 'within', 'wpdRes', 'wpdResolutions', 'writeonly', 'x',
442 'x86_64_sysvcc', 'x86_fastcallcc', 'x86_fp80', 'x86_intrcc', 'x86_mmx',
443 'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc', 'x86_vectorcallcc', 'xchg',
444 'xor', 'zeroext', 'zeroinitializer', 'zext', 'immarg', 'willreturn'),
428 suffix=r'\b'), Keyword), 445 suffix=r'\b'), Keyword),
429 446
430 # Types 447 # Types
431 (words(('void', 'half', 'float', 'double', 'x86_fp80', 'fp128', 448 (words(('void', 'half', 'float', 'double', 'x86_fp80', 'fp128',
432 'ppc_fp128', 'label', 'metadata', 'token')), Keyword.Type), 449 'ppc_fp128', 'label', 'metadata', 'token')), Keyword.Type),
434 # Integer types 451 # Integer types
435 (r'i[1-9]\d*', Keyword) 452 (r'i[1-9]\d*', Keyword)
436 ] 453 ]
437 } 454 }
438 455
class LlvmMirBodyLexer(RegexLexer):
    """
    For LLVM MIR examples without the YAML wrapper.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR Body'
    aliases = ['llvm-mir-body']
    filenames = []
    mimetypes = []

    # Predicate names accepted inside intpred(...) and floatpred(...).
    _int_predicates = ('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge',
                       'ult', 'ule')
    _float_predicates = ('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt',
                         'uge', 'ult', 'ule')

    # Generic (gMIR) opcodes, kept as one alternation terminated by a word
    # boundary.
    _gmir_opcodes = (
        r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
        r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
        r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
        r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
        r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
        r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
        r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
        r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
        r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
        r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
        r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
        r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
        r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
        r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
        r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
        r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
        r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
        r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
        r'FSUB)'
        r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
        r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
        r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
        r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
        r'G_SHUFFLE_VECTOR)\b'
    )

    # Opcodes that are not specific to any target.
    _target_independent_opcodes = (
        r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b'
    )

    # Keywords that may appear inside a machine memory operand.
    _mmo_keywords = ('load', 'store', 'on', 'into', 'from', 'align',
                     'monotonic', 'acquire', 'release', 'acq_rel', 'seq_cst')

    tokens = {
        'root': [
            # Basic block attributes
            (words(('liveins', 'successors'), suffix=':'), Keyword),
            # Basic block labels: definitions first, then references
            (r'bb\.[0-9]+(\.[0-9a-zA-Z_.-]+)?( \(address-taken\))?:',
             Name.Label),
            (r'bb\.[0-9]+ \(%[0-9a-zA-Z_.-]+\)( \(address-taken\))?:',
             Name.Label),
            (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
            # Stack slot references
            (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
            # Sub-register indices
            (r'%subreg\.\w+', Name),
            # Virtual registers; 'vreg' handles an optional bank/class/type
            (r'%[0-9a-zA-Z_]+ *', Name.Variable, 'vreg'),
            # LLVM-IR globals
            include('global'),
            # Intrinsic references
            (r'intrinsic\(\@[0-9a-zA-Z_.]+\)', Name.Variable.Global),
            # Comparison predicates
            (words(_int_predicates, prefix=r'intpred\(', suffix=r'\)'),
             Name.Builtin),
            (words(_float_predicates, prefix=r'floatpred\(', suffix=r'\)'),
             Name.Builtin),
            # Physical registers
            (r'\$\w+', String.Single),
            # Assignment
            (r'[=]', Operator),
            # Generic and target-independent opcodes
            (_gmir_opcodes, Name.Builtin),
            (_target_independent_opcodes, Name.Builtin),
            # Operand flags
            (words(('killed', 'implicit')), Keyword),
            # Typed integer constants, e.g. "i32 42"
            (r'[i][0-9]+ +', Keyword.Type, 'constantint'),
            # Typed floating point constants, e.g. "float 1.0"
            (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
            # Untyped immediates
            include('integer'),
            # Machine memory operands
            (r':: *', Operator, 'mmo'),
            # Comments
            (r';.*', Comment),
            # Any other word is assumed to be a target instruction
            (r'[0-9a-zA-Z_]+', Name),
            # Remaining separators are left unhighlighted
            (r'[(), \n]+', Text),
        ],
        # Value part of a typed integer constant
        'constantint': [
            include('integer'),
            (r'(?=.)', Text, '#pop'),
        ],
        # Value part of a typed floating point constant
        'constantfloat': [
            include('float'),
            (r'(?=.)', Text, '#pop'),
        ],
        # What may follow a virtual register name
        'vreg': [
            # ':' introduces the register bank or class
            (r' *:(?!:)', Keyword, ('#pop', 'vreg_bank_or_class')),
            # '(' introduces the low-level type
            (r' *\(', Text, 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_bank_or_class': [
            # '_' stands for the unassigned bank/class
            (r' *_', Name.Variable.Magic),
            (r' *[0-9a-zA-Z_]+', Name.Variable),
            # '(' introduces the low-level type
            (r' *\(', Text, 'vreg_type'),
            (r'(?=.)', Text, '#pop'),
        ],
        'vreg_type': [
            # Scalars and pointers, then vectors of them
            (r' *[sp][0-9]+', Keyword.Type),
            (r' *<[0-9]+ *x *[sp][0-9]+>', Keyword.Type),
            (r'\)', Text, '#pop'),
            (r'(?=.)', Text, '#pop'),
        ],
        'mmo': [
            (r'\(', Text),
            (r' +', Text),
            (words(_mmo_keywords), Keyword),
            # References into the LLVM-IR
            (r'%ir\.[0-9a-zA-Z_.-]+', Name),
            (r'%ir-block\.[0-9a-zA-Z_.-]+', Name),
            (r'[-+]', Operator),
            include('integer'),
            include('global'),
            (r',', Punctuation),
            (r'\), \(', Text),
            (r'\)', Text, '#pop'),
        ],
        # Leaf states shared through include()
        'integer': [
            (r'-?[0-9]+', Number.Integer),
        ],
        'float': [
            (r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float),
        ],
        'global': [
            (r'\@[0-9a-zA-Z_.]+', Name.Variable.Global),
        ],
    }
595
class LlvmMirLexer(RegexLexer):
    """
    Lexer for the overall LLVM MIR document format.

    MIR is a human readable serialization format that's used to represent
    LLVM's machine specific intermediate representation. It allows LLVM's
    developers to see the state of the compilation process at various points,
    as well as test individual pieces of the compiler.

    The file is a sequence of YAML documents: a document opened with
    ``--- |`` is handed to `LlvmLexer`, any other document is lexed as MIR
    and its ``body: |`` block is handed to `LlvmMirBodyLexer`.

    For more information on LLVM MIR see https://llvm.org/docs/MIRLangRef.html.

    .. versionadded:: 2.6
    """
    name = 'LLVM-MIR'
    aliases = ['llvm-mir']
    filenames = ['*.mir']

    tokens = {
        'root': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents starting with | are LLVM-IR
            (r'--- \|$', Keyword, 'llvm_ir'),
            # Other documents are MIR
            (r'---', Keyword, 'llvm_mir'),
            # Consume everything else in one token for efficiency
            (r'[^-#]+|.', Text),
        ],
        'llvm_ir': [
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Delegate to the LlvmLexer
            (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
        ],
        'llvm_mir': [
            # Comments are hashes at the YAML level
            (r'#.*', Comment),
            # Documents end with '...' or '---'
            (r'(\.\.\.|(?=---))', Keyword, '#pop'),
            # Simple attributes whose value is highlighted
            (r'name:', Keyword, 'name'),
            (words(('alignment', ),
                   suffix=':'), Keyword, 'number'),
            (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
                    'selected', 'exposesReturnsTwice'),
                   suffix=':'), Keyword, 'boolean'),
            # Attributes whose contents are not highlighted
            (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
                    'machineFunctionInfo'),
                   suffix=':'), Keyword),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'body: *\|', Keyword, 'llvm_mir_body'),
            # Consume everything else
            (r'.+', Text),
            (r'\n', Text),
        ],
        'name': [(r'[^\n]+', Name), default('#pop')],
        'boolean': [(r' *(true|false)', Name.Builtin), default('#pop')],
        'number': [(r' *[0-9]+', Number), default('#pop')],
        'llvm_mir_body': [
            # Documents end with '...' or '---'.
            # We have to pop llvm_mir_body and llvm_mir
            (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
            # Delegate the body block to the LlvmMirBodyLexer
            (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
            # The '...' is optional. If we didn't already find it then it isn't
            # there. There might be a '---' instead though.
            # The inner group must be non-capturing and there must be exactly
            # one bygroups action: a second capturing group would hold the
            # last character of the body and emit it again as a token.
            (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
        ],
    }
666
439 667
440 class NasmLexer(RegexLexer): 668 class NasmLexer(RegexLexer):
441 """ 669 """
442 For Nasm (Intel) assembly code. 670 For Nasm (Intel) assembly code.
443 """ 671 """
444 name = 'NASM' 672 name = 'NASM'
445 aliases = ['nasm'] 673 aliases = ['nasm']
446 filenames = ['*.asm', '*.ASM'] 674 filenames = ['*.asm', '*.ASM']
447 mimetypes = ['text/x-nasm'] 675 mimetypes = ['text/x-nasm']
676
677 # Tasm uses the same file endings, but TASM is not as common as NASM, so
678 # we prioritize NASM higher by default
679 priority = 1.0
448 680
449 identifier = r'[a-z$._?][\w$.?#@~]*' 681 identifier = r'[a-z$._?][\w$.?#@~]*'
450 hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)' 682 hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
451 octn = r'[0-7]+q' 683 octn = r'[0-7]+q'
452 binn = r'[01]+b' 684 binn = r'[01]+b'
457 register = (r'r[0-9][0-5]?[bwd]|' 689 register = (r'r[0-9][0-5]?[bwd]|'
458 r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|' 690 r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
459 r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]') 691 r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
460 wordop = r'seg|wrt|strict' 692 wordop = r'seg|wrt|strict'
461 type = r'byte|[dq]?word' 693 type = r'byte|[dq]?word'
462 directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|' 694 # Directives must be followed by whitespace, otherwise CPU will match
695 # cpuid for instance.
696 directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
463 r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|' 697 r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
464 r'EXPORT|LIBRARY|MODULE') 698 r'EXPORT|LIBRARY|MODULE)\s+')
465 699
466 flags = re.IGNORECASE | re.MULTILINE 700 flags = re.IGNORECASE | re.MULTILINE
467 tokens = { 701 tokens = {
468 'root': [ 702 'root': [
469 (r'^\s*%', Comment.Preproc, 'preproc'), 703 (r'^\s*%', Comment.Preproc, 'preproc'),
506 (r'[$]+', Keyword.Constant), 740 (r'[$]+', Keyword.Constant),
507 (wordop, Operator.Word), 741 (wordop, Operator.Word),
508 (type, Keyword.Type) 742 (type, Keyword.Type)
509 ], 743 ],
510 } 744 }
745
def analyse_text(text):
    """Reject text that contains a ``PROC`` directive.

    A line of the form ``name PROC`` or ``PROC name`` is probably TASM, so
    ``False`` is returned for it. In every other case the function falls
    through and returns ``None``, expressing no opinion.
    """
    # re.search with MULTILINE inspects every line; re.match would only test
    # the very first characters of the text. The trailing \b keeps ordinary
    # words such as "process" from being taken for the directive.
    if re.search(r'^[ \t]*(?:[\w@$?]+[ \t]+)?PROC\b', text,
                 re.IGNORECASE | re.MULTILINE):
        return False
511 750
512 751
513 class NasmObjdumpLexer(ObjdumpLexer): 752 class NasmObjdumpLexer(ObjdumpLexer):
514 """ 753 """
515 For the output of 'objdump -d -M intel'. 754 For the output of 'objdump -d -M intel'.
601 (wordop, Operator.Word), 840 (wordop, Operator.Word),
602 (type, Keyword.Type) 841 (type, Keyword.Type)
603 ], 842 ],
604 } 843 }
605 844
def analyse_text(text):
    """Claim text that contains a ``PROC`` directive.

    Returns ``True`` when some line has the form ``name PROC`` or
    ``PROC name``; otherwise falls through and returns ``None``.
    """
    # re.search with MULTILINE inspects every line; re.match would only test
    # the very first characters of the text. The trailing \b keeps ordinary
    # words such as "process" from being taken for the directive.
    if re.search(r'^[ \t]*(?:[\w@$?]+[ \t]+)?PROC\b', text,
                 re.I | re.M):
        return True
849
606 850
607 class Ca65Lexer(RegexLexer): 851 class Ca65Lexer(RegexLexer):
608 """ 852 """
609 For ca65 assembler sources. 853 For ca65 assembler sources.
610 854
637 ] 881 ]
638 } 882 }
639 883
640 def analyse_text(self, text): 884 def analyse_text(self, text):
641 # comments in GAS start with "#" 885 # comments in GAS start with "#"
642 if re.match(r'^\s*;', text, re.MULTILINE): 886 if re.search(r'^\s*;', text, re.MULTILINE):
643 return 0.9 887 return 0.9
888
889
class Dasm16Lexer(RegexLexer):
    """
    Simple lexer for DCPU-16 Assembly.

    Check http://0x10c.com/doc/dcpu-16.txt

    .. versionadded:: 2.4
    """
    name = 'DASM16'
    aliases = ['dasm16']
    filenames = ['*.dasm16', '*.dasm']
    mimetypes = ['text/x-dasm16']

    # Mnemonics recognised as instructions (matched case-insensitively).
    INSTRUCTIONS = [
        'SET', 'ADD', 'SUB', 'MUL', 'MLI', 'DIV', 'DVI', 'MOD', 'MDI',
        'AND', 'BOR', 'XOR', 'SHR', 'ASR', 'SHL',
        'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
        'ADX', 'SBX', 'STI', 'STD', 'JSR',
        'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
    ]

    # Names highlighted as variables rather than labels when used as operands.
    REGISTERS = [
        'A', 'B', 'C', 'X', 'Y', 'Z', 'I', 'J',
        'SP', 'PC', 'EX',
        'POP', 'PEEK', 'PUSH',
    ]

    # Regex building blocks shared by the token rules below.
    char = r'[a-zA-Z$._0-9@]'
    identifier = r'(?:[a-zA-Z$_]%s*|\.%s+)' % (char, char)
    number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
    binary_number = r'0b[01_]+'
    instruction = r'(?i)(%s)' % '|'.join(INSTRUCTIONS)
    single_char = r"'\\?%s'" % char
    string = r'"(\\"|[^"])*"'

    def guess_identifier(lexer, match):
        """Token callback: emit a register name as a variable, else a label."""
        word = match.group(0)
        if word.upper() in lexer.REGISTERS:
            token_type = Name.Variable
        else:
            token_type = Name.Label
        yield match.start(), token_type, word

    tokens = {
        'root': [
            include('whitespace'),
            # Labels may be written as ":name" or "name:"
            (':' + identifier, Name.Label),
            (identifier + ':', Name.Label),
            (instruction, Name.Function, 'instruction-args'),
            # Dot-prefixed directives take data arguments
            (r'\.' + identifier, Name.Function, 'data-args'),
            (r'[\r\n]+', Text),
        ],

        'numeric': [
            (binary_number, Number.Integer),
            (number, Number.Integer),
            (single_char, String),
        ],

        'arg': [
            (identifier, guess_identifier),
            include('numeric'),
        ],

        # Inside "[...]"; popped by the closing bracket
        'deref': [
            (r'\+', Punctuation),
            (r'\]', Punctuation, '#pop'),
            include('arg'),
            include('whitespace'),
        ],

        # Rules that end the current line: a line break or a comment
        'instruction-line': [
            (r'[\r\n]+', Text, '#pop'),
            (r';.*?$', Comment, '#pop'),
            include('whitespace'),
        ],

        'instruction-args': [
            (r',', Punctuation),
            (r'\[', Punctuation, 'deref'),
            include('arg'),
            include('instruction-line'),
        ],

        'data-args': [
            (r',', Punctuation),
            include('numeric'),
            (string, String),
            include('instruction-line'),
        ],

        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r';.*?\n', Comment),
        ],
    }

eric ide

mercurial