eric6/ThirdParty/Pygments/pygments/lexers/asm.py

changeset 6942
2602857055c5
parent 6651
e8f3b5568b21
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.asm
4 ~~~~~~~~~~~~~~~~~~~
5
6 Lexers for assembly languages.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, include, bygroups, using, words, \
15 DelegatingLexer
16 from pygments.lexers.c_cpp import CppLexer, CLexer
17 from pygments.lexers.d import DLexer
18 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
19 Other, Keyword, Operator
20
21 __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
22 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'NasmLexer',
23 'NasmObjdumpLexer', 'TasmLexer', 'Ca65Lexer']
24
25
class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    # Regex fragments that the token rules below are assembled from.
    string = r'"(\\"|[^"])*"'
    char = r'[\w$.@-]'
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            # A directive switches to 'directive-args' until end of line.
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            # Anything else that looks like an identifier is a mnemonic;
            # its operands are lexed in 'instruction-args'.
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            # A line break or a comment ends the argument list.
            (r'[\r\n]+', Text, '#pop'),
            (r'[;#].*?\n', Comment, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            ('%' + identifier, Name.Variable),
            # Numeric constants
            # NOTE(review): the '$' in the next two patterns is unescaped, so
            # the regex engine reads it as an end-of-line anchor rather than
            # a literal dollar sign; the identifier rule above also already
            # accepts a leading '$'.  Left untouched here so tokenization
            # does not change -- confirm the intended behaviour.
            ('$'+number, Number.Integer),
            (r"$'(.|\\')'", String.Char),
            # A line break or a comment ends the operand list.
            (r'[\r\n]+', Text, '#pop'),
            (r'[;#].*?\n', Comment, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'[;#].*?\n', Comment)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Estimate how likely ``text`` is GAS source.

        Returns ``True`` when any line starts with ``.text``, ``.data`` or
        ``.section``, ``0.1`` when any line starts with some other
        dot-directive, and ``None`` (implicitly) otherwise.
        """
        # re.search is required here: re.match only tries position 0, so the
        # re.M flag would have no effect and only the very first line of the
        # text would ever be inspected.
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1
100
101
def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    ``asm_lexer`` is the lexer passed to ``using()`` for the
    disassembled-instruction part of a code line.  The returned dict has a
    single ``'root'`` state.
    """
    hex_re = r'[0-9A-Za-z]'

    # Pattern prefixes that several rules have in common.
    label_head = '(' + hex_re + '+)( )(<)(.*?)'
    code_head = '( *)(' + hex_re + r'+:)(\t)((?:' + hex_re + hex_re + ' )+)'
    reloc_head = r'(\t\t\t)(' + hex_re + r'+:)( )([^\t]+)(\t)(.*?)'

    root_rules = [
        # File name & format:
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),
        # Section header
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),
        # Function label carrying an offset
        (label_head + '([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Text, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # Function label without an offset
        (label_head + '(>:)$',
         bygroups(Number.Hex, Text, Punctuation, Name.Function,
                  Punctuation)),
        # Code line with disassembled instructions
        (code_head + '( *\t)([a-zA-Z].*?)$',
         bygroups(Text, Name.Label, Text, Number.Hex, Text,
                  using(asm_lexer))),
        # Code line with ascii
        (code_head + '( *)(.*?)$',
         bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
        # Continued code line: raw opcodes only, no disassembled instruction
        (code_head + '$',
         bygroups(Text, Name.Label, Text, Number.Hex)),
        # Skipped a few bytes
        (r'\t\.\.\.$', Text),
        # Relocation line carrying an offset
        (reloc_head + '([-+])(0x' + hex_re + '+)$',
         bygroups(Text, Name.Label, Text, Name.Property, Text,
                  Name.Constant, Punctuation, Number.Hex)),
        # Relocation line without an offset
        (reloc_head + '$',
         bygroups(Text, Name.Label, Text, Name.Property, Text,
                  Name.Constant)),
        # Fallback: any other non-empty line
        (r'[^\n]+\n', Other)
    ]
    return {'root': root_rules}
149
150
class ObjdumpLexer(RegexLexer):
    """
    For the output of ``objdump -dr``.
    """
    name = 'objdump'
    aliases = ['objdump']
    mimetypes = ['text/x-objdump']
    filenames = ['*.objdump']

    # The instruction column of each code line is lexed with GasLexer.
    tokens = _objdump_lexer_tokens(GasLexer)
161
162
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled D files.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    mimetypes = ['text/x-d-objdump']
    filenames = ['*.d-objdump']

    def __init__(self, **options):
        # DLexer and ObjdumpLexer are handed to DelegatingLexer in this order.
        base = super(DObjdumpLexer, self)
        base.__init__(DLexer, ObjdumpLexer, **options)
174
175
class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' is a long-standing misspelling; it is kept so existing
    # users of that alias keep working, and the correctly spelt
    # 'c++-objdump' (matching the '*.c++-objdump' filename pattern) is added.
    aliases = ['cpp-objdump', 'c++-objdumb', 'c++-objdump', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        # CppLexer and ObjdumpLexer are handed to DelegatingLexer in this
        # order.
        super(CppObjdumpLexer, self).__init__(CppLexer, ObjdumpLexer, **options)
187
188
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C files.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    mimetypes = ['text/x-c-objdump']
    filenames = ['*.c-objdump']

    def __init__(self, **options):
        # CLexer and ObjdumpLexer are handed to DelegatingLexer in this order.
        base = super(CObjdumpLexer, self)
        base.__init__(CLexer, ObjdumpLexer, **options)
200
201
class HsailLexer(RegexLexer):
    """
    For HSAIL assembly code.

    The class-level strings below are regex fragments that the ``tokens``
    table is assembled from.

    .. versionadded:: 2.2
    """
    name = 'HSAIL'
    aliases = ['hsail', 'hsa']
    filenames = ['*.hsail']
    mimetypes = ['text/x-hsail']

    # Double-quoted string; no escape sequences are recognised.
    string = r'"[^"]*?"'
    identifier = r'[a-zA-Z_][\w.]*'
    # Registers
    register_number = r'[0-9]+'
    register = r'(\$(c|s|d|q)' + register_number + ')'
    # Qualifiers
    alignQual = r'(align\(\d+\))'
    widthQual = r'(width\((\d+|all)\))'
    # NOTE(review): allocQual is not referenced by any rule in this class.
    allocQual = r'(alloc\(agent\))'
    # Instruction Modifiers
    roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
    datatypeMod = (r'_('
                   # packedTypes
                   r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
                   r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
                   r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
                   # baseTypes
                   r'u8|s8|u16|s16|u32|s32|u64|s64|'
                   r'b128|b8|b16|b32|b64|b1|'
                   r'f16|f32|f64|'
                   # opaqueType
                   r'roimg|woimg|rwimg|samp|sig32|sig64)')

    # Numeric Constant
    # (``float`` shadows the builtin name inside this class body only.)
    float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
    hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
    ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),

            (string, String),

            # '@name' with an optional trailing colon is a label
            (r'@' + identifier + ':?', Name.Label),

            (register, Name.Variable.Anonymous),

            include('keyword'),

            # '&name' and '%name' are variables
            (r'&' + identifier, Name.Variable.Global),
            (r'%' + identifier, Name.Variable),

            # Numbers: the more specific forms are tried before plain
            # integers.
            (hexfloat, Number.Hex),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (ieeefloat, Number.Float),
            (float, Number.Float),
            (r'\d+', Number.Integer),

            (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
        ],
        'comments': [
            (r'/\*.*?\*/', Comment.Multiline),
            (r'//.*?\n', Comment.Single),
        ],
        'keyword': [
            # Types
            (r'kernarg' + datatypeMod, Keyword.Type),

            # Regular keywords
            (r'\$(full|base|small|large|default|zero|near)', Keyword),
            (words((
                'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
                'decl', 'kernel', 'function', 'enablebreakexceptions',
                'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
                'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
                'requiredworkgroupsize', 'requirenopartialworkgroups'),
                suffix=r'\b'), Keyword),

            # instructions
            (roundingMod, Keyword),
            (datatypeMod, Keyword),
            (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
            (r'_kernarg', Keyword),
            (r'(nop|imagefence)\b', Keyword),
            # Unlike the keyword list above, this words() call is given no
            # suffix argument.
            (words((
                'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
                'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
                'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
                'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
                'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
                'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
                'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
                'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
                'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
                'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
                'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
                'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
                'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
                'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
                'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
                '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
                '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
                '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
                '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
                '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
                '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
                'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
                '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
                '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
                '_width', '_height', '_depth', '_array', '_channelorder',
                '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
                'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
                'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
                'activelanecount', 'activelanemask', 'activelanepermute', 'call',
                'scall', 'icall', 'alloca', 'packetcompletionsig',
                'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
                'stqueuereadindex', 'readonly', 'global', 'private', 'group',
                'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
                '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
                '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
                '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
                '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
335
336
class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.
    """
    name = 'LLVM'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Regex fragments used by the rules below: a double-quoted string (no
    # escape handling) and an identifier, which may itself be such a string.
    string = r'"[^"]*?"'
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (identifier + r'\s*:', Name.Label),

            include('keyword'),

            # Named and numbered values, globals, attribute groups, metadata
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            # A string, optionally prefixed with 'c'
            (r'c?' + string, String),

            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
            # ';' starts a comment that runs to the end of the line
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (words((
                'begin', 'end', 'true', 'false', 'declare', 'define', 'global',
                'constant', 'private', 'linker_private', 'internal',
                'available_externally', 'linkonce', 'linkonce_odr', 'weak',
                'weak_odr', 'appending', 'dllimport', 'dllexport', 'common',
                'default', 'hidden', 'protected', 'extern_weak', 'external',
                'thread_local', 'zeroinitializer', 'undef', 'null', 'to', 'tail',
                'target', 'triple', 'datalayout', 'volatile', 'nuw', 'nsw', 'nnan',
                'ninf', 'nsz', 'arcp', 'fast', 'exact', 'inbounds', 'align',
                'addrspace', 'section', 'alias', 'module', 'asm', 'sideeffect',
                'gc', 'dbg', 'linker_private_weak', 'attributes', 'blockaddress',
                'initialexec', 'localdynamic', 'localexec', 'prefix', 'unnamed_addr',
                'ccc', 'fastcc', 'coldcc', 'x86_stdcallcc', 'x86_fastcallcc',
                'arm_apcscc', 'arm_aapcscc', 'arm_aapcs_vfpcc', 'ptx_device',
                'ptx_kernel', 'intel_ocl_bicc', 'msp430_intrcc', 'spir_func',
                'spir_kernel', 'x86_64_sysvcc', 'x86_64_win64cc', 'x86_thiscallcc',
                'cc', 'c', 'signext', 'zeroext', 'inreg', 'sret', 'nounwind',
                'noreturn', 'noalias', 'nocapture', 'byval', 'nest', 'readnone',
                'readonly', 'inlinehint', 'noinline', 'alwaysinline', 'optsize', 'ssp',
                'sspreq', 'noredzone', 'noimplicitfloat', 'naked', 'builtin', 'cold',
                'nobuiltin', 'noduplicate', 'nonlazybind', 'optnone', 'returns_twice',
                'sanitize_address', 'sanitize_memory', 'sanitize_thread', 'sspstrong',
                'uwtable', 'returned', 'type', 'opaque', 'eq', 'ne', 'slt', 'sgt',
                'sle', 'sge', 'ult', 'ugt', 'ule', 'uge', 'oeq', 'one', 'olt', 'ogt',
                'ole', 'oge', 'ord', 'uno', 'ueq', 'une', 'x', 'acq_rel', 'acquire',
                'alignstack', 'atomic', 'catch', 'cleanup', 'filter', 'inteldialect',
                'max', 'min', 'monotonic', 'nand', 'personality', 'release', 'seq_cst',
                'singlethread', 'umax', 'umin', 'unordered', 'xchg', 'add', 'fadd',
                'sub', 'fsub', 'mul', 'fmul', 'udiv', 'sdiv', 'fdiv', 'urem', 'srem',
                'frem', 'shl', 'lshr', 'ashr', 'and', 'or', 'xor', 'icmp', 'fcmp',
                'phi', 'call', 'trunc', 'zext', 'sext', 'fptrunc', 'fpext', 'uitofp',
                'sitofp', 'fptoui', 'fptosi', 'inttoptr', 'ptrtoint', 'bitcast',
                'addrspacecast', 'select', 'va_arg', 'ret', 'br', 'switch', 'invoke',
                'unwind', 'unreachable', 'indirectbr', 'landingpad', 'resume',
                'malloc', 'alloca', 'free', 'load', 'store', 'getelementptr',
                'extractelement', 'insertelement', 'shufflevector', 'getresult',
                'extractvalue', 'insertvalue', 'atomicrmw', 'cmpxchg', 'fence',
                'allocsize', 'amdgpu_cs', 'amdgpu_gs', 'amdgpu_kernel', 'amdgpu_ps',
                'amdgpu_vs', 'any', 'anyregcc', 'argmemonly', 'avr_intrcc',
                'avr_signalcc', 'caller', 'catchpad', 'catchret', 'catchswitch',
                'cleanuppad', 'cleanupret', 'comdat', 'convergent', 'cxx_fast_tlscc',
                'deplibs', 'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'exactmatch', 'externally_initialized', 'from', 'ghccc', 'hhvm_ccc',
                'hhvmcc', 'ifunc', 'inaccessiblemem_or_argmemonly', 'inaccessiblememonly',
                'inalloca', 'jumptable', 'largest', 'local_unnamed_addr', 'minsize',
                'musttail', 'noduplicates', 'none', 'nonnull', 'norecurse', 'notail',
                'preserve_allcc', 'preserve_mostcc', 'prologue', 'safestack', 'samesize',
                'source_filename', 'swiftcc', 'swifterror', 'swiftself', 'webkit_jscc',
                'within', 'writeonly', 'x86_intrcc', 'x86_vectorcallcc'),
                suffix=r'\b'), Keyword),

            # Types
            # Unlike the keyword list above, this words() call is given no
            # suffix argument.
            (words(('void', 'half', 'float', 'double', 'x86_fp80', 'fp128',
                    'ppc_fp128', 'label', 'metadata', 'token')), Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
438
439
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.

    The class-level strings below are regex fragments used by the ``tokens``
    table; all patterns are compiled with ``re.IGNORECASE | re.MULTILINE``.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM']
    mimetypes = ['text/x-nasm']

    identifier = r'[a-z$._?][\w$.?#@~]*'
    # The '$' of the '$0...' hex form must be escaped: a bare '$' is an
    # end-of-line anchor (the lexer compiles with re.MULTILINE), so that
    # alternative could never match a literal dollar sign.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'r[0-9][0-5]?[bwd]|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
                  r'EXPORT|LIBRARY|MODULE')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            # A line whose first non-blank character is '%' is a
            # preprocessor line.
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            # "<name> equ ..." constant definition
            (r'(%s)(\s+)(equ)' % identifier,
             bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
             'instruction-args'),
            (directives, Keyword, 'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            # Any other identifier is a mnemonic; its operands are lexed in
            # 'instruction-args'.
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'instruction-args': [
            (string, String),
            # hexn is tried before the '$' rule in 'punctuation', so
            # '$0...' is lexed as one hex number.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # A line break ends the operand list.
            (r'[\r\n]+', Text, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Text),
            (r'[ \t]+', Text),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }
511
512
class NasmObjdumpLexer(ObjdumpLexer):
    """
    For the output of ``objdump -d -M intel``.

    .. versionadded:: 2.0
    """
    name = 'objdump-nasm'
    aliases = ['objdump-nasm']
    mimetypes = ['text/x-nasm-objdump']
    filenames = ['*.objdump-intel']

    # Same objdump rules as the parent class, but the instruction column is
    # lexed with NasmLexer.
    tokens = _objdump_lexer_tokens(NasmLexer)
525
526
class TasmLexer(RegexLexer):
    """
    For Tasm (Turbo Assembler) assembly code.

    The class-level strings below are regex fragments used by the ``tokens``
    table; all patterns are compiled with ``re.IGNORECASE | re.MULTILINE``.
    """
    name = 'TASM'
    aliases = ['tasm']
    filenames = ['*.asm', '*.ASM', '*.tasm']
    mimetypes = ['text/x-tasm']

    identifier = r'[@a-z$._?][\w$.?#@~]*'
    # NOTE(review): the '$' in the second alternative is unescaped, so it is
    # an end-of-line anchor rather than a literal dollar sign -- confirm
    # whether a '$'-prefixed hex form is intended here.
    hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'r[0-9][0-5]?[bwd]|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
                  r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
                  r'P386|MODEL|ASSUME|CODESEG|SIZE')
    # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
    # and then 'add' them to datatype somehow.
    datatype = (r'db|dd|dw|T[A-Z][a-z]+')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            # A line whose first non-blank character is '%' is a
            # preprocessor line.
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (directives, Keyword, 'instruction-args'),
            # "<name> <datatype> ..." data definition
            (r'(%s)(\s+)(%s)' % (identifier, datatype),
             bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
             'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            # Any other identifier is a mnemonic; its operands are lexed in
            # 'instruction-args'.
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'instruction-args': [
            (string, String),
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # Do not match newline when it's preceded by a backslash
            (r'(\\\s*)(;.*)([\r\n])', bygroups(Text, Comment.Single, Text)),
            # A plain line break ends the operand list.
            (r'[\r\n]+', Text, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'[\n\r]', Text),
            # Backslash immediately followed by a line break
            (r'\\[\n\r]', Text),
            (r'[ \t]+', Text),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*=/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }
605
606
class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.

    .. versionadded:: 1.6
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r';.*', Comment.Single),
            (r'\s+', Text),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            # Dot-prefixed words
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """
        Estimate how likely ``text`` is ca65 source.

        Returns ``0.9`` when any line begins (after optional whitespace)
        with ``;``, and ``None`` (implicitly) otherwise.
        """
        # Takes only ``text``, like GasLexer.analyse_text in this module; the
        # previous ``(self, text)`` signature did not match that convention.
        # comments in GAS start with "#"
        # re.search is required: re.match only tries position 0, so the
        # MULTILINE flag would have no effect and only the start of the text
        # would ever be inspected.
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9

eric ide

mercurial