|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.asm |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for assembly languages. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import RegexLexer, include, bygroups, using, words, \ |
|
15 DelegatingLexer |
|
16 from pygments.lexers.c_cpp import CppLexer, CLexer |
|
17 from pygments.lexers.d import DLexer |
|
18 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ |
|
19 Other, Keyword, Operator |
|
20 |
|
# Lexer classes exported by this module, in definition order.
__all__ = [
    'GasLexer',
    'ObjdumpLexer',
    'DObjdumpLexer',
    'CppObjdumpLexer',
    'CObjdumpLexer',
    'HsailLexer',
    'LlvmLexer',
    'NasmLexer',
    'NasmObjdumpLexer',
    'TasmLexer',
    'Ca65Lexer',
]
|
24 |
|
25 |
|
class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas', 'asm']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    # Regex fragments shared by the token rules below.
    # Double-quoted string; \" is accepted inside it.
    string = r'"(\\"|[^"])*"'
    # Any character allowed after the first one of an identifier.
    char = r'[\w$.@-]'
    # Either a letter/$/_ followed by `char`s, or a dot followed by at least
    # one `char`.
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    # 0x-prefixed or plain decimal number.
    number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (r'[\r\n]+', Text, '#pop'),
            (r'[;#].*?\n', Comment, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
                bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                         Punctuation, Number.Integer, Punctuation)),

            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            ('%' + identifier, Name.Variable),
            # Numeric constants
            # NOTE(review): the leading '$' in the next two patterns is not
            # escaped, so the regex engine reads it as an end-of-line anchor
            # rather than a literal dollar sign.  '$'-prefixed operands are
            # picked up by the identifier rule above instead (its first
            # character class contains '$').  Left untouched here so the
            # emitted token stream does not change.
            ('$'+number, Number.Integer),
            (r"$'(.|\\')'", String.Char),
            (r'[\r\n]+', Text, '#pop'),
            (r'[;#].*?\n', Comment, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'[;#].*?\n', Comment)
        ],
        'punctuation': [
            (r'[-*,.()\[\]!:]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Score *text* for lexer guessing.

        Returns ``True`` when any line starts with ``.text``, ``.data`` or
        ``.section``, ``0.1`` when any line starts with another ``.word``
        directive, and ``None`` otherwise.

        Declared without ``self``: Pygments invokes ``analyse_text`` as a
        static method that receives only the text.
        """
        # re.search, not re.match: with re.match the pattern is only tried at
        # the very beginning of the text, which makes re.M pointless and
        # misses directives that follow a comment or blank line.
        if re.search(r'^\.(text|data|section)', text, re.M):
            return True
        elif re.search(r'^\.\w+', text, re.M):
            return 0.1
|
100 |
|
101 |
|
def _objdump_lexer_tokens(asm_lexer):
    """
    Build the token table shared by the objdump lexers.

    :param asm_lexer: lexer class handed to ``using()`` for the instruction
        text of a disassembled code line.
    :return: a token dict with a single ``'root'`` state.
    """
    # Used wherever an address or opcode byte is expected.  Note that the
    # class accepts every ASCII letter and digit, not only 0-9/a-f.
    xdigit = r'[0-9A-Za-z]'

    # Opening part of a function label line: address, space, '<', symbol.
    label_head = '(' + xdigit + '+)( )(<)(.*?)'
    # Opening part of a code line: indent, "address:", tab, opcode bytes.
    code_head = '( *)(' + xdigit + r'+:)(\t)((?:' + xdigit + xdigit + ' )+)'
    # Opening part of a relocation line: three tabs, "address:", space,
    # relocation type, tab, symbol.
    reloc_head = r'(\t\t\t)(' + xdigit + r'+:)( )([^\t]+)(\t)(.*?)'

    # Rule order matters: the first matching pattern wins.
    root_rules = [
        # File name & format:
        ('(.*?)(:)( +file format )(.*?)$',
         bygroups(Name.Label, Punctuation, Text, String)),
        # Section header
        ('(Disassembly of section )(.*?)(:)$',
         bygroups(Text, Name.Label, Punctuation)),
        # Function label carrying a "+0x.." / "-0x.." offset
        (label_head + '([-+])(0[xX][A-Za-z0-9]+)(>:)$',
         bygroups(Number.Hex, Text, Punctuation, Name.Function,
                  Punctuation, Number.Hex, Punctuation)),
        # Function label without an offset
        (label_head + '(>:)$',
         bygroups(Number.Hex, Text, Punctuation, Name.Function,
                  Punctuation)),
        # Code line followed by a disassembled instruction, which is
        # delegated to the wrapped assembly lexer
        (code_head + '( *\t)([a-zA-Z].*?)$',
         bygroups(Text, Name.Label, Text, Number.Hex, Text,
                  using(asm_lexer))),
        # Code line followed by ascii
        (code_head + '( *)(.*?)$',
         bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
        # Continuation line: raw opcode bytes only, no instruction text
        (code_head + '$',
         bygroups(Text, Name.Label, Text, Number.Hex)),
        # Marker for a run of skipped bytes
        (r'\t\.\.\.$', Text),
        # Relocation line carrying an offset
        (reloc_head + '([-+])(0x' + xdigit + '+)$',
         bygroups(Text, Name.Label, Text, Name.Property, Text,
                  Name.Constant, Punctuation, Number.Hex)),
        # Relocation line without an offset
        (reloc_head + '$',
         bygroups(Text, Name.Label, Text, Name.Property, Text,
                  Name.Constant)),
        # Anything else is emitted as Other, one whole line at a time
        (r'[^\n]+\n', Other),
    ]
    return {'root': root_rules}
|
149 |
|
150 |
|
class ObjdumpLexer(RegexLexer):
    """
    For the output of 'objdump -dr'

    The instruction text of each disassembled line is lexed with
    `GasLexer`.
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    # Shared objdump rules, with GAS syntax for the instruction column.
    tokens = _objdump_lexer_tokens(GasLexer)
|
161 |
|
162 |
|
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of 'objdump -Sr on compiled D files'

    Combines `ObjdumpLexer` for the listing with `DLexer` for the
    interleaved source.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        """Set up the D / objdump lexer pair; *options* are passed through."""
        source_lexer = DLexer
        listing_lexer = ObjdumpLexer
        super(DObjdumpLexer, self).__init__(source_lexer, listing_lexer,
                                            **options)
|
174 |
|
175 |
|
class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of 'objdump -Sr on compiled C++ files'

    Combines `ObjdumpLexer` for the listing with `CppLexer` for the
    interleaved source.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' was a typo for 'c++-objdump'.  The correct spelling is
    # added; the misspelt alias is kept so existing users of it still work.
    # NOTE(review): if the project keeps a generated alias -> lexer mapping,
    # it needs regenerating for the new alias to be found by name.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        """Set up the C++ / objdump lexer pair; *options* are passed through."""
        super(CppObjdumpLexer, self).__init__(CppLexer, ObjdumpLexer, **options)
|
187 |
|
188 |
|
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of 'objdump -Sr on compiled C files'

    Combines `ObjdumpLexer` for the listing with `CLexer` for the
    interleaved source.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        """Set up the C / objdump lexer pair; *options* are passed through."""
        source_lexer = CLexer
        listing_lexer = ObjdumpLexer
        super(CObjdumpLexer, self).__init__(source_lexer, listing_lexer,
                                            **options)
|
200 |
|
201 |
|
class HsailLexer(RegexLexer):
    """
    For HSAIL assembly code.

    .. versionadded:: 2.2
    """
    name = 'HSAIL'
    aliases = ['hsail', 'hsa']
    filenames = ['*.hsail']
    mimetypes = ['text/x-hsail']

    # Regex fragments shared by the token rules below.
    string = r'"[^"]*?"'
    identifier = r'[a-zA-Z_][\w.]*'
    # Registers
    register_number = r'[0-9]+'
    register = r'(\$(c|s|d|q)' + register_number + ')'
    # Qualifiers
    alignQual = r'(align\(\d+\))'
    widthQual = r'(width\((\d+|all)\))'
    allocQual = r'(alloc\(agent\))'
    # Instruction Modifiers
    roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
    datatypeMod = (r'_('
                   # packedTypes
                   r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
                   r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
                   r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
                   # baseTypes
                   r'u8|s8|u16|s16|u32|s32|u64|s64|'
                   r'b128|b8|b16|b32|b64|b1|'
                   r'f16|f32|f64|'
                   # opaqueType
                   r'roimg|woimg|rwimg|samp|sig32|sig64)')

    # Numeric Constant
    float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
    hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
    ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),

            (string, String),

            (r'@' + identifier + ':?', Name.Label),

            (register, Name.Variable.Anonymous),

            include('keyword'),

            (r'&' + identifier, Name.Variable.Global),
            (r'%' + identifier, Name.Variable),

            (hexfloat, Number.Hex),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (ieeefloat, Number.Float),
            (float, Number.Float),
            (r'\d+', Number.Integer),

            (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
        ],
        'comments': [
            # [\w\W] instead of '.': no DOTALL flag is set on this lexer, so
            # '.' stops at a newline and a block comment spanning several
            # lines would never match.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*?\n', Comment.Single),
        ],
        'keyword': [
            # Types
            (r'kernarg' + datatypeMod, Keyword.Type),

            # Regular keywords
            (r'\$(full|base|small|large|default|zero|near)', Keyword),
            (words((
                'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
                'decl', 'kernel', 'function', 'enablebreakexceptions',
                'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
                'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
                'requiredworkgroupsize', 'requirenopartialworkgroups'),
                suffix=r'\b'), Keyword),

            # instructions
            (roundingMod, Keyword),
            (datatypeMod, Keyword),
            (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
            (r'_kernarg', Keyword),
            (r'(nop|imagefence)\b', Keyword),
            (words((
                'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
                'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
                'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
                'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
                'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
                'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
                'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
                'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
                'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
                'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
                'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
                'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
                'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
                'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
                'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
                '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
                '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
                '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
                '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
                '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
                '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
                'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
                '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
                '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
                '_width', '_height', '_depth', '_array', '_channelorder',
                '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
                'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
                'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
                'activelanecount', 'activelanemask', 'activelanepermute', 'call',
                'scall', 'icall', 'alloca', 'packetcompletionsig',
                'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
                'stqueuereadindex', 'readonly', 'global', 'private', 'group',
                'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
                '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
                '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
                '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
                '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
|
335 |
|
336 |
|
class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.
    """
    name = 'LLVM'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Regex fragments shared by the token rules below.
    # Double-quoted string without escape handling.
    string = r'"[^"]*?"'
    # Bare name, or a quoted string used as a name.
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (identifier + r'\s*:', Name.Label),

            include('keyword'),

            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            (r'c?' + string, String),

            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (words((
                'begin', 'end', 'true', 'false', 'declare', 'define', 'global',
                'constant', 'private', 'linker_private', 'internal',
                'available_externally', 'linkonce', 'linkonce_odr', 'weak',
                'weak_odr', 'appending', 'dllimport', 'dllexport', 'common',
                'default', 'hidden', 'protected', 'extern_weak', 'external',
                'thread_local', 'zeroinitializer', 'undef', 'null', 'to', 'tail',
                'target', 'triple', 'datalayout', 'volatile', 'nuw', 'nsw', 'nnan',
                'ninf', 'nsz', 'arcp', 'fast', 'exact', 'inbounds', 'align',
                'addrspace', 'section', 'alias', 'module', 'asm', 'sideeffect',
                'gc', 'dbg', 'linker_private_weak', 'attributes', 'blockaddress',
                'initialexec', 'localdynamic', 'localexec', 'prefix', 'unnamed_addr',
                'ccc', 'fastcc', 'coldcc', 'x86_stdcallcc', 'x86_fastcallcc',
                'arm_apcscc', 'arm_aapcscc', 'arm_aapcs_vfpcc', 'ptx_device',
                'ptx_kernel', 'intel_ocl_bicc', 'msp430_intrcc', 'spir_func',
                'spir_kernel', 'x86_64_sysvcc', 'x86_64_win64cc', 'x86_thiscallcc',
                'cc', 'c', 'signext', 'zeroext', 'inreg', 'sret', 'nounwind',
                'noreturn', 'noalias', 'nocapture', 'byval', 'nest', 'readnone',
                'readonly', 'inlinehint', 'noinline', 'alwaysinline', 'optsize', 'ssp',
                'sspreq', 'noredzone', 'noimplicitfloat', 'naked', 'builtin', 'cold',
                'nobuiltin', 'noduplicate', 'nonlazybind', 'optnone', 'returns_twice',
                'sanitize_address', 'sanitize_memory', 'sanitize_thread', 'sspstrong',
                'uwtable', 'returned', 'type', 'opaque', 'eq', 'ne', 'slt', 'sgt',
                'sle', 'sge', 'ult', 'ugt', 'ule', 'uge', 'oeq', 'one', 'olt', 'ogt',
                'ole', 'oge', 'ord', 'uno', 'ueq', 'une', 'x', 'acq_rel', 'acquire',
                'alignstack', 'atomic', 'catch', 'cleanup', 'filter', 'inteldialect',
                'max', 'min', 'monotonic', 'nand', 'personality', 'release', 'seq_cst',
                'singlethread', 'umax', 'umin', 'unordered', 'xchg', 'add', 'fadd',
                'sub', 'fsub', 'mul', 'fmul', 'udiv', 'sdiv', 'fdiv', 'urem', 'srem',
                'frem', 'shl', 'lshr', 'ashr', 'and', 'or', 'xor', 'icmp', 'fcmp',
                'phi', 'call', 'trunc', 'zext', 'sext', 'fptrunc', 'fpext', 'uitofp',
                'sitofp', 'fptoui', 'fptosi', 'inttoptr', 'ptrtoint', 'bitcast',
                'addrspacecast', 'select', 'va_arg', 'ret', 'br', 'switch', 'invoke',
                'unwind', 'unreachable', 'indirectbr', 'landingpad', 'resume',
                'malloc', 'alloca', 'free', 'load', 'store', 'getelementptr',
                'extractelement', 'insertelement', 'shufflevector', 'getresult',
                'extractvalue', 'insertvalue', 'atomicrmw', 'cmpxchg', 'fence',
                'allocsize', 'amdgpu_cs', 'amdgpu_gs', 'amdgpu_kernel', 'amdgpu_ps',
                'amdgpu_vs', 'any', 'anyregcc', 'argmemonly', 'avr_intrcc',
                'avr_signalcc', 'caller', 'catchpad', 'catchret', 'catchswitch',
                'cleanuppad', 'cleanupret', 'comdat', 'convergent', 'cxx_fast_tlscc',
                'deplibs', 'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'exactmatch', 'externally_initialized', 'from', 'ghccc', 'hhvm_ccc',
                'hhvmcc', 'ifunc', 'inaccessiblemem_or_argmemonly', 'inaccessiblememonly',
                'inalloca', 'jumptable', 'largest', 'local_unnamed_addr', 'minsize',
                'musttail', 'noduplicates', 'none', 'nonnull', 'norecurse', 'notail',
                'preserve_allcc', 'preserve_mostcc', 'prologue', 'safestack', 'samesize',
                'source_filename', 'swiftcc', 'swifterror', 'swiftself', 'webkit_jscc',
                'within', 'writeonly', 'x86_intrcc', 'x86_vectorcallcc'),
                suffix=r'\b'), Keyword),

            # Types
            # The \b suffix mirrors the keyword list above: without it a type
            # name would also match as the prefix of a longer word.
            (words(('void', 'half', 'float', 'double', 'x86_fp80', 'fp128',
                    'ppc_fp128', 'label', 'metadata', 'token'),
                   suffix=r'\b'), Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
|
438 |
|
439 |
|
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM']
    mimetypes = ['text/x-nasm']

    # Regex fragments shared by the token rules below.  All rules are
    # compiled with IGNORECASE and MULTILINE (see `flags`).
    identifier = r'[a-z$._?][\w$.?#@~]*'
    # Hex literals: 0x prefix, '$' prefix followed by a leading 0, or an
    # 'h' suffix.  The '$' has to be escaped: unescaped it is an end-of-line
    # anchor and the '$0...' alternative could never match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'r[0-9][0-5]?[bwd]|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
                  r'EXPORT|LIBRARY|MODULE')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            # A '%' as the first non-blank character of a line opens a
            # preprocessor line.
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'(%s)(\s+)(equ)' % identifier,
                bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
                'instruction-args'),
            (directives, Keyword, 'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'instruction-args': [
            (string, String),
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # End of line ends the operand list.
            (r'[\r\n]+', Text, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Text),
            (r'[ \t]+', Text),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }
|
511 |
|
512 |
|
class NasmObjdumpLexer(ObjdumpLexer):
    """
    For the output of 'objdump -d -M intel'.

    The instruction text of each disassembled line is lexed with
    `NasmLexer`.

    .. versionadded:: 2.0
    """
    name = 'objdump-nasm'
    aliases = ['objdump-nasm']
    filenames = ['*.objdump-intel']
    mimetypes = ['text/x-nasm-objdump']

    # Shared objdump rules, with NASM syntax for the instruction column.
    tokens = _objdump_lexer_tokens(NasmLexer)
|
525 |
|
526 |
|
class TasmLexer(RegexLexer):
    """
    For Tasm (Turbo Assembler) assembly code.
    """
    name = 'TASM'
    aliases = ['tasm']
    filenames = ['*.asm', '*.ASM', '*.tasm']
    mimetypes = ['text/x-tasm']

    # Regex fragments shared by the token rules below.  All rules are
    # compiled with IGNORECASE and MULTILINE (see `flags`).
    identifier = r'[@a-z$._?][\w$.?#@~]*'
    # The '$' has to be escaped: unescaped it is an end-of-line anchor and
    # the '$0...' alternative could never match.
    hexn = r'(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'r[0-9][0-5]?[bwd]|'
                r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
                  r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
                  r'P386|MODEL|ASSUME|CODESEG|SIZE')
    # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
    # and then 'add' them to datatype somehow.
    datatype = (r'db|dd|dw|T[A-Z][a-z]+')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            # A '%' as the first non-blank character of a line opens a
            # preprocessor line.
            (r'^\s*%', Comment.Preproc, 'preproc'),
            include('whitespace'),
            (identifier + ':', Name.Label),
            (directives, Keyword, 'instruction-args'),
            (r'(%s)(\s+)(%s)' % (identifier, datatype),
                bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
                'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'instruction-args': [
            (string, String),
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number.Bin),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # Do not match newline when it's preceded by a backslash
            (r'(\\\s*)(;.*)([\r\n])', bygroups(Text, Comment.Single, Text)),
            # A plain end of line ends the operand list.
            (r'[\r\n]+', Text, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'[\n\r]', Text),
            (r'\\[\n\r]', Text),
            (r'[ \t]+', Text),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*=/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }
|
605 |
|
606 |
|
class Ca65Lexer(RegexLexer):
    """
    For ca65 assembler sources.

    .. versionadded:: 1.6
    """
    name = 'ca65 assembler'
    aliases = ['ca65']
    filenames = ['*.s']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r';.*', Comment.Single),
            (r'\s+', Text),
            (r'[a-z_.@$][\w.@$]*:', Name.Label),
            # Instruction mnemonics
            (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
             r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
             r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
             r'|bit)\b', Keyword),
            (r'\.\w+', Keyword.Pseudo),
            (r'[-+~*/^&|!<>=]', Operator),
            (r'"[^"\n]*.', String),
            (r"'[^'\n]*.", String.Char),
            (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
            (r'\d+', Number.Integer),
            (r'%[01]+', Number.Bin),
            (r'[#,.:()=\[\]]', Punctuation),
            (r'[a-z_.@$][\w.@$]*', Name),
        ]
    }

    def analyse_text(text):
        """
        Score *text* for lexer guessing.

        Returns ``0.9`` when any line starts (after optional whitespace)
        with ``;``, and ``None`` otherwise.

        Declared without ``self``, like ``GasLexer.analyse_text``: Pygments
        invokes ``analyse_text`` as a static method that receives only the
        text, so an extra ``self`` parameter makes every call fail.
        """
        # comments in GAS start with "#"
        # re.search, not re.match: re.match only tries the pattern at the
        # very beginning of the text, which makes re.MULTILINE pointless.
        if re.search(r'^\s*;', text, re.MULTILINE):
            return 0.9