3 pygments.lexers.asm |
3 pygments.lexers.asm |
4 ~~~~~~~~~~~~~~~~~~~ |
4 ~~~~~~~~~~~~~~~~~~~ |
5 |
5 |
6 Lexers for assembly languages. |
6 Lexers for assembly languages. |
7 |
7 |
8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. |
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. |
9 :license: BSD, see LICENSE for details. |
9 :license: BSD, see LICENSE for details. |
10 """ |
10 """ |
11 |
11 |
12 from __future__ import unicode_literals |
|
13 |
|
14 import re |
12 import re |
15 |
13 |
16 from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer |
14 from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer |
17 from pygments.lexers.compiled import DLexer, CppLexer, CLexer |
15 from pygments.lexers.c_cpp import CppLexer, CLexer |
|
16 from pygments.lexers.d import DLexer |
18 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ |
17 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ |
19 Other, Keyword, Operator |
18 Other, Keyword, Operator |
20 |
19 |
21 __all__ = ['GasLexer', 'ObjdumpLexer','DObjdumpLexer', 'CppObjdumpLexer', |
20 __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer', |
22 'CObjdumpLexer', 'LlvmLexer', 'NasmLexer', 'Ca65Lexer'] |
21 'CObjdumpLexer', 'LlvmLexer', 'NasmLexer', 'NasmObjdumpLexer', |
|
22 'Ca65Lexer'] |
23 |
23 |
24 |
24 |
25 class GasLexer(RegexLexer): |
25 class GasLexer(RegexLexer): |
26 """ |
26 """ |
27 For Gas (AT&T) assembly code. |
27 For Gas (AT&T) assembly code. |
28 """ |
28 """ |
29 name = 'GAS' |
29 name = 'GAS' |
30 aliases = ['gas'] |
30 aliases = ['gas', 'asm'] |
31 filenames = ['*.s', '*.S'] |
31 filenames = ['*.s', '*.S'] |
32 mimetypes = ['text/x-gas'] |
32 mimetypes = ['text/x-gas'] |
33 |
33 |
34 #: optional Comment or Whitespace |
34 #: optional Comment or Whitespace |
35 string = r'"(\\"|[^"])*"' |
35 string = r'"(\\"|[^"])*"' |
36 char = r'[a-zA-Z$._0-9@-]' |
36 char = r'[\w$.@-]' |
37 identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)' |
37 identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)' |
38 number = r'(?:0[xX][a-zA-Z0-9]+|\d+)' |
38 number = r'(?:0[xX][a-zA-Z0-9]+|\d+)' |
39 |
39 |
40 tokens = { |
40 tokens = { |
41 'root': [ |
41 'root': [ |
96 return True |
96 return True |
97 elif re.match(r'^\.\w+', text, re.M): |
97 elif re.match(r'^\.\w+', text, re.M): |
98 return 0.1 |
98 return 0.1 |
99 |
99 |
100 |
100 |
101 class ObjdumpLexer(RegexLexer): |
101 def _objdump_lexer_tokens(asm_lexer): |
102 """ |
102 """ |
103 For the output of 'objdump -dr' |
103 Common objdump lexer tokens to wrap an ASM lexer. |
104 """ |
104 """ |
105 name = 'objdump' |
105 hex_re = r'[0-9A-Za-z]' |
106 aliases = ['objdump'] |
106 return { |
107 filenames = ['*.objdump'] |
|
108 mimetypes = ['text/x-objdump'] |
|
109 |
|
110 hex = r'[0-9A-Za-z]' |
|
111 |
|
112 tokens = { |
|
113 'root': [ |
107 'root': [ |
114 # File name & format: |
108 # File name & format: |
115 ('(.*?)(:)( +file format )(.*?)$', |
109 ('(.*?)(:)( +file format )(.*?)$', |
116 bygroups(Name.Label, Punctuation, Text, String)), |
110 bygroups(Name.Label, Punctuation, Text, String)), |
117 # Section header |
111 # Section header |
118 ('(Disassembly of section )(.*?)(:)$', |
112 ('(Disassembly of section )(.*?)(:)$', |
119 bygroups(Text, Name.Label, Punctuation)), |
113 bygroups(Text, Name.Label, Punctuation)), |
120 # Function labels |
114 # Function labels |
121 # (With offset) |
115 # (With offset) |
122 ('('+hex+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', |
116 ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', |
123 bygroups(Number.Hex, Text, Punctuation, Name.Function, |
117 bygroups(Number.Hex, Text, Punctuation, Name.Function, |
124 Punctuation, Number.Hex, Punctuation)), |
118 Punctuation, Number.Hex, Punctuation)), |
125 # (Without offset) |
119 # (Without offset) |
126 ('('+hex+'+)( )(<)(.*?)(>:)$', |
120 ('('+hex_re+'+)( )(<)(.*?)(>:)$', |
127 bygroups(Number.Hex, Text, Punctuation, Name.Function, |
121 bygroups(Number.Hex, Text, Punctuation, Name.Function, |
128 Punctuation)), |
122 Punctuation)), |
129 # Code line with disassembled instructions |
123 # Code line with disassembled instructions |
130 ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *\t)([a-zA-Z].*?)$', |
124 ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$', |
131 bygroups(Text, Name.Label, Text, Number.Hex, Text, |
125 bygroups(Text, Name.Label, Text, Number.Hex, Text, |
132 using(GasLexer))), |
126 using(asm_lexer))), |
133 # Code line with ascii |
127 # Code line with ascii |
134 ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *)(.*?)$', |
128 ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$', |
135 bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), |
129 bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), |
136 # Continued code line, only raw opcodes without disassembled |
130 # Continued code line, only raw opcodes without disassembled |
137 # instruction |
131 # instruction |
138 ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)$', |
132 ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$', |
139 bygroups(Text, Name.Label, Text, Number.Hex)), |
133 bygroups(Text, Name.Label, Text, Number.Hex)), |
140 # Skipped a few bytes |
134 # Skipped a few bytes |
141 (r'\t\.\.\.$', Text), |
135 (r'\t\.\.\.$', Text), |
142 # Relocation line |
136 # Relocation line |
143 # (With offset) |
137 # (With offset) |
144 (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$', |
138 (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$', |
145 bygroups(Text, Name.Label, Text, Name.Property, Text, |
139 bygroups(Text, Name.Label, Text, Name.Property, Text, |
146 Name.Constant, Punctuation, Number.Hex)), |
140 Name.Constant, Punctuation, Number.Hex)), |
147 # (Without offset) |
141 # (Without offset) |
148 (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)$', |
142 (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$', |
149 bygroups(Text, Name.Label, Text, Name.Property, Text, |
143 bygroups(Text, Name.Label, Text, Name.Property, Text, |
150 Name.Constant)), |
144 Name.Constant)), |
151 (r'[^\n]+\n', Other) |
145 (r'[^\n]+\n', Other) |
152 ] |
146 ] |
153 } |
147 } |
154 |
148 |
155 |
149 |
|
150 class ObjdumpLexer(RegexLexer): |
|
151 """ |
|
152 For the output of 'objdump -dr' |
|
153 """ |
|
154 name = 'objdump' |
|
155 aliases = ['objdump'] |
|
156 filenames = ['*.objdump'] |
|
157 mimetypes = ['text/x-objdump'] |
|
158 |
|
159 tokens = _objdump_lexer_tokens(GasLexer) |
|
160 |
|
161 |
156 class DObjdumpLexer(DelegatingLexer): |
162 class DObjdumpLexer(DelegatingLexer): |
157 """ |
163 """ |
158 For the output of 'objdump -Sr on compiled D files' |
164 For the output of 'objdump -Sr on compiled D files' |
159 """ |
165 """ |
160 name = 'd-objdump' |
166 name = 'd-objdump' |
201 filenames = ['*.ll'] |
207 filenames = ['*.ll'] |
202 mimetypes = ['text/x-llvm'] |
208 mimetypes = ['text/x-llvm'] |
203 |
209 |
204 #: optional Comment or Whitespace |
210 #: optional Comment or Whitespace |
205 string = r'"[^"]*?"' |
211 string = r'"[^"]*?"' |
206 identifier = r'([-a-zA-Z$._][-a-zA-Z$._0-9]*|' + string + ')' |
212 identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')' |
207 |
213 |
208 tokens = { |
214 tokens = { |
209 'root': [ |
215 'root': [ |
210 include('whitespace'), |
216 include('whitespace'), |
211 |
217 |
212 # Before keywords, because keywords are valid label names :(... |
218 # Before keywords, because keywords are valid label names :(... |
213 (identifier + '\s*:', Name.Label), |
219 (identifier + '\s*:', Name.Label), |
214 |
220 |
215 include('keyword'), |
221 include('keyword'), |
216 |
222 |
217 (r'%' + identifier, Name.Variable),#Name.Identifier.Local), |
223 (r'%' + identifier, Name.Variable), |
218 (r'@' + identifier, Name.Variable.Global),#Name.Identifier.Global), |
224 (r'@' + identifier, Name.Variable.Global), |
219 (r'%\d+', Name.Variable.Anonymous),#Name.Identifier.Anonymous), |
225 (r'%\d+', Name.Variable.Anonymous), |
220 (r'@\d+', Name.Variable.Global),#Name.Identifier.Anonymous), |
226 (r'@\d+', Name.Variable.Global), |
|
227 (r'#\d+', Name.Variable.Global), |
221 (r'!' + identifier, Name.Variable), |
228 (r'!' + identifier, Name.Variable), |
222 (r'!\d+', Name.Variable.Anonymous), |
229 (r'!\d+', Name.Variable.Anonymous), |
223 (r'c?' + string, String), |
230 (r'c?' + string, String), |
224 |
231 |
225 (r'0[xX][a-fA-F0-9]+', Number), |
232 (r'0[xX][a-fA-F0-9]+', Number), |
242 r'|linkonce_odr|weak|weak_odr|appending|dllimport|dllexport' |
249 r'|linkonce_odr|weak|weak_odr|appending|dllimport|dllexport' |
243 r'|common|default|hidden|protected|extern_weak|external' |
250 r'|common|default|hidden|protected|extern_weak|external' |
244 r'|thread_local|zeroinitializer|undef|null|to|tail|target|triple' |
251 r'|thread_local|zeroinitializer|undef|null|to|tail|target|triple' |
245 r'|datalayout|volatile|nuw|nsw|nnan|ninf|nsz|arcp|fast|exact|inbounds' |
252 r'|datalayout|volatile|nuw|nsw|nnan|ninf|nsz|arcp|fast|exact|inbounds' |
246 r'|align|addrspace|section|alias|module|asm|sideeffect|gc|dbg' |
253 r'|align|addrspace|section|alias|module|asm|sideeffect|gc|dbg' |
|
254 r'|linker_private_weak' |
|
255 r'|attributes|blockaddress|initialexec|localdynamic|localexec' |
|
256 r'|prefix|unnamed_addr' |
247 |
257 |
248 r'|ccc|fastcc|coldcc|x86_stdcallcc|x86_fastcallcc|arm_apcscc' |
258 r'|ccc|fastcc|coldcc|x86_stdcallcc|x86_fastcallcc|arm_apcscc' |
249 r'|arm_aapcscc|arm_aapcs_vfpcc' |
259 r'|arm_aapcscc|arm_aapcs_vfpcc|ptx_device|ptx_kernel' |
|
260 r'|intel_ocl_bicc|msp430_intrcc|spir_func|spir_kernel' |
|
261 r'|x86_64_sysvcc|x86_64_win64cc|x86_thiscallcc' |
250 |
262 |
251 r'|cc|c' |
263 r'|cc|c' |
252 |
264 |
253 r'|signext|zeroext|inreg|sret|nounwind|noreturn|noalias|nocapture' |
265 r'|signext|zeroext|inreg|sret|nounwind|noreturn|noalias|nocapture' |
254 r'|byval|nest|readnone|readonly' |
266 r'|byval|nest|readnone|readonly' |
255 |
|
256 r'|inlinehint|noinline|alwaysinline|optsize|ssp|sspreq|noredzone' |
267 r'|inlinehint|noinline|alwaysinline|optsize|ssp|sspreq|noredzone' |
257 r'|noimplicitfloat|naked' |
268 r'|noimplicitfloat|naked' |
|
269 r'|builtin|cold|nobuiltin|noduplicate|nonlazybind|optnone' |
|
270 r'|returns_twice|sanitize_address|sanitize_memory|sanitize_thread' |
|
271 r'|sspstrong|uwtable|returned' |
258 |
272 |
259 r'|type|opaque' |
273 r'|type|opaque' |
260 |
274 |
261 r'|eq|ne|slt|sgt|sle' |
275 r'|eq|ne|slt|sgt|sle' |
262 r'|sge|ult|ugt|ule|uge' |
276 r'|sge|ult|ugt|ule|uge' |
263 r'|oeq|one|olt|ogt|ole' |
277 r'|oeq|one|olt|ogt|ole' |
264 r'|oge|ord|uno|ueq|une' |
278 r'|oge|ord|uno|ueq|une' |
265 r'|x' |
279 r'|x' |
|
280 r'|acq_rel|acquire|alignstack|atomic|catch|cleanup|filter' |
|
281 r'|inteldialect|max|min|monotonic|nand|personality|release' |
|
282 r'|seq_cst|singlethread|umax|umin|unordered|xchg' |
266 |
283 |
267 # instructions |
284 # instructions |
268 r'|add|fadd|sub|fsub|mul|fmul|udiv|sdiv|fdiv|urem|srem|frem|shl' |
285 r'|add|fadd|sub|fsub|mul|fmul|udiv|sdiv|fdiv|urem|srem|frem|shl' |
269 r'|lshr|ashr|and|or|xor|icmp|fcmp' |
286 r'|lshr|ashr|and|or|xor|icmp|fcmp' |
270 |
287 |
271 r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui' |
288 r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui' |
272 r'fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch' |
289 r'|fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch' |
273 r'|invoke|unwind|unreachable' |
290 r'|invoke|unwind|unreachable' |
|
291 r'|indirectbr|landingpad|resume' |
274 |
292 |
275 r'|malloc|alloca|free|load|store|getelementptr' |
293 r'|malloc|alloca|free|load|store|getelementptr' |
276 |
294 |
277 r'|extractelement|insertelement|shufflevector|getresult' |
295 r'|extractelement|insertelement|shufflevector|getresult' |
278 r'|extractvalue|insertvalue' |
296 r'|extractvalue|insertvalue' |
279 |
297 |
|
298 r'|atomicrmw|cmpxchg|fence' |
|
299 |
280 r')\b', Keyword), |
300 r')\b', Keyword), |
281 |
301 |
282 # Types |
302 # Types |
283 (r'void|float|double|x86_fp80|fp128|ppc_fp128|label|metadata', |
303 (r'void|half|float|double|x86_fp80|fp128|ppc_fp128|label|metadata', |
284 Keyword.Type), |
304 Keyword.Type), |
285 |
305 |
286 # Integer types |
306 # Integer types |
287 (r'i[1-9]\d*', Keyword) |
307 (r'i[1-9]\d*', Keyword) |
288 ] |
308 ] |
316 r'EXPORT|LIBRARY|MODULE') |
336 r'EXPORT|LIBRARY|MODULE') |
317 |
337 |
318 flags = re.IGNORECASE | re.MULTILINE |
338 flags = re.IGNORECASE | re.MULTILINE |
319 tokens = { |
339 tokens = { |
320 'root': [ |
340 'root': [ |
|
341 (r'^\s*%', Comment.Preproc, 'preproc'), |
321 include('whitespace'), |
342 include('whitespace'), |
322 (r'^\s*%', Comment.Preproc, 'preproc'), |
|
323 (identifier + ':', Name.Label), |
343 (identifier + ':', Name.Label), |
324 (r'(%s)(\s+)(equ)' % identifier, |
344 (r'(%s)(\s+)(equ)' % identifier, |
325 bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration), |
345 bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration), |
326 'instruction-args'), |
346 'instruction-args'), |
327 (directives, Keyword, 'instruction-args'), |
347 (directives, Keyword, 'instruction-args'), |
381 (r'[a-z_.@$][\w.@$]*:', Name.Label), |
415 (r'[a-z_.@$][\w.@$]*:', Name.Label), |
382 (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]' |
416 (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]' |
383 r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs' |
417 r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs' |
384 r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor' |
418 r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor' |
385 r'|bit)\b', Keyword), |
419 r'|bit)\b', Keyword), |
386 (r'\.[a-z0-9_]+', Keyword.Pseudo), |
420 (r'\.\w+', Keyword.Pseudo), |
387 (r'[-+~*/^&|!<>=]', Operator), |
421 (r'[-+~*/^&|!<>=]', Operator), |
388 (r'"[^"\n]*.', String), |
422 (r'"[^"\n]*.', String), |
389 (r"'[^'\n]*.", String.Char), |
423 (r"'[^'\n]*.", String.Char), |
390 (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex), |
424 (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex), |
391 (r'\d+|%[01]+', Number.Integer), |
425 (r'\d+', Number.Integer), |
392 (r'[#,.:()=]', Punctuation), |
426 (r'%[01]+', Number.Bin), |
|
427 (r'[#,.:()=\[\]]', Punctuation), |
393 (r'[a-z_.@$][\w.@$]*', Name), |
428 (r'[a-z_.@$][\w.@$]*', Name), |
394 ] |
429 ] |
395 } |
430 } |
396 |
431 |
397 def analyse_text(self, text): |
432 def analyse_text(self, text): |