ThirdParty/Pygments/pygments/lexers/asm.py

changeset 4172
4f20dba37ab6
parent 3145
a9de05d4a22f
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
3 pygments.lexers.asm 3 pygments.lexers.asm
4 ~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~
5 5
6 Lexers for assembly languages. 6 Lexers for assembly languages.
7 7
8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 from __future__ import unicode_literals
13
14 import re 12 import re
15 13
16 from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer 14 from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer
17 from pygments.lexers.compiled import DLexer, CppLexer, CLexer 15 from pygments.lexers.c_cpp import CppLexer, CLexer
16 from pygments.lexers.d import DLexer
18 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \ 17 from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
19 Other, Keyword, Operator 18 Other, Keyword, Operator
20 19
21 __all__ = ['GasLexer', 'ObjdumpLexer','DObjdumpLexer', 'CppObjdumpLexer', 20 __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
22 'CObjdumpLexer', 'LlvmLexer', 'NasmLexer', 'Ca65Lexer'] 21 'CObjdumpLexer', 'LlvmLexer', 'NasmLexer', 'NasmObjdumpLexer',
22 'Ca65Lexer']
23 23
24 24
25 class GasLexer(RegexLexer): 25 class GasLexer(RegexLexer):
26 """ 26 """
27 For Gas (AT&T) assembly code. 27 For Gas (AT&T) assembly code.
28 """ 28 """
29 name = 'GAS' 29 name = 'GAS'
30 aliases = ['gas'] 30 aliases = ['gas', 'asm']
31 filenames = ['*.s', '*.S'] 31 filenames = ['*.s', '*.S']
32 mimetypes = ['text/x-gas'] 32 mimetypes = ['text/x-gas']
33 33
34 #: optional Comment or Whitespace 34 #: optional Comment or Whitespace
35 string = r'"(\\"|[^"])*"' 35 string = r'"(\\"|[^"])*"'
36 char = r'[a-zA-Z$._0-9@-]' 36 char = r'[\w$.@-]'
37 identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)' 37 identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)'
38 number = r'(?:0[xX][a-zA-Z0-9]+|\d+)' 38 number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'
39 39
40 tokens = { 40 tokens = {
41 'root': [ 41 'root': [
96 return True 96 return True
97 elif re.match(r'^\.\w+', text, re.M): 97 elif re.match(r'^\.\w+', text, re.M):
98 return 0.1 98 return 0.1
99 99
100 100
101 class ObjdumpLexer(RegexLexer): 101 def _objdump_lexer_tokens(asm_lexer):
102 """ 102 """
103 For the output of 'objdump -dr' 103 Common objdump lexer tokens to wrap an ASM lexer.
104 """ 104 """
105 name = 'objdump' 105 hex_re = r'[0-9A-Za-z]'
106 aliases = ['objdump'] 106 return {
107 filenames = ['*.objdump']
108 mimetypes = ['text/x-objdump']
109
110 hex = r'[0-9A-Za-z]'
111
112 tokens = {
113 'root': [ 107 'root': [
114 # File name & format: 108 # File name & format:
115 ('(.*?)(:)( +file format )(.*?)$', 109 ('(.*?)(:)( +file format )(.*?)$',
116 bygroups(Name.Label, Punctuation, Text, String)), 110 bygroups(Name.Label, Punctuation, Text, String)),
117 # Section header 111 # Section header
118 ('(Disassembly of section )(.*?)(:)$', 112 ('(Disassembly of section )(.*?)(:)$',
119 bygroups(Text, Name.Label, Punctuation)), 113 bygroups(Text, Name.Label, Punctuation)),
120 # Function labels 114 # Function labels
121 # (With offset) 115 # (With offset)
122 ('('+hex+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$', 116 ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
123 bygroups(Number.Hex, Text, Punctuation, Name.Function, 117 bygroups(Number.Hex, Text, Punctuation, Name.Function,
124 Punctuation, Number.Hex, Punctuation)), 118 Punctuation, Number.Hex, Punctuation)),
125 # (Without offset) 119 # (Without offset)
126 ('('+hex+'+)( )(<)(.*?)(>:)$', 120 ('('+hex_re+'+)( )(<)(.*?)(>:)$',
127 bygroups(Number.Hex, Text, Punctuation, Name.Function, 121 bygroups(Number.Hex, Text, Punctuation, Name.Function,
128 Punctuation)), 122 Punctuation)),
129 # Code line with disassembled instructions 123 # Code line with disassembled instructions
130 ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *\t)([a-zA-Z].*?)$', 124 ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$',
131 bygroups(Text, Name.Label, Text, Number.Hex, Text, 125 bygroups(Text, Name.Label, Text, Number.Hex, Text,
132 using(GasLexer))), 126 using(asm_lexer))),
133 # Code line with ascii 127 # Code line with ascii
134 ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *)(.*?)$', 128 ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$',
135 bygroups(Text, Name.Label, Text, Number.Hex, Text, String)), 129 bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
136 # Continued code line, only raw opcodes without disassembled 130 # Continued code line, only raw opcodes without disassembled
137 # instruction 131 # instruction
138 ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)$', 132 ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$',
139 bygroups(Text, Name.Label, Text, Number.Hex)), 133 bygroups(Text, Name.Label, Text, Number.Hex)),
140 # Skipped a few bytes 134 # Skipped a few bytes
141 (r'\t\.\.\.$', Text), 135 (r'\t\.\.\.$', Text),
142 # Relocation line 136 # Relocation line
143 # (With offset) 137 # (With offset)
144 (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$', 138 (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$',
145 bygroups(Text, Name.Label, Text, Name.Property, Text, 139 bygroups(Text, Name.Label, Text, Name.Property, Text,
146 Name.Constant, Punctuation, Number.Hex)), 140 Name.Constant, Punctuation, Number.Hex)),
147 # (Without offset) 141 # (Without offset)
148 (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)$', 142 (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$',
149 bygroups(Text, Name.Label, Text, Name.Property, Text, 143 bygroups(Text, Name.Label, Text, Name.Property, Text,
150 Name.Constant)), 144 Name.Constant)),
151 (r'[^\n]+\n', Other) 145 (r'[^\n]+\n', Other)
152 ] 146 ]
153 } 147 }
154 148
155 149
150 class ObjdumpLexer(RegexLexer):
151 """
152 For the output of 'objdump -dr'
153 """
154 name = 'objdump'
155 aliases = ['objdump']
156 filenames = ['*.objdump']
157 mimetypes = ['text/x-objdump']
158
159 tokens = _objdump_lexer_tokens(GasLexer)
160
161
156 class DObjdumpLexer(DelegatingLexer): 162 class DObjdumpLexer(DelegatingLexer):
157 """ 163 """
158 For the output of 'objdump -Sr on compiled D files' 164 For the output of 'objdump -Sr on compiled D files'
159 """ 165 """
160 name = 'd-objdump' 166 name = 'd-objdump'
201 filenames = ['*.ll'] 207 filenames = ['*.ll']
202 mimetypes = ['text/x-llvm'] 208 mimetypes = ['text/x-llvm']
203 209
204 #: optional Comment or Whitespace 210 #: optional Comment or Whitespace
205 string = r'"[^"]*?"' 211 string = r'"[^"]*?"'
206 identifier = r'([-a-zA-Z$._][-a-zA-Z$._0-9]*|' + string + ')' 212 identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
207 213
208 tokens = { 214 tokens = {
209 'root': [ 215 'root': [
210 include('whitespace'), 216 include('whitespace'),
211 217
212 # Before keywords, because keywords are valid label names :(... 218 # Before keywords, because keywords are valid label names :(...
213 (identifier + '\s*:', Name.Label), 219 (identifier + '\s*:', Name.Label),
214 220
215 include('keyword'), 221 include('keyword'),
216 222
217 (r'%' + identifier, Name.Variable),#Name.Identifier.Local), 223 (r'%' + identifier, Name.Variable),
218 (r'@' + identifier, Name.Variable.Global),#Name.Identifier.Global), 224 (r'@' + identifier, Name.Variable.Global),
219 (r'%\d+', Name.Variable.Anonymous),#Name.Identifier.Anonymous), 225 (r'%\d+', Name.Variable.Anonymous),
220 (r'@\d+', Name.Variable.Global),#Name.Identifier.Anonymous), 226 (r'@\d+', Name.Variable.Global),
227 (r'#\d+', Name.Variable.Global),
221 (r'!' + identifier, Name.Variable), 228 (r'!' + identifier, Name.Variable),
222 (r'!\d+', Name.Variable.Anonymous), 229 (r'!\d+', Name.Variable.Anonymous),
223 (r'c?' + string, String), 230 (r'c?' + string, String),
224 231
225 (r'0[xX][a-fA-F0-9]+', Number), 232 (r'0[xX][a-fA-F0-9]+', Number),
242 r'|linkonce_odr|weak|weak_odr|appending|dllimport|dllexport' 249 r'|linkonce_odr|weak|weak_odr|appending|dllimport|dllexport'
243 r'|common|default|hidden|protected|extern_weak|external' 250 r'|common|default|hidden|protected|extern_weak|external'
244 r'|thread_local|zeroinitializer|undef|null|to|tail|target|triple' 251 r'|thread_local|zeroinitializer|undef|null|to|tail|target|triple'
245 r'|datalayout|volatile|nuw|nsw|nnan|ninf|nsz|arcp|fast|exact|inbounds' 252 r'|datalayout|volatile|nuw|nsw|nnan|ninf|nsz|arcp|fast|exact|inbounds'
246 r'|align|addrspace|section|alias|module|asm|sideeffect|gc|dbg' 253 r'|align|addrspace|section|alias|module|asm|sideeffect|gc|dbg'
254 r'|linker_private_weak'
255 r'|attributes|blockaddress|initialexec|localdynamic|localexec'
256 r'|prefix|unnamed_addr'
247 257
248 r'|ccc|fastcc|coldcc|x86_stdcallcc|x86_fastcallcc|arm_apcscc' 258 r'|ccc|fastcc|coldcc|x86_stdcallcc|x86_fastcallcc|arm_apcscc'
249 r'|arm_aapcscc|arm_aapcs_vfpcc' 259 r'|arm_aapcscc|arm_aapcs_vfpcc|ptx_device|ptx_kernel'
260 r'|intel_ocl_bicc|msp430_intrcc|spir_func|spir_kernel'
261 r'|x86_64_sysvcc|x86_64_win64cc|x86_thiscallcc'
250 262
251 r'|cc|c' 263 r'|cc|c'
252 264
253 r'|signext|zeroext|inreg|sret|nounwind|noreturn|noalias|nocapture' 265 r'|signext|zeroext|inreg|sret|nounwind|noreturn|noalias|nocapture'
254 r'|byval|nest|readnone|readonly' 266 r'|byval|nest|readnone|readonly'
255
256 r'|inlinehint|noinline|alwaysinline|optsize|ssp|sspreq|noredzone' 267 r'|inlinehint|noinline|alwaysinline|optsize|ssp|sspreq|noredzone'
257 r'|noimplicitfloat|naked' 268 r'|noimplicitfloat|naked'
269 r'|builtin|cold|nobuiltin|noduplicate|nonlazybind|optnone'
270 r'|returns_twice|sanitize_address|sanitize_memory|sanitize_thread'
271 r'|sspstrong|uwtable|returned'
258 272
259 r'|type|opaque' 273 r'|type|opaque'
260 274
261 r'|eq|ne|slt|sgt|sle' 275 r'|eq|ne|slt|sgt|sle'
262 r'|sge|ult|ugt|ule|uge' 276 r'|sge|ult|ugt|ule|uge'
263 r'|oeq|one|olt|ogt|ole' 277 r'|oeq|one|olt|ogt|ole'
264 r'|oge|ord|uno|ueq|une' 278 r'|oge|ord|uno|ueq|une'
265 r'|x' 279 r'|x'
280 r'|acq_rel|acquire|alignstack|atomic|catch|cleanup|filter'
281 r'|inteldialect|max|min|monotonic|nand|personality|release'
282 r'|seq_cst|singlethread|umax|umin|unordered|xchg'
266 283
267 # instructions 284 # instructions
268 r'|add|fadd|sub|fsub|mul|fmul|udiv|sdiv|fdiv|urem|srem|frem|shl' 285 r'|add|fadd|sub|fsub|mul|fmul|udiv|sdiv|fdiv|urem|srem|frem|shl'
269 r'|lshr|ashr|and|or|xor|icmp|fcmp' 286 r'|lshr|ashr|and|or|xor|icmp|fcmp'
270 287
271 r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui' 288 r'|phi|call|trunc|zext|sext|fptrunc|fpext|uitofp|sitofp|fptoui'
272 r'fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch' 289 r'|fptosi|inttoptr|ptrtoint|bitcast|select|va_arg|ret|br|switch'
273 r'|invoke|unwind|unreachable' 290 r'|invoke|unwind|unreachable'
291 r'|indirectbr|landingpad|resume'
274 292
275 r'|malloc|alloca|free|load|store|getelementptr' 293 r'|malloc|alloca|free|load|store|getelementptr'
276 294
277 r'|extractelement|insertelement|shufflevector|getresult' 295 r'|extractelement|insertelement|shufflevector|getresult'
278 r'|extractvalue|insertvalue' 296 r'|extractvalue|insertvalue'
279 297
298 r'|atomicrmw|cmpxchg|fence'
299
280 r')\b', Keyword), 300 r')\b', Keyword),
281 301
282 # Types 302 # Types
283 (r'void|float|double|x86_fp80|fp128|ppc_fp128|label|metadata', 303 (r'void|half|float|double|x86_fp80|fp128|ppc_fp128|label|metadata',
284 Keyword.Type), 304 Keyword.Type),
285 305
286 # Integer types 306 # Integer types
287 (r'i[1-9]\d*', Keyword) 307 (r'i[1-9]\d*', Keyword)
288 ] 308 ]
296 name = 'NASM' 316 name = 'NASM'
297 aliases = ['nasm'] 317 aliases = ['nasm']
298 filenames = ['*.asm', '*.ASM'] 318 filenames = ['*.asm', '*.ASM']
299 mimetypes = ['text/x-nasm'] 319 mimetypes = ['text/x-nasm']
300 320
301 identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*' 321 identifier = r'[a-z$._?][\w$.?#@~]*'
302 hexn = r'(?:0[xX][0-9a-fA-F]+|$0[0-9a-fA-F]*|[0-9]+[0-9a-fA-F]*h)' 322 hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
303 octn = r'[0-7]+q' 323 octn = r'[0-7]+q'
304 binn = r'[01]+b' 324 binn = r'[01]+b'
305 decn = r'[0-9]+' 325 decn = r'[0-9]+'
306 floatn = decn + r'\.e?' + decn 326 floatn = decn + r'\.e?' + decn
307 string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`" 327 string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
316 r'EXPORT|LIBRARY|MODULE') 336 r'EXPORT|LIBRARY|MODULE')
317 337
318 flags = re.IGNORECASE | re.MULTILINE 338 flags = re.IGNORECASE | re.MULTILINE
319 tokens = { 339 tokens = {
320 'root': [ 340 'root': [
341 (r'^\s*%', Comment.Preproc, 'preproc'),
321 include('whitespace'), 342 include('whitespace'),
322 (r'^\s*%', Comment.Preproc, 'preproc'),
323 (identifier + ':', Name.Label), 343 (identifier + ':', Name.Label),
324 (r'(%s)(\s+)(equ)' % identifier, 344 (r'(%s)(\s+)(equ)' % identifier,
325 bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration), 345 bygroups(Name.Constant, Keyword.Declaration, Keyword.Declaration),
326 'instruction-args'), 346 'instruction-args'),
327 (directives, Keyword, 'instruction-args'), 347 (directives, Keyword, 'instruction-args'),
331 ], 351 ],
332 'instruction-args': [ 352 'instruction-args': [
333 (string, String), 353 (string, String),
334 (hexn, Number.Hex), 354 (hexn, Number.Hex),
335 (octn, Number.Oct), 355 (octn, Number.Oct),
336 (binn, Number), 356 (binn, Number.Bin),
337 (floatn, Number.Float), 357 (floatn, Number.Float),
338 (decn, Number.Integer), 358 (decn, Number.Integer),
339 include('punctuation'), 359 include('punctuation'),
340 (register, Name.Builtin), 360 (register, Name.Builtin),
341 (identifier, Name.Variable), 361 (identifier, Name.Variable),
360 (type, Keyword.Type) 380 (type, Keyword.Type)
361 ], 381 ],
362 } 382 }
363 383
364 384
385 class NasmObjdumpLexer(ObjdumpLexer):
386 """
387 For the output of 'objdump -d -M intel'.
388
389 .. versionadded:: 2.0
390 """
391 name = 'objdump-nasm'
392 aliases = ['objdump-nasm']
393 filenames = ['*.objdump-intel']
394 mimetypes = ['text/x-nasm-objdump']
395
396 tokens = _objdump_lexer_tokens(NasmLexer)
397
398
365 class Ca65Lexer(RegexLexer): 399 class Ca65Lexer(RegexLexer):
366 """ 400 """
367 For ca65 assembler sources. 401 For ca65 assembler sources.
368 402
369 *New in Pygments 1.6.* 403 .. versionadded:: 1.6
370 """ 404 """
371 name = 'ca65' 405 name = 'ca65 assembler'
372 aliases = ['ca65'] 406 aliases = ['ca65']
373 filenames = ['*.s'] 407 filenames = ['*.s']
374 408
375 flags = re.IGNORECASE 409 flags = re.IGNORECASE
376 410
381 (r'[a-z_.@$][\w.@$]*:', Name.Label), 415 (r'[a-z_.@$][\w.@$]*:', Name.Label),
382 (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]' 416 (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
383 r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs' 417 r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
384 r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor' 418 r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
385 r'|bit)\b', Keyword), 419 r'|bit)\b', Keyword),
386 (r'\.[a-z0-9_]+', Keyword.Pseudo), 420 (r'\.\w+', Keyword.Pseudo),
387 (r'[-+~*/^&|!<>=]', Operator), 421 (r'[-+~*/^&|!<>=]', Operator),
388 (r'"[^"\n]*.', String), 422 (r'"[^"\n]*.', String),
389 (r"'[^'\n]*.", String.Char), 423 (r"'[^'\n]*.", String.Char),
390 (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex), 424 (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
391 (r'\d+|%[01]+', Number.Integer), 425 (r'\d+', Number.Integer),
392 (r'[#,.:()=]', Punctuation), 426 (r'%[01]+', Number.Bin),
427 (r'[#,.:()=\[\]]', Punctuation),
393 (r'[a-z_.@$][\w.@$]*', Name), 428 (r'[a-z_.@$][\w.@$]*', Name),
394 ] 429 ]
395 } 430 }
396 431
397 def analyse_text(self, text): 432 def analyse_text(self, text):

eric ide

mercurial