ThirdParty/Pygments/pygments/lexers/asm.py

changeset 0
de9c2efb9d02
child 684
2f29a0b6e1c7
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.asm
4 ~~~~~~~~~~~~~~~~~~~
5
6 Lexers for assembly languages.
7
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 try:
14 set
15 except NameError:
16 from sets import Set as set
17
18 from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer
19 from pygments.lexers.compiled import DLexer, CppLexer, CLexer
20 from pygments.token import *
21
22 __all__ = ['GasLexer', 'ObjdumpLexer','DObjdumpLexer', 'CppObjdumpLexer',
23 'CObjdumpLexer', 'LlvmLexer', 'NasmLexer']
24
25
class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    # Regex fragments that the token rules below are assembled from.
    # Double-quoted string; a backslash-escaped quote does not end it.
    string = r'"(\\"|[^"])*"'
    # Any single character that may appear inside an identifier.
    char = r'[a-zA-Z$._0-9@]'
    # Either a letter, "$" or "_" followed by identifier characters, or a
    # dot followed by at least one identifier character.
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    # "0x"-prefixed run of alphanumerics, or a plain run of decimal digits.
    number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            (r'[\r\n]+', Text, '#pop'),

            (r'#.*?$', Comment, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)('+identifier+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Numeric constants ("$42", "$0x10").  The dollar sign must be
            # escaped (a bare "$" is the end-of-line anchor) and the rule
            # must precede the identifier rule, because "$" is also a valid
            # first identifier character and would otherwise win.  The
            # lookahead requires the number to be the whole operand, so
            # things like "$1f" or "$sym" still fall through to the
            # identifier rule below.
            (r'\$' + number + '(?!' + char + ')', Number.Integer),
            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            ('%' + identifier, Name.Variable),
            (r'[\r\n]+', Text, '#pop'),
            (r'#.*?$', Comment, '#pop'),
            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'#.*?\n', Comment)
        ],
        'punctuation': [
            (r'[-*,.():]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Score how likely *text* is to be GAS source.

        Returns a number rather than a regex match object, since the
        result is meant to be used as a confidence value between 0.0 and
        1.0.  A text that begins with a dot followed by word characters
        (e.g. an assembler directive) is only a weak hint, hence the low
        score.
        """
        # re.match only tries position 0, so this checks the very start of
        # the text even though re.M is passed.
        if re.match(r'^\.\w+', text, re.M):
            return 0.1
        return 0.0
96
class ObjdumpLexer(RegexLexer):
    """
    For the output of 'objdump -dr'
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    # One address/opcode character.  Note that the class accepts any ASCII
    # letter or digit, not only the sixteen hexadecimal digits.
    hex = r'[0-9A-Za-z]'

    tokens = {
        'root': [
            # File name & format:
            ('(.*?)(:)( +file format )(.*?)$',
             bygroups(Name.Label, Punctuation, Text, String)),
            # Section header
            ('(Disassembly of section )(.*?)(:)$',
             bygroups(Text, Name.Label, Punctuation)),
            # Function labels
            # (With offset)
            ('('+hex+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation, Number.Hex, Punctuation)),
            # (Without offset)
            ('('+hex+'+)( )(<)(.*?)(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation)),
            # Code line with disassembled instructions; the instruction
            # text itself is handed to GasLexer.
            ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *\t)([a-zA-Z].*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text,
                      using(GasLexer))),
            # Code line with ascii
            ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)( *)(.*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
            # Continued code line, only raw opcodes without disassembled
            # instruction.
            # NOTE(review): the two trailing groups of the previous rule can
            # both match the empty string, so that rule already matches
            # every line this one would; kept for documentation value.
            ('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)$',
             bygroups(Text, Name.Label, Text, Number.Hex)),
            # Skipped a few bytes.  Split into a plain and a raw literal so
            # that "\." is not an invalid escape sequence; the resulting
            # pattern text is unchanged (a tab followed by three dots).
            ('\t' + r'\.\.\.$', Text),
            # Relocation line
            # (With offset)
            ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant, Punctuation, Number.Hex)),
            # (Without offset)
            ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant)),
            # Anything not recognised above is passed through as Other, one
            # whole line at a time.
            ('[^\n]+\n', Other)
        ]
    }
150
151
class DObjdumpLexer(DelegatingLexer):
    """
    Lexer for the output of ``objdump -Sr`` run on compiled D files.

    Built by combining `DLexer` and `ObjdumpLexer` through
    `DelegatingLexer`.
    """
    mimetypes = ['text/x-d-objdump']
    filenames = ['*.d-objdump']
    aliases = ['d-objdump']
    name = 'd-objdump'

    def __init__(self, **options):
        # All lexer options are forwarded untouched to the delegating base.
        base_init = super(DObjdumpLexer, self).__init__
        base_init(DLexer, ObjdumpLexer, **options)
163
164
class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of 'objdump -Sr on compiled C++ files'
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' was a typo for 'c++-objdump'.  The correct spelling is
    # now accepted; the misspelled alias is kept so that existing users of
    # it keep working.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        # Combine CppLexer and ObjdumpLexer; options are passed through
        # unchanged to DelegatingLexer.
        super(CppObjdumpLexer, self).__init__(CppLexer, ObjdumpLexer, **options)
176
177
class CObjdumpLexer(DelegatingLexer):
    """
    Lexer for the output of ``objdump -Sr`` run on compiled C files.

    Built by combining `CLexer` and `ObjdumpLexer` through
    `DelegatingLexer`.
    """
    mimetypes = ['text/x-c-objdump']
    filenames = ['*.c-objdump']
    aliases = ['c-objdump']
    name = 'c-objdump'

    def __init__(self, **options):
        # All lexer options are forwarded untouched to the delegating base.
        base_init = super(CObjdumpLexer, self).__init__
        base_init(CLexer, ObjdumpLexer, **options)
189
190
class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.
    """
    name = 'LLVM'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Regex fragments shared by the rules below.
    # Double-quoted string without any escape handling.
    string = r'"[^"]*?"'
    # A bare name, or a quoted string used as a name.
    identifier = r'([a-zA-Z$._][a-zA-Z$._0-9]*|' + string + ')'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            # (raw string: "\s" is not a valid escape in a plain literal)
            (r'^\s*' + identifier + r'\s*:', Name.Label),

            include('keyword'),

            # "%name" is a local identifier, "@name" a global one and
            # "%<digits>" an anonymous one.
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Constant),
            (r'%\d+', Name.Variable.Anonymous),
            (r'c?' + string, String),

            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (r'(void|label|float|double|opaque'
             r'|to'
             r'|alias|type'
             r'|zeroext|signext|inreg|sret|noalias|noreturn|nounwind|nest'
             r'|module|asm|target|datalayout|triple'
             r'|true|false|null|zeroinitializer|undef'
             r'|global|internal|external|linkonce|weak|appending|extern_weak'
             r'|dllimport|dllexport'
             r'|ccc|fastcc|coldcc|cc|tail'
             r'|default|hidden|protected'
             r'|thread_local|constant|align|section'
             r'|define|declare'

             # Statements & expressions
             r'|trunc|zext|sext|fptrunc|fpext|fptoui|fptosi|uitofp|sitofp'
             r'|ptrtoint|inttoptr|bitcast|getelementptr|select|icmp|fcmp'
             r'|extractelement|insertelement|shufflevector'
             r'|sideeffect|volatile'
             r'|ret|br|switch|invoke|unwind|unreachable'
             r'|add|sub|mul|udiv|sdiv|fdiv|urem|srem|frem'
             r'|shl|lshr|ashr|and|or|xor'
             r'|malloc|free|alloca|load|store'
             r'|phi|call|va_arg|va_list'

             # Comparison condition codes for icmp
             r'|eq|ne|ugt|uge|ult|ule|sgt|sge|slt|sle'
             # Ditto for fcmp: (minus keywords mentioned in other contexts;
             # ugt/uge/ult/ule are already covered by the icmp list above)
             r'|oeq|ogt|oge|olt|ole|one|ord|ueq|une|uno'

             r')\b', Keyword),
            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
263
264
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM']
    mimetypes = ['text/x-nasm']

    # Regex fragments shared by the rules below.  All patterns are compiled
    # with re.IGNORECASE (see ``flags``).
    identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*'
    # Hex literals: "0x..." prefix, "$0..." prefix, or "...h" suffix.
    #  * The dollar sign is escaped; unescaped it is the end-of-line anchor
    #    and the "$0..." form could never match.
    #  * The suffix form must begin with a decimal digit, otherwise the
    #    registers ah/bh/ch/dh (and names such as "deadh") would be lexed as
    #    numbers, because this pattern is tried before the register rule.
    hexn = r'(?:0[xX][0-9a-fA-F]+|\$0[0-9a-fA-F]*|[0-9][0-9a-fA-F]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    # Double- or single-quoted string; a backslash-escaped quote of the
    # same kind does not end it.
    string = r'"(\\"|[^"])*"|' + r"'(\\'|[^'])*'"
    declkw = r'(?:res|d)[bwdqt]|times'
    register = (r'[a-d][lh]|e?[a-d]x|e?[sb]p|e?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
    wordop = r'seg|wrt|strict'
    type = r'byte|[dq]?word'
    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                  r'COMMON|CPU|GROUP|UPPERCASE|IMPORT|EXPORT|LIBRARY|MODULE')

    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        # The keyword-like alternations (directives, declaration keywords,
        # registers, word operators, types) are wrapped in "(?:...)\b" so
        # that they only match whole words.  Without the boundary, "cpuid"
        # was split into the directive "CPU" plus "id", "alpha" into the
        # register "al" plus "pha", and so on.
        'root': [
            include('whitespace'),
            (r'^\s*%', Comment.Preproc, 'preproc'),
            (identifier + ':', Name.Label),
            (r'(?:%s)\b' % directives, Keyword, 'instruction-args'),
            (r'(%s)\s+(equ)\b' % identifier,
             bygroups(Name.Constant, Keyword.Declaration),
             'instruction-args'),
            (r'(?:%s)\b' % declkw, Keyword.Declaration, 'instruction-args'),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'instruction-args': [
            (string, String),
            # Order matters: the suffixed/prefixed number forms must be
            # tried before the plain decimal rule.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (r'(?:%s)\b' % register, Name.Builtin),
            (identifier, Name.Variable),
            (r'[\r\n]+', Text, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Text),
            (r'[ \t]+', Text),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (r'(?:%s)\b' % wordop, Operator.Word),
            (r'(?:%s)\b' % type, Keyword.Type)
        ],
    }

eric ide

mercurial