|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.asm |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for assembly languages. |
|
7 |
|
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 try: |
|
14 set |
|
15 except NameError: |
|
16 from sets import Set as set |
|
17 |
|
18 from pygments.lexer import RegexLexer, include, bygroups, using, DelegatingLexer |
|
19 from pygments.lexers.compiled import DLexer, CppLexer, CLexer |
|
20 from pygments.token import * |
|
21 |
|
# Public lexer classes exported by this module.
__all__ = [
    'GasLexer',
    'ObjdumpLexer',
    'DObjdumpLexer',
    'CppObjdumpLexer',
    'CObjdumpLexer',
    'LlvmLexer',
    'NasmLexer',
]
|
24 |
|
25 |
|
class GasLexer(RegexLexer):
    """
    For Gas (AT&T) assembly code.
    """
    name = 'GAS'
    aliases = ['gas']
    filenames = ['*.s', '*.S']
    mimetypes = ['text/x-gas']

    # Regex fragments that the token rules below are assembled from.
    string = r'"(\\"|[^"])*"'
    char = r'[a-zA-Z$._0-9@]'
    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
    number = r'(?:0[xX][a-zA-Z0-9]+|\d+)'

    tokens = {
        'root': [
            include('whitespace'),
            (identifier + ':', Name.Label),
            (r'\.' + identifier, Name.Attribute, 'directive-args'),
            (r'lock|rep(n?z)?|data\d+', Name.Attribute),
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'directive-args': [
            (identifier, Name.Constant),
            (string, String),
            ('@' + identifier, Name.Attribute),
            (number, Number.Integer),
            # A line break or a comment ends the directive.
            (r'[\r\n]+', Text, '#pop'),

            (r'#.*?$', Comment, '#pop'),

            include('punctuation'),
            include('whitespace')
        ],
        'instruction-args': [
            # For objdump-disassembled code, shouldn't occur in
            # actual assembler input
            ('([a-z0-9]+)( )(<)(' + identifier + ')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation)),
            ('([a-z0-9]+)( )(<)(' + identifier + ')([-+])(' + number + ')(>)',
             bygroups(Number.Hex, Text, Punctuation, Name.Constant,
                      Punctuation, Number.Integer, Punctuation)),

            # Numeric constants ("$42", "$0x1f").  The dollar sign has to be
            # escaped (a bare ``$`` is the end-of-line anchor and made this
            # rule unmatchable), and the rule has to precede the identifier
            # rule because ``identifier`` itself accepts a leading ``$``.
            # The trailing ``\b`` leaves operands such as "$1f" (a digit
            # followed by letters) to the identifier rule, as before.
            (r'\$' + number + r'\b', Number.Integer),
            # Address constants
            (identifier, Name.Constant),
            (number, Number.Integer),
            # Registers
            ('%' + identifier, Name.Variable),
            # A line break or a comment ends the instruction.
            (r'[\r\n]+', Text, '#pop'),
            (r'#.*?$', Comment, '#pop'),
            include('punctuation'),
            include('whitespace')
        ],
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            (r'#.*?\n', Comment)
        ],
        'punctuation': [
            (r'[-*,.():]+', Punctuation)
        ]
    }

    def analyse_text(text):
        """
        Return a score for how likely `text` is Gas assembly.

        The text gets a low positive score when it starts with an assembler
        directive (a dot followed by word characters) and 0.0 otherwise.
        ``re.match`` anchors at the very start of `text` even with ``re.M``,
        so only the first line is inspected.

        A number is returned rather than the raw match object, which cannot
        be converted to a float.
        NOTE(review): assumes Pygments wraps this function as a static
        analyser expecting a value in the range 0.0-1.0 -- confirm against
        pygments.lexer.
        """
        if re.match(r'^\.\w+', text, re.M):
            return 0.1
        return 0.0
|
96 |
|
class ObjdumpLexer(RegexLexer):
    """
    Lexer for the listing printed by ``objdump -dr``.
    """
    name = 'objdump'
    aliases = ['objdump']
    filenames = ['*.objdump']
    mimetypes = ['text/x-objdump']

    # A single "hex digit" as used by the rules below; note that the class
    # accepts any ASCII letter or digit.
    hex = r'[0-9A-Za-z]'

    # Pattern heads shared by several rules.
    # "<address> <<symbol>" at the start of a function label line.
    _label_head = '(' + hex + '+)( )(<)(.*?)'
    # "<address>:<TAB><opcode bytes>" at the start of a code line.
    _code_head = '( *)(' + hex + r'+:)(\t)((?:' + hex + hex + ' )+)'
    # "<TAB><TAB><TAB><address>: <type><TAB><symbol>" of a relocation line.
    _reloc_head = '(\t\t\t)(' + hex + '+:)( )([^\t]+)(\t)(.*?)'

    tokens = {
        'root': [
            # File name & format
            ('(.*?)(:)( +file format )(.*?)$',
             bygroups(Name.Label, Punctuation, Text, String)),
            # Section header
            ('(Disassembly of section )(.*?)(:)$',
             bygroups(Text, Name.Label, Punctuation)),
            # Function label carrying an offset
            (_label_head + '([-+])(0[xX][A-Za-z0-9]+)(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation, Number.Hex, Punctuation)),
            # Function label without an offset
            (_label_head + '(>:)$',
             bygroups(Number.Hex, Text, Punctuation, Name.Function,
                      Punctuation)),
            # Code line followed by a disassembled instruction, which is
            # handed over to GasLexer
            (_code_head + '( *\t)([a-zA-Z].*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text,
                      using(GasLexer))),
            # Code line followed by ascii
            (_code_head + '( *)(.*?)$',
             bygroups(Text, Name.Label, Text, Number.Hex, Text, String)),
            # Continuation line: raw opcodes only, no disassembled
            # instruction
            (_code_head + '$',
             bygroups(Text, Name.Label, Text, Number.Hex)),
            # Marker for a few skipped bytes
            ('\t' + r'\.\.\.$', Text),
            # Relocation line carrying an offset
            (_reloc_head + '([-+])(0x' + hex + '+)$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant, Punctuation, Number.Hex)),
            # Relocation line without an offset
            (_reloc_head + '$',
             bygroups(Text, Name.Label, Text, Name.Property, Text,
                      Name.Constant)),
            # Any other non-empty line
            ('[^\n]+\n', Other)
        ]
    }
|
150 |
|
151 |
|
class DObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled D files.
    """
    name = 'd-objdump'
    aliases = ['d-objdump']
    filenames = ['*.d-objdump']
    mimetypes = ['text/x-d-objdump']

    def __init__(self, **options):
        # Combine the D source lexer with the objdump lexer; how the input
        # is divided between the two is up to DelegatingLexer.
        parent = super(DObjdumpLexer, self)
        parent.__init__(DLexer, ObjdumpLexer, **options)
|
163 |
|
164 |
|
class CppObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C++ files.
    """
    name = 'cpp-objdump'
    # 'c++-objdumb' is a historical misspelling of 'c++-objdump'.  The
    # correct spelling is added; the old one stays so that existing
    # references to it keep working.
    # NOTE(review): if alias lookup goes through a generated lexer registry,
    # that registry may need regenerating to pick up the new alias.
    aliases = ['cpp-objdump', 'c++-objdump', 'c++-objdumb', 'cxx-objdump']
    filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
    mimetypes = ['text/x-cpp-objdump']

    def __init__(self, **options):
        # Combine the C++ source lexer with the objdump lexer; how the input
        # is divided between the two is up to DelegatingLexer.
        super(CppObjdumpLexer, self).__init__(CppLexer, ObjdumpLexer,
                                              **options)
|
176 |
|
177 |
|
class CObjdumpLexer(DelegatingLexer):
    """
    For the output of ``objdump -Sr`` on compiled C files.
    """
    name = 'c-objdump'
    aliases = ['c-objdump']
    filenames = ['*.c-objdump']
    mimetypes = ['text/x-c-objdump']

    def __init__(self, **options):
        # Combine the C source lexer with the objdump lexer; how the input
        # is divided between the two is up to DelegatingLexer.
        parent = super(CObjdumpLexer, self)
        parent.__init__(CLexer, ObjdumpLexer, **options)
|
189 |
|
190 |
|
class LlvmLexer(RegexLexer):
    """
    Lexer for LLVM assembly code.
    """
    name = 'LLVM'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']

    # Regex fragments that the token rules below are assembled from.
    string = r'"[^"]*?"'
    identifier = r'([a-zA-Z$._][a-zA-Z$._0-9]*|' + string + ')'

    tokens = {
        'root': [
            include('whitespace'),

            # Labels are tried before keywords because a keyword is also a
            # valid label name.
            (r'^\s*' + identifier + r'\s*:', Name.Label),

            include('keyword'),

            # Local (%name), global (@name) and anonymous (%123) values
            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Constant),
            (r'%\d+', Name.Variable.Anonymous),
            (r'c?' + string, String),

            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s)+', Text),
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # One alternation over every keyword, joined in the order the
            # words are listed here.
            (r'(' + '|'.join((
                # Regular keywords
                'void label float double opaque '
                'to '
                'alias type '
                'zeroext signext inreg sret noalias noreturn nounwind nest '
                'module asm target datalayout triple '
                'true false null zeroinitializer undef '
                'global internal external linkonce weak appending '
                'extern_weak '
                'dllimport dllexport '
                'ccc fastcc coldcc cc tail '
                'default hidden protected '
                'thread_local constant align section '
                'define declare '

                # Statements & expressions
                'trunc zext sext fptrunc fpext fptoui fptosi uitofp sitofp '
                'ptrtoint inttoptr bitcast getelementptr select icmp fcmp '
                'extractelement insertelement shufflevector '
                'sideeffect volatile '
                'ret br switch invoke unwind unreachable '
                'add sub mul udiv sdiv fdiv urem srem frem '
                'shl lshr ashr and or xor '
                'malloc free alloca load store '
                'phi call va_arg va_list '

                # Comparison condition codes for icmp
                'eq ne ugt uge ult ule sgt sge slt sle '
                # Ditto for fcmp (minus keywords mentioned in other contexts)
                'oeq ogt oge olt ole one ord ueq ugt uge une uno'
            ).split()) + r')\b', Keyword),
            # Integer types
            (r'i[1-9]\d*', Keyword)
        ]
    }
|
263 |
|
264 |
|
class NasmLexer(RegexLexer):
    """
    For Nasm (Intel) assembly code.
    """
    name = 'NASM'
    aliases = ['nasm']
    filenames = ['*.asm', '*.ASM']
    mimetypes = ['text/x-nasm']

    # Regex fragments that the token rules below are assembled from.
    identifier = r'[a-zA-Z$._?][a-zA-Z0-9$._?#@~]*'
    # Hex numbers: "0x1F", "$0C8" or "0C8h".
    #  * The dollar sign is escaped; a bare ``$`` is the end-of-line anchor,
    #    so the ``$``-prefixed form could never match.
    #  * Both the ``$`` form and the ``h`` form must start with a decimal
    #    digit; without that requirement the registers ah/bh/ch/dh were
    #    lexed as hex numbers.
    hexn = r'(?:0[xX][0-9a-fA-F]+|\$[0-9][0-9a-fA-F]*|[0-9][0-9a-fA-F]*h)'
    octn = r'[0-7]+q'
    binn = r'[01]+b'
    decn = r'[0-9]+'
    floatn = decn + r'\.e?' + decn
    string = r'"(\\"|[^"])*"|' + r"'(\\'|[^'])*'"
    # The word lists below end in ``\b`` so that they only match whole
    # words; otherwise e.g. "cpuid" is split into the directive "cpu" plus
    # "id", and "size" into the register "si" plus "ze".
    declkw = r'(?:(?:res|d)[bwdqt]|times)\b'
    register = (r'(?:[a-d][lh]|e?[a-d]x|e?[sb]p|e?[sd]i|[c-gs]s|st[0-7]|'
                r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')
    wordop = r'(?:seg|wrt|strict)\b'
    type = r'(?:byte|[dq]?word)\b'
    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|'
                  r'GLOBAL|COMMON|CPU|GROUP|UPPERCASE|IMPORT|EXPORT|LIBRARY|'
                  r'MODULE)\b')

    # All rules are matched case-insensitively.
    flags = re.IGNORECASE | re.MULTILINE
    tokens = {
        'root': [
            include('whitespace'),
            # A line starting with "%" is a preprocessor line.
            (r'^\s*%', Comment.Preproc, 'preproc'),
            (identifier + ':', Name.Label),
            (directives, Keyword, 'instruction-args'),
            # "<name> equ ..." constant definition
            (r'(%s)\s+(equ)' % identifier,
             bygroups(Name.Constant, Keyword.Declaration),
             'instruction-args'),
            (declkw, Keyword.Declaration, 'instruction-args'),
            # Anything else that looks like a name is taken as a mnemonic.
            (identifier, Name.Function, 'instruction-args'),
            (r'[\r\n]+', Text)
        ],
        'instruction-args': [
            (string, String),
            # The suffixed number forms are tried before plain decimals so
            # that the longer match wins.
            (hexn, Number.Hex),
            (octn, Number.Oct),
            (binn, Number),
            (floatn, Number.Float),
            (decn, Number.Integer),
            include('punctuation'),
            (register, Name.Builtin),
            (identifier, Name.Variable),
            # A line break ends the operand list.
            (r'[\r\n]+', Text, '#pop'),
            include('whitespace')
        ],
        'preproc': [
            (r'[^;\n]+', Comment.Preproc),
            # A trailing comment or the line break ends the preprocessor line.
            (r';.*?\n', Comment.Single, '#pop'),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'whitespace': [
            (r'\n', Text),
            (r'[ \t]+', Text),
            (r';.*', Comment.Single)
        ],
        'punctuation': [
            (r'[,():\[\]]+', Punctuation),
            (r'[&|^<>+*/%~-]+', Operator),
            (r'[$]+', Keyword.Constant),
            (wordop, Operator.Word),
            (type, Keyword.Type)
        ],
    }