|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.esoteric |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for esoteric languages. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 from pygments.lexer import RegexLexer, include, words |
|
13 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
14 Number, Punctuation, Error |
|
15 |
|
16 __all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer', |
|
17 'CapDLLexer', 'AheuiLexer'] |
|
18 |
|
19 |
|
class BrainfuckLexer(RegexLexer):
    """
    Highlighter for the esoteric language
    `BrainFuck <http://www.muppetlabs.com/~breadbox/bf/>`_.

    Each group of instruction characters is given its own token type, loop
    brackets are tracked on the state stack so that a ``]`` with no open
    ``[`` is flagged as an error, and any other text is lexed as a comment.
    """

    name = 'Brainfuck'
    aliases = ['brainfuck', 'bf']
    filenames = ['*.bf', '*.b']
    mimetypes = ['application/x-brainfuck']

    tokens = {
        # Top level: no loop is currently open.
        'root': [
            # Opening a loop enters the 'loop' state.
            (r'\[', Keyword, 'loop'),
            # A closing bracket here has no matching opener.
            (r'\]', Error),
            include('common'),
        ],

        # Inside one or more loops; the stack depth mirrors the nesting.
        'loop': [
            (r'\[', Keyword, '#push'),
            (r'\]', Keyword, '#pop'),
            include('common'),
        ],

        # Rules shared by both states: one token type per instruction group
        # so that they can be coloured differently.
        'common': [
            (r'[.,]+', Name.Tag),
            (r'[+-]+', Name.Builtin),
            (r'[<>]+', Name.Variable),
            # Runs of anything that is not an instruction character.
            (r'[^.,+\-<>\[\]]+', Comment),
        ],
    }
|
50 |
|
51 |
|
class BefungeLexer(RegexLexer):
    """
    Highlighter for the esoteric language
    `Befunge <http://en.wikipedia.org/wiki/Befunge>`_.

    Apart from string literals and whitespace runs, every rule consumes a
    single instruction character and maps it to a token type by category.

    .. versionadded:: 0.7
    """
    name = 'Befunge'
    aliases = ['befunge']
    filenames = ['*.befunge']
    mimetypes = ['application/x-befunge']

    tokens = {
        'root': [
            # Digits and the lowercase letters a-f
            (r'[0-9a-f]', Number),
            # Traditional math
            (r'[+*/%!`-]', Operator),
            # Move, imperatives
            (r'[<>^v?\[\]rxjk]', Name.Variable),
            # Stack ops, imperatives
            (r'[:\\$.,n]', Name.Builtin),
            (r'[|_mw]', Keyword),
            # Befunge-98 stack ops
            (r'[{}]', Name.Tag),
            # Strings don't appear to allow escapes
            (r'".*?"', String.Double),
            # Quote followed by a single character
            (r'\'.', String.Single),
            # Trampoline... depends on direction hit
            (r'[#;]', Comment),
            # Misc
            (r'[pg&~=@iotsy]', Keyword),
            # Fingerprints
            (r'[()A-Z]', Comment),
            # Whitespace doesn't matter
            (r'\s+', Text),
        ],
    }
|
80 |
|
81 |
|
class CAmkESLexer(RegexLexer):
    """
    Basic lexer for the input language for the
    `CAmkES <https://sel4.systems/CAmkES/>`_ component platform.

    All rules live in a single ``root`` state and are tried in order, so
    more specific patterns (keywords, types, floats) are listed before the
    generic ones (identifiers, integers) that would otherwise shadow them.

    .. versionadded:: 2.1
    """
    name = 'CAmkES'
    aliases = ['camkes', 'idl4']
    filenames = ['*.camkes', '*.idl4']

    tokens = {
        'root': [
            # C pre-processor directive
            (r'^\s*#.*\n', Comment.Preproc),

            # Whitespace, comments
            (r'\s+', Text),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'//.*\n', Comment),

            (r'[\[(){},.;\]]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]', Operator),

            (words(('assembly', 'attribute', 'component', 'composition',
                    'configuration', 'connection', 'connector', 'consumes',
                    'control', 'dataport', 'Dataport', 'Dataports', 'emits',
                    'event', 'Event', 'Events', 'export', 'from', 'group',
                    'hardware', 'has', 'interface', 'Interface', 'maybe',
                    'procedure', 'Procedure', 'Procedures', 'provides',
                    'template', 'thread', 'threads', 'to', 'uses', 'with'),
                   suffix=r'\b'), Keyword),

            # 'int16_t' was previously misspelled 'int16_6', which left the
            # real type name to be lexed as a plain identifier.
            (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',
                    'float', 'in', 'inout', 'int', 'int16_t', 'int32_t',
                    'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',
                    'refin', 'semaphore', 'signed', 'string', 'struct',
                    'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',
                    'unsigned', 'void'),
                   suffix=r'\b'), Keyword.Type),

            # Recognised attributes
            (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved),
            (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),
             Keyword.Reserved),

            # CAmkES-level include
            (r'import\s+(<[^>]*>|"[^"]*");', Comment.Preproc),

            # C-level include
            (r'include\s+(<[^>]*>|"[^"]*");', Comment.Preproc),

            # Literals
            (r'0[xX][\da-fA-F]+', Number.Hex),
            # The float rule must come before the integer rule: the integer
            # pattern also matches the digits in front of the decimal point
            # and would otherwise make this rule unreachable.
            # NOTE: the Operator rule above already consumes a leading '-',
            # so the optional sign in these two patterns is never reached
            # with the current rule order.
            (r'-?[\d]+\.[\d]+', Number.Float),
            (r'-?[\d]+', Number),
            (r'"[^"]*"', String),
            (r'[Tt]rue|[Ff]alse', Name.Builtin),

            # Identifiers
            (r'[a-zA-Z_]\w*', Name),
        ],
    }
|
145 |
|
146 |
|
class CapDLLexer(RegexLexer):
    """
    Basic lexer for
    `CapDL <https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml>`_.

    The primary tool that consumes these specifications has its source at
    https://github.com/seL4/capdl/tree/master/capDL-tool. Only a subset of
    the grammar is handled here. In particular, an identifier is allowed to
    shadow a type name, yet such an identifier is currently (incorrectly)
    highlighted as a type. Getting this right would take a stateful lexer,
    which is considered unnecessarily complex for now.

    .. versionadded:: 2.2
    """
    name = 'CapDL'
    aliases = ['capdl']
    filenames = ['*.cdl']

    tokens = {
        'root': [
            # Lines handled by the C pre-processor
            (r'^\s*#.*\n', Comment.Preproc),

            # Blank space, then block comments and the two line-comment
            # introducers ("//" and "--")
            (r'\s+', Text),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'(//|--).*\n', Comment),

            # Single-character punctuation and the ".." range separator
            (r'[<>\[(){},:;=\]]', Punctuation),
            (r'\.\.', Punctuation),

            # Keywords
            (words(('arch', 'arm11', 'caps', 'child_of',
                    'ia32', 'irq', 'maps', 'objects'),
                   suffix=r'\b'),
             Keyword),

            # Object type names
            (words(('aep', 'asid_pool', 'cnode', 'ep', 'frame',
                    'io_device', 'io_ports', 'io_pt', 'notification',
                    'pd', 'pt', 'tcb', 'ut', 'vcpu'),
                   suffix=r'\b'),
             Keyword.Type),

            # Property names
            (words(('asid', 'addr', 'badge', 'cached', 'dom', 'domainID',
                    'elf', 'fault_ep', 'G', 'guard', 'guard_size', 'init',
                    'ip', 'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX',
                    'W', 'WG', 'WX', 'level', 'masked', 'master_reply',
                    'paddr', 'ports', 'reply', 'uncached'),
                   suffix=r'\b'),
             Keyword.Reserved),

            # Literals: hex first so that the leading "0" is not taken by
            # the decimal rule, then decimals with optional k/M suffix, then
            # the words that are highlighted like numbers
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+(\.\d+)?(k|M)?', Number),
            (words(('bits',), suffix=r'\b'), Number),
            (words(('cspace', 'vspace', 'reply_slot',
                    'caller_slot', 'ipc_buffer_slot'),
                   suffix=r'\b'),
             Number),

            # Everything else that looks like a name
            (r'[a-zA-Z_][-@\.\w]*', Name),
        ],
    }
|
204 |
|
205 |
|
class RedcodeLexer(RegexLexer):
    """
    A simple Redcode lexer based on ICWS'94.
    Contributed by Adam Blinkinsop <blinks@acm.org>.

    Opcodes and instruction modifiers are recognised from the ``opcodes``
    and ``modifiers`` tuples below (upper-case spellings only); any other
    word is lexed as a plain name.

    .. versionadded:: 0.8
    """
    name = 'Redcode'
    aliases = ['redcode']
    filenames = ['*.cw']

    # Words highlighted as Name.Function.
    opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
               'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',
               'ORG', 'EQU', 'END')
    # Words highlighted as Name.Decorator.
    modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            (r';.*$', Comment.Single),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)\b' % '|'.join(opcodes), Name.Function),
            (r'\b(%s)\b' % '|'.join(modifiers), Name.Decorator),
            # ``\w*`` rather than ``\w+``: a one-character label must also
            # lex as a Name instead of falling through to an Error token.
            # One-letter modifiers are unaffected because their rule is
            # tried first.
            (r'[A-Za-z_]\w*', Name),
            #  Operators
            (r'[-+*/%]', Operator),
            (r'[#$@<>]', Operator),  # mode
            (r'[.,]', Punctuation),  # mode
            #  Numbers
            (r'[-+]?\d+', Number.Integer),
        ],
    }
|
240 |
|
241 |
|
class AheuiLexer(RegexLexer):
    """
    Aheui_ lexer.

    Aheui_ is an esoteric language based on the Korean alphabet.

    .. _Aheui: http://aheui.github.io/
    """

    name = 'Aheui'
    aliases = ['aheui']
    filenames = ['*.aheui']

    tokens = {
        'root': [
            # One character class built from ranges of Hangul syllables;
            # a character in any of these ranges is lexed as Operator.
            # NOTE(review): presumably these are the syllables that act as
            # Aheui instructions -- confirm against the language spec.
            (u'['
             u'나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'
             u'다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'
             u'따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'
             u'라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'
             u'마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'
             u'바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'
             u'빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'
             u'사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'
             u'싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'
             u'자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'
             u'차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'
             u'카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'
             u'타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'
             u'파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'
             u'하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'
             u']', Operator),
            # Any other single character (except a newline, which '.' does
            # not match) is lexed as a comment.
            ('.', Comment),
        ],
    }