|
# -*- coding: utf-8 -*-
"""
    pygments.lexers.clean
    ~~~~~~~~~~~~~~~~~~~~~

    Lexer for the Clean language.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
|
11 |
|
12 from pygments.lexer import ExtendedRegexLexer, LexerContext, \ |
|
13 bygroups, words, include, default |
|
14 from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, \ |
|
15 Punctuation, String, Text, Whitespace |
|
16 |
|
# Names exported by a star-import of this module.
__all__ = ["CleanLexer"]
|
18 |
|
19 |
|
class CleanLexer(ExtendedRegexLexer):
    """
    Lexer for the general purpose, state-of-the-art, pure and lazy functional
    programming language Clean (http://clean.cs.ru.nl/Clean).

    The lexer keeps the indentation of the definition currently being lexed
    in ``ctx.indent``; the ``check_indent*`` callbacks compare continuation
    lines against it to decide when a multi-line signature has ended.

    .. versionadded:: 2.2
    """
    name = 'Clean'
    aliases = ['clean']
    filenames = ['*.icl', '*.dcl']

    def get_tokens_unprocessed(self, text=None, context=None):
        """
        Tokenize *text* with a context that carries an ``indent`` attribute.

        A caller-supplied *context* is passed through to the base
        implementation instead of being discarded; when none is given a
        fresh ``LexerContext`` starting at position 0 is created.  Either
        way the context is guaranteed to have ``indent`` (default 0), which
        the callbacks of this lexer read and write.
        """
        ctx = context if context is not None else LexerContext(text, 0)
        if not hasattr(ctx, 'indent'):
            ctx.indent = 0
        return ExtendedRegexLexer.get_tokens_unprocessed(
            self, text, context=ctx)

    def check_class_not_import(lexer, match, ctx):
        """
        Callback for names in the ``fromimport`` state.

        The word ``import`` is emitted as ``Keyword.Namespace`` and replaces
        the state on top of the stack with ``fromimportfunc``; any other
        match is emitted as ``Name.Class``.  The position always advances
        past the match.
        """
        if match.group(0) == 'import':
            yield match.start(), Keyword.Namespace, match.group(0)
            ctx.stack = ctx.stack[:-1] + ['fromimportfunc']
        else:
            yield match.start(), Name.Class, match.group(0)
        ctx.pos = match.end()

    def check_instance_class(lexer, match, ctx):
        """
        Callback for names in the ``fromimportfunc`` state.

        ``instance`` and ``class`` are emitted as ``Keyword``; any other
        match is emitted as ``Name.Function`` and pushes the
        ``fromimportfunctype`` state.  The position always advances past
        the match.
        """
        if match.group(0) == 'instance' or match.group(0) == 'class':
            yield match.start(), Keyword, match.group(0)
        else:
            yield match.start(), Name.Function, match.group(0)
            ctx.stack = ctx.stack + ['fromimportfunctype']
        ctx.pos = match.end()

    @staticmethod
    def indent_len(text):
        """
        Measure a run of whitespace, ignoring newlines.

        Returns a ``(width, length)`` tuple: *width* counts every tab as
        four columns, *length* is the plain character count.  Both are
        computed after all ``'\\n'`` characters have been removed.
        """
        # Tabs are four spaces:
        # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt
        text = text.replace('\n', '')
        return len(text.replace('\t', ' ' * 4)), len(text)

    def store_indent(lexer, match, ctx):
        """
        Record the width of the matched leading whitespace in ``ctx.indent``
        and emit the match as ``Text``.
        """
        ctx.indent, _ = CleanLexer.indent_len(match.group(0))
        ctx.pos = match.end()
        yield match.start(), Text, match.group(0)

    def check_indent1(lexer, match, ctx):
        """
        Callback for ``\\n(\\s*)`` in the ``functiondefargs`` state.

        If the matched whitespace is wider than ``ctx.indent`` it is emitted
        as ``Whitespace`` and lexing continues in the same state.  Otherwise
        ``ctx.indent`` is reset to 0, the position is rewound to the start
        of the match, one state is popped, and the match without its first
        character is emitted as ``Whitespace``.
        """
        indent, reallen = CleanLexer.indent_len(match.group(0))
        if indent > ctx.indent:
            yield match.start(), Whitespace, match.group(0)
            # reallen does not count newlines; add one for the newline
            # that starts the match.
            ctx.pos = match.start() + reallen + 1
        else:
            ctx.indent = 0
            ctx.pos = match.start()
            ctx.stack = ctx.stack[:-1]
            yield match.start(), Whitespace, match.group(0)[1:]

    def check_indent2(lexer, match, ctx):
        """
        Callback for ``\\n(\\s*)`` in the ``functiondefres`` state.

        Same comparison as ``check_indent1``, but the fallback branch pops
        two states and emits no token.
        """
        indent, reallen = CleanLexer.indent_len(match.group(0))
        if indent > ctx.indent:
            yield match.start(), Whitespace, match.group(0)
            # reallen does not count newlines; add one for the newline
            # that starts the match.
            ctx.pos = match.start() + reallen + 1
        else:
            ctx.indent = 0
            ctx.pos = match.start()
            ctx.stack = ctx.stack[:-2]

    def check_indent3(lexer, match, ctx):
        """
        Callback for ``\\n(\\s*)`` in the ``functiondefclasses`` state.

        Same comparison as ``check_indent1``, but the fallback branch pops
        three states and, when the match is exactly two newlines, moves the
        position one character forward.
        """
        indent, reallen = CleanLexer.indent_len(match.group(0))
        if indent > ctx.indent:
            yield match.start(), Whitespace, match.group(0)
            # reallen does not count newlines; add one for the newline
            # that starts the match.
            ctx.pos = match.start() + reallen + 1
        else:
            ctx.indent = 0
            ctx.pos = match.start()
            ctx.stack = ctx.stack[:-3]
            yield match.start(), Whitespace, match.group(0)[1:]
            # A match of '\n\n' has width 0 and therefore always ends up in
            # this branch.
            if match.group(0) == '\n\n':
                ctx.pos = ctx.pos + 1

    def skip(lexer, match, ctx):
        """
        Callback for the last character before a newline in the
        ``singlecomment`` state: pop that state, advance past the match and
        emit it as ``Comment``.
        """
        ctx.stack = ctx.stack[:-1]
        ctx.pos = match.end()
        yield match.start(), Comment, match.group(0)

    # Reserved words highlighted as Keyword in the 'root' state.
    keywords = ('class', 'instance', 'where', 'with', 'let', 'let!',
                'in', 'case', 'of', 'infix', 'infixr', 'infixl', 'generic',
                'derive', 'otherwise', 'code', 'inline')

    tokens = {
        # Rules shared by most states via include('common').
        'common': [
            (r';', Punctuation, '#pop'),
            (r'//', Comment, 'singlecomment'),
        ],
        'root': [
            # Comments
            (r'//.*\n', Comment.Single),
            (r'(?s)/\*\*.*?\*/', Comment.Special),
            (r'(?s)/\*.*?\*/', Comment.Multi),

            # Modules, imports, etc.
            (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`.]+)',
             bygroups(Keyword.Namespace, Keyword.Namespace, Text, Name.Class)),
            (r'(?<=\n)import(?=\s)', Keyword.Namespace, 'import'),
            (r'(?<=\n)from(?=\s)', Keyword.Namespace, 'fromimport'),

            # Keywords
            # We cannot use (?s)^|(?<=\s) as prefix, so need to repeat this
            (words(keywords, prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword),
            (words(keywords, prefix=r'^', suffix=r'(?=\s)'), Keyword),

            # Function definitions
            (r'(?=\{\|)', Whitespace, 'genericfunction'),
            (r'(?<=\n)([ \t]*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+\w)*)(\s*)(::)',
             bygroups(store_indent, Name.Function, Keyword.Type, Whitespace,
                      Punctuation),
             'functiondefargs'),

            # Type definitions
            (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'),
            (r'^([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'),

            # Literals
            (r'\'\\?.(?<!\\)\'', String.Char),
            (r'\'\\\d+\'', String.Char),
            (r'\'\\\\\'', String.Char),  # (special case for '\\')
            (r'[+\-~]?\s*\d+\.\d+(E[+\-~]?\d+)?\b', Number.Float),
            # Octal and hexadecimal literals may have any number of digits;
            # both rules must precede the plain integer rule.
            (r'[+\-~]?\s*0[0-7]+\b', Number.Oct),
            (r'[+\-~]?\s*0x[0-9a-fA-F]+\b', Number.Hex),
            (r'[+\-~]?\s*\d+\b', Number.Integer),
            (r'"', String.Double, 'doubleqstring'),
            (words(('True', 'False'), prefix=r'(?<=\s)', suffix=r'(?=\s)'),
             Literal),

            # Qualified names
            (r'(\')([\w.]+)(\'\.)',
             bygroups(Punctuation, Name.Namespace, Punctuation)),

            # Everything else is some name
            (r'([\w`$%/?@]+\.?)*[\w`$%/?@]+', Name),

            # Punctuation
            (r'[{}()\[\],:;.#]', Punctuation),
            (r'[+\-=!<>|&~*\^/]', Operator),
            (r'\\\\', Operator),

            # Lambda expressions
            (r'\\.*?(->|\.|=)', Name.Function),

            # Whitespace
            (r'\s', Whitespace),

            include('common'),
        ],
        # After 'from': module name up to the 'import' keyword.
        'fromimport': [
            include('common'),
            (r'([\w`.]+)', check_class_not_import),
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
        ],
        # After 'from ... import': the list of imported names.
        'fromimportfunc': [
            include('common'),
            (r'(::)(\s+)([^,\s]+)', bygroups(Punctuation, Text, Keyword.Type)),
            (r'([\w`$()=\-<>~*\^|+&%/]+)', check_instance_class),
            (r',', Punctuation),
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
        ],
        # Entered after an imported name (pushed by check_instance_class).
        'fromimportfunctype': [
            include('common'),
            (r'[{(\[]', Punctuation, 'combtype'),
            (r',', Punctuation, '#pop'),
            (r'[:;.#]', Punctuation),
            (r'\n', Whitespace, '#pop:2'),
            (r'[^\S\n]+', Whitespace),
            (r'\S+', Keyword.Type),
        ],
        'combtype': [
            include('common'),
            (r'[})\]]', Punctuation, '#pop'),
            # NOTE(review): an opening bracket also pops here; confirm this
            # is intended rather than a push for nested brackets.
            (r'[{(\[]', Punctuation, '#pop'),
            (r'[,:;.#]', Punctuation),
            (r'\s+', Whitespace),
            (r'\S+', Keyword.Type),
        ],
        'import': [
            include('common'),
            # Raw strings: '\s' in a plain literal is an invalid escape.
            (words(('from', 'import', 'as', 'qualified'),
                   prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword.Namespace),
            (r'[\w`.]+', Name.Class),
            (r'\n', Whitespace, '#pop'),
            (r',', Punctuation),
            (r'[^\S\n]+', Whitespace),
        ],
        'singlecomment': [
            (r'(.)(?=\n)', skip),
            (r'.+(?!\n)', Comment),
        ],
        'doubleqstring': [
            (r'[^\\"]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Double),
        ],
        'typedef': [
            include('common'),
            (r'[\w`]+', Keyword.Type),
            (r'[:=|(),\[\]{}!*]', Punctuation),
            (r'->', Punctuation),
            (r'\n(?=[^\s|])', Whitespace, '#pop'),
            (r'\s', Whitespace),
            (r'.', Keyword.Type),
        ],
        'genericfunction': [
            include('common'),
            (r'\{\|', Punctuation),
            (r'\|\}', Punctuation, '#pop'),
            (r',', Punctuation),
            (r'->', Punctuation),
            (r'(\s+of\s+)(\{)', bygroups(Keyword, Punctuation), 'genericftypes'),
            (r'\s', Whitespace),
            (r'[\w`\[\]{}!]+', Keyword.Type),
            (r'[*()]', Punctuation),
        ],
        'genericftypes': [
            include('common'),
            (r'[\w`]+', Keyword.Type),
            (r',', Punctuation),
            (r'\s', Whitespace),
            (r'\}', Punctuation, '#pop'),
        ],
        # The three states below nest (args -> res -> classes); each
        # check_indent callback pops as many states as its own depth.
        'functiondefargs': [
            include('common'),
            (r'\n(\s*)', check_indent1),
            (r'[!{}()\[\],:;.#]', Punctuation),
            (r'->', Punctuation, 'functiondefres'),
            (r'^(?=\S)', Whitespace, '#pop'),
            (r'\S', Keyword.Type),
            (r'\s', Whitespace),
        ],
        'functiondefres': [
            include('common'),
            (r'\n(\s*)', check_indent2),
            (r'^(?=\S)', Whitespace, '#pop:2'),
            (r'[!{}()\[\],:;.#]', Punctuation),
            (r'\|', Punctuation, 'functiondefclasses'),
            (r'\S', Keyword.Type),
            (r'\s', Whitespace),
        ],
        'functiondefclasses': [
            include('common'),
            (r'\n(\s*)', check_indent3),
            (r'^(?=\S)', Whitespace, '#pop:3'),
            (r'[,&]', Punctuation),
            (r'\[', Punctuation, 'functiondefuniquneq'),
            (r'[\w`$()=\-<>~*\^|+&%/{}\[\]@]', Name.Function, 'functionname'),
            (r'\s+', Whitespace),
        ],
        'functiondefuniquneq': [
            include('common'),
            (r'[a-z]+', Keyword.Type),
            (r'\s+', Whitespace),
            (r'<=|,', Punctuation),
            (r'\]', Punctuation, '#pop')
        ],
        'functionname': [
            include('common'),
            (r'[\w`$()=\-<>~*\^|+&%/]+', Name.Function),
            (r'(?=\{\|)', Punctuation, 'genericfunction'),
            default('#pop'),
        ]
    }