# -*- coding: utf-8 -*-
"""
    pygments.lexers.text
    ~~~~~~~~~~~~~~~~~~~~

    Lexers for non-source code file types.

    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
try:
    set
except NameError:
    from sets import Set as set
from bisect import bisect

from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \
     bygroups, include, using, this, do_insertions
from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
     Generic, Operator, Number, Whitespace, Literal
from pygments.util import get_bool_opt
from pygments.lexers.other import BashLexer

__all__ = ['IniLexer', 'SourcesListLexer', 'BaseMakefileLexer',
           'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer',
           'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
           'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
           'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
           'LighttpdConfLexer', 'NginxConfLexer']
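

# A minimal usage sketch (illustrative only, not part of the module API):
# these lexers are normally looked up via pygments.lexers.get_lexer_by_name()
# and driven by pygments.highlight(), e.g.:
#
#   from pygments import highlight
#   from pygments.formatters import TerminalFormatter
#   print highlight('[section]\nkey = value\n', IniLexer(), TerminalFormatter())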


class IniLexer(RegexLexer):
    """
    Lexer for configuration files in INI style.
    """

    name = 'INI'
    aliases = ['ini', 'cfg']
    filenames = ['*.ini', '*.cfg', '*.properties']
    mimetypes = ['text/x-ini']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'[;#].*?$', Comment),
            (r'\[.*?\]$', Keyword),
            (r'(.*?)([ \t]*)(=)([ \t]*)(.*?)$',
             bygroups(Name.Attribute, Text, Operator, Text, String))
        ]
    }

    def analyse_text(text):
        npos = text.find('\n')
        if npos < 3:
            return False
        return text[0] == '[' and text[npos-1] == ']'


class SourcesListLexer(RegexLexer):
    """
    Lexer that highlights debian sources.list files.

    *New in Pygments 0.7.*
    """

    name = 'Debian Sourcelist'
    aliases = ['sourceslist', 'sources.list']
    filenames = ['sources.list']
    mimetypes = ['application/x-debian-sourceslist']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment),
            (r'^(deb(?:-src)?)(\s+)',
             bygroups(Keyword, Text), 'distribution')
        ],
        'distribution': [
            (r'#.*?$', Comment, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),
            (r'[^\s$[]+', String),
            (r'\[', String.Other, 'escaped-distribution'),
            (r'\$', String),
            (r'\s+', Text, 'components')
        ],
        'escaped-distribution': [
            (r'\]', String.Other, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),
            (r'[^\]$]+', String.Other),
            (r'\$', String.Other)
        ],
        'components': [
            (r'#.*?$', Comment, '#pop:2'),
            (r'$', Text, '#pop:2'),
            (r'\s+', Text),
            (r'\S+', Keyword.Pseudo),
        ]
    }

    def analyse_text(text):
        for line in text.split('\n'):
            line = line.strip()
            if not (line.startswith('#') or line.startswith('deb ') or
                    line.startswith('deb-src ') or not line):
                return False
        return True


class MakefileLexer(Lexer):
    """
    Lexer for BSD and GNU make extensions (lenient enough to handle both in
    the same file even).

    *Rewritten in Pygments 0.10.*
    """

    name = 'Makefile'
    aliases = ['make', 'makefile', 'mf', 'bsdmake']
    filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile']
    mimetypes = ['text/x-makefile']

    r_special = re.compile(r'^(?:'
        # BSD Make
        r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|'
        # GNU Make
        r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)')
    r_comment = re.compile(r'^\s*@?#')

    def get_tokens_unprocessed(self, text):
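        # Strategy note (descriptive comment, added for clarity): lines that
        # look like preprocessor directives or comments are pulled out of the
        # stream and tokenized directly; everything else is concatenated and
        # handed to BaseMakefileLexer, then the two token streams are merged
        # back in document order with do_insertions().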
        ins = []
        lines = text.splitlines(True)
        done = ''
        lex = BaseMakefileLexer(**self.options)
        backslashflag = False
        for line in lines:
            if self.r_special.match(line) or backslashflag:
                ins.append((len(done), [(0, Comment.Preproc, line)]))
                backslashflag = line.strip().endswith('\\')
            elif self.r_comment.match(line):
                ins.append((len(done), [(0, Comment, line)]))
            else:
                done += line
        for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
            yield item


class BaseMakefileLexer(RegexLexer):
    """
    Lexer for simple Makefiles (no preprocessing).

    *New in Pygments 0.10.*
    """

    name = 'Makefile'
    aliases = ['basemake']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
            (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
            (r'\s+', Text),
            (r'#.*?\n', Comment),
            (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
             bygroups(Keyword, Text), 'export'),
            (r'export\s+', Keyword),
            # assignment
            (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)',
             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
            # strings
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            # targets
            (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
             'block-header'),
            # TODO: add paren handling (grr)
        ],
        'export': [
            (r'[a-zA-Z0-9_${}-]+', Name.Variable),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
        ],
        'block-header': [
            (r'[^,\\\n#]+', Number),
            (r',', Punctuation),
            (r'#.*?\n', Comment),
            (r'\\\n', Text),  # line continuation
            (r'\\.', Text),
            (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
        ],
    }


class DiffLexer(RegexLexer):
    """
    Lexer for unified or context-style diffs or patches.
    """

    name = 'Diff'
    aliases = ['diff', 'udiff']
    filenames = ['*.diff', '*.patch']
    mimetypes = ['text/x-diff', 'text/x-patch']

    tokens = {
        'root': [
            (r' .*\n', Text),
            (r'\+.*\n', Generic.Inserted),
            (r'-.*\n', Generic.Deleted),
            (r'!.*\n', Generic.Strong),
            (r'@.*\n', Generic.Subheading),
            (r'([Ii]ndex|diff).*\n', Generic.Heading),
            (r'=.*\n', Generic.Heading),
            (r'.*\n', Text),
        ]
    }

    def analyse_text(text):
        if text[:7] == 'Index: ':
            return True
        if text[:5] == 'diff ':
            return True
        if text[:4] == '--- ':
            return 0.9


DPATCH_KEYWORDS = ['hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
                   'replace']

class DarcsPatchLexer(RegexLexer):
    """
    DarcsPatchLexer is a lexer for the various versions of the darcs patch
    format.  Examples of this format are derived by commands such as
    ``darcs annotate --patch`` and ``darcs send``.

    *New in Pygments 0.10.*
    """
    name = 'Darcs Patch'
    aliases = ['dpatch']
    filenames = ['*.dpatch', '*.darcspatch']

    tokens = {
        'root': [
            (r'<', Operator),
            (r'>', Operator),
            (r'{', Operator),
            (r'}', Operator),
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text, Operator)),
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text), 'comment'),
            (r'New patches:', Generic.Heading),
            (r'Context:', Generic.Heading),
            (r'Patch bundle hash:', Generic.Heading),
            (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
             bygroups(Text, Keyword, Text)),
            (r'\+', Generic.Inserted, "insert"),
            (r'-', Generic.Deleted, "delete"),
            (r'.*\n', Text),
        ],
        'comment': [
            (r'[^\]].*\n', Comment),
            (r'\]', Operator, "#pop"),
        ],
        'specialText': [  # darcs add [_CODE_] special operators for clarity
            (r'\n', Text, "#pop"),  # line-based
            (r'\[_[^_]*_]', Operator),
        ],
        'insert': [
            include('specialText'),
            (r'\[', Generic.Inserted),
            (r'[^\n\[]*', Generic.Inserted),
        ],
        'delete': [
            include('specialText'),
            (r'\[', Generic.Deleted),
            (r'[^\n\[]*', Generic.Deleted),
        ],
    }


class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.]?)+      # Time as :/.-separated groups of 1 or 2 digits
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                ([^\s]+\s+.*?\n)       # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                ([^\s]+\s+)                      # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"[^\s]+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }


class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    *New in Pygments 0.6.*
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[\s\w]+', Text),
            (r'(\[)(/?[^\]\n\r=]+)(\])',
             bygroups(Keyword, Keyword.Pseudo, Keyword)),
            (r'(\[)([^\]\n\r=]+)(=)([^\]\n\r]+)(\])',
             bygroups(Keyword, Keyword.Pseudo, Operator, String, Keyword)),
        ],
    }


class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            (r'', Text, '#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True


class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    *New in Pygments 0.6.*
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]*', Text, 'textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(..', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if len(text) > 3 and text[1:3].isalnum() and text[3].isspace():
            return 0.9


class ApacheConfLexer(RegexLexer):
    """
    Lexer for configuration files following the Apache config file
    format.

    *New in Pygments 0.6.*
    """

    name = 'ApacheConf'
    aliases = ['apacheconf', 'aconf', 'apache']
    filenames = ['.htaccess', 'apache.conf', 'apache2.conf']
    mimetypes = ['text/x-apacheconf']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#.*?)$', Comment),
            (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
             bygroups(Name.Tag, Text, String, Name.Tag)),
            (r'([a-zA-Z][a-zA-Z0-9]*)(\s+)',
             bygroups(Name.Builtin, Text), 'value'),
            (r'\.+', Text),
        ],
        'value': [
            (r'$', Text, '#pop'),
            (r'[^\S\n]+', Text),
            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
            (r'\d+', Number),
            (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other),
            (r'(on|off|none|any|all|double|email|dns|min|minimal|'
             r'os|productonly|full|emerg|alert|crit|error|warn|'
             r'notice|info|debug|registry|script|inetd|standalone|'
             r'user|group)\b', Keyword),
            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
            (r'[^\s"]+', Text)
        ]
    }


class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    *New in Pygments 0.7.*
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-zivx]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'}}}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'{{{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    *New in Pygments 0.7.*

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language`` and
        ``.. code:: language`` directives with a lexer for the given
        language (default: ``True``). *New in Pygments 0.8.*
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
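        # Descriptive note (added comment): this is a token callback rather
        # than a plain token type; it receives the regex match for a whole
        # ``sourcecode``/``code`` directive and must yield
        # (index, tokentype, value) tuples, optionally re-lexing the indented
        # body with the lexer named in match group 6.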
        from pygments.lexers import get_lexer_by_name
        from pygments.util import ClassNotFound

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w-]+)(::)(?:([ \t]*)(.+))?',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
            # A reference target
            (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text,
                                                     Name.Function)),
            # Definition list
            (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`)(.+?)(`__?)',
             bygroups(Punctuation, using(this), Punctuation)),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # user-defined role
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`\\]+', String),
            (r'\\.', String),
            (r'``', String, '#pop'),
            (r'[`\\]', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                 # has two lines
            p1 * 2 + 1 == p2 and        # they are the same length
            text[p1+1] in '-=' and      # the next line both starts and ends with
            text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5


class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    *New in Pygments 0.8.*
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    tokens = {
        'root': [
            # Who decided that doublequote was a good comment character??
            (r'^\s*".*', Comment),
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
            (r"'(\\\\|\\'|[^\n'])*'", String.Single),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        from pygments.lexers._vimbuiltins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w: return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                   mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                     self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value


class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    *New in Pygments 0.9.*
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            #(r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([\w-]*:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }


class SquidConfLexer(RegexLexer):
    """
    Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.

    *New in Pygments 0.9.*
    """

    name = 'SquidConf'
    aliases = ['squidconf', 'squid.conf', 'squid']
    filenames = ['squid.conf']
    mimetypes = ['text/x-squidconf']
    flags = re.IGNORECASE

    keywords = [ "acl", "always_direct", "announce_host",
                 "announce_period", "announce_port", "announce_to",
                 "anonymize_headers", "append_domain", "as_whois_server",
                 "auth_param_basic", "authenticate_children",
                 "authenticate_program", "authenticate_ttl", "broken_posts",
                 "buffered_logs", "cache_access_log", "cache_announce",
                 "cache_dir", "cache_dns_program", "cache_effective_group",
                 "cache_effective_user", "cache_host", "cache_host_acl",
                 "cache_host_domain", "cache_log", "cache_mem",
                 "cache_mem_high", "cache_mem_low", "cache_mgr",
                 "cachemgr_passwd", "cache_peer", "cache_peer_access",
850 "cahce_replacement_policy", "cache_stoplist", |
                 "cache_stoplist_pattern", "cache_store_log", "cache_swap",
                 "cache_swap_high", "cache_swap_log", "cache_swap_low",
                 "client_db", "client_lifetime", "client_netmask",
                 "connect_timeout", "coredump_dir", "dead_peer_timeout",
                 "debug_options", "delay_access", "delay_class",
                 "delay_initial_bucket_level", "delay_parameters",
                 "delay_pools", "deny_info", "dns_children", "dns_defnames",
                 "dns_nameservers", "dns_testnames", "emulate_httpd_log",
                 "err_html_text", "fake_user_agent", "firewall_ip",
                 "forwarded_for", "forward_snmpd_port", "fqdncache_size",
                 "ftpget_options", "ftpget_program", "ftp_list_width",
                 "ftp_passive", "ftp_user", "half_closed_clients",
                 "header_access", "header_replace", "hierarchy_stoplist",
                 "high_response_time_warning", "high_page_fault_warning",
                 "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
                 "httpd_accel_host", "httpd_accel_port",
                 "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
                 "http_port", "http_reply_access", "icp_access",
                 "icp_hit_stale", "icp_port", "icp_query_timeout",
                 "ident_lookup", "ident_lookup_access", "ident_timeout",
                 "incoming_http_average", "incoming_icp_average",
                 "inside_firewall", "ipcache_high", "ipcache_low",
                 "ipcache_size", "local_domain", "local_ip", "logfile_rotate",
                 "log_fqdn", "log_icp_queries", "log_mime_hdrs",
                 "maximum_object_size", "maximum_single_addr_tries",
                 "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
                 "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
                 "memory_pools_limit", "memory_replacement_policy",
                 "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt",
                 "minimum_direct_hops", "minimum_object_size",
                 "minimum_retry_timeout", "miss_access", "negative_dns_ttl",
                 "negative_ttl", "neighbor_timeout", "neighbor_type_domain",
                 "netdb_high", "netdb_low", "netdb_ping_period",
                 "netdb_ping_rate", "never_direct", "no_cache",
                 "passthrough_proxy", "pconn_timeout", "pid_filename",
                 "pinger_program", "positive_dns_ttl", "prefer_direct",
                 "proxy_auth", "proxy_auth_realm", "query_icmp", "quick_abort",
                 "quick_abort", "quick_abort_max", "quick_abort_min",
                 "quick_abort_pct", "range_offset_limit", "read_timeout",
                 "redirect_children", "redirect_program",
                 "redirect_rewrites_host_header", "reference_age",
                 "reference_age", "refresh_pattern", "reload_into_ims",
                 "request_body_max_size", "request_size", "request_timeout",
                 "shutdown_lifetime", "single_parent_bypass",
                 "siteselect_timeout", "snmp_access", "snmp_incoming_address",
                 "snmp_port", "source_ping", "ssl_proxy",
                 "store_avg_object_size", "store_objects_per_bucket",
                 "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs",
                 "tcp_incoming_address", "tcp_outgoing_address",
                 "tcp_recv_bufsize", "test_reachability", "udp_hit_obj",
                 "udp_hit_obj_size", "udp_incoming_address",
                 "udp_outgoing_address", "unique_hostname", "unlinkd_program",
                 "uri_whitespace", "useragent_log", "visible_hostname",
                 "wais_relay", "wais_relay_host", "wais_relay_port",
                 ]

    opts = [ "proxy-only", "weight", "ttl", "no-query", "default",
             "round-robin", "multicast-responder", "on", "off", "all",
             "deny", "allow", "via", "parent", "no-digest", "heap", "lru",
             "realm", "children", "credentialsttl", "none", "disable",
             "offline_toggle", "diskd", "q1", "q2",
             ]

    actions = [ "shutdown", "info", "parameter", "server_list",
                "client_list", r'squid\.conf',
                ]

    actions_stats = [ "objects", "vm_objects", "utilization",
                      "ipcache", "fqdncache", "dns", "redirector", "io",
                      "reply_headers", "filedescriptors", "netdb",
                      ]

    actions_log = [ "status", "enable", "disable", "clear"]

    acls = [ "url_regex", "urlpath_regex", "referer_regex", "port",
             "proto", "req_mime_type", "rep_mime_type", "method",
             "browser", "user", "src", "dst", "time", "dstdomain", "ident",
             "snmp_community",
             ]

    ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
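
    # For example (illustrative): makelistre(["allow", "deny"]) produces the
    # pattern r'\b(?:allow|deny)\b', i.e. a whole-word alternation over the
    # given list.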
    def makelistre(list):
        return r'\b(?:'+'|'.join(list)+r')\b'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#', Comment, 'comment'),
            (makelistre(keywords), Keyword),
            (makelistre(opts), Name.Constant),
            # Actions
            (makelistre(actions), String),
            (r'stats/'+makelistre(actions), String),
            (r'log/'+makelistre(actions)+r'=', String),
            (makelistre(acls), Keyword),
            (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number),
            (r'\b\d+\b', Number),
            (r'\S+', Text),
        ],
        'comment': [
            (r'\s*TAG:.*', String.Escape, '#pop'),
            (r'.*', Comment, '#pop'),
        ],
    }


class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    *New in Pygments 0.9.*
    """
    name = 'Debian Control file'
    aliases = ['control']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            (r'^((?:Python-)?Version)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            (r'<[^>]+>', Generic.Strong),
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r',\n?', Text),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )([^\s]+)', bygroups(Text, String, Name, Name.Class)),
            (r':.*\n', Generic.Strong),
            (r' .*\n', Text),
            ('', Text, '#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[}\)]\s*$', Text, '#pop'),
            (r'[}]', Text),
            (r'[^,]$', Name.Function, '#pop'),
            (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function),
        ],
        'depend_vers': [
            (r'\),', Text, '#pop'),
            (r'\)[^,]', Text, '#pop:2'),
            (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number))
        ]
    }


class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
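        # Descriptive comments (added): `indent_stack` holds the enclosing
        # indentation levels, `indent` is the current level (-1 before the
        # first node), `next_indent` is a candidate level not yet committed
        # by set_indent(), and `block_scalar_indent` is the explicit
        # indentation of the block scalar currently being read, if any.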
        self.indent_stack = []
        self.indent = -1
        self.next_indent = 0
        self.block_scalar_indent = None


class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    *New in Pygments 0.11.*
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']


    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                if context.next_indent > context.indent:
                    extra = text[context.indent:]
                    text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback


    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
             r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[0-9A-Za-z_-]+)?'
             r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
            # an anchor
            (r'&[0-9A-Za-z_-]+', Name.Label),
            # an alias
            (r'\*[0-9A-Za-z_-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[^\n\r\f\v]+', Name.Constant),
        ],

        # the content of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
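        # Descriptive note (added comment): ExtendedRegexLexer threads a
        # context object through every token callback; supplying a
        # YamlLexerContext here is what gives the indentation-tracking
        # callbacks above their per-run state.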
        if context is None:
            context = YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, context)


class LighttpdConfLexer(RegexLexer):
    """
    Lexer for `Lighttpd <http://lighttpd.net/>`_ configuration files.

    *New in Pygments 0.11.*
    """
    name = 'Lighttpd configuration file'
    aliases = ['lighty', 'lighttpd']
    filenames = []
    mimetypes = ['text/x-lighttpd-conf']

    tokens = {
        'root': [
            (r'#.*\n', Comment.Single),
            (r'/\S*', Name),  # pathname
            (r'[a-zA-Z._-]+', Keyword),
            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
            (r'[0-9]+', Number),
            (r'=>|=~|\+=|==|=|\+', Operator),
            (r'\$[A-Z]+', Name.Builtin),
            (r'[(){}\[\],]', Punctuation),
            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
            (r'\s+', Text),
        ],

    }


class NginxConfLexer(RegexLexer):
    """
    Lexer for `Nginx <http://nginx.net/>`_ configuration files.

    *New in Pygments 0.11.*
    """
    name = 'Nginx configuration file'
    aliases = ['nginx']
    filenames = []
    mimetypes = ['text/x-nginx-conf']

    tokens = {
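        # State flow (added comment): a directive word in 'root' enters
        # 'stmt'; a ';' pops it, while a '{' pushes 'block'.  Inside 'block',
        # each directive again enters 'stmt', and '}' pops both 'block' and
        # the 'stmt' that opened it at once, hence the '#pop:2'.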
        'root': [
            (r'(include)(\s+)([^\s;]+)', bygroups(Keyword, Text, Name)),
            (r'[^\s;#]+', Keyword, 'stmt'),
            include('base'),
        ],
        'block': [
            (r'}', Punctuation, '#pop:2'),
            (r'[^\s;#]+', Keyword.Namespace, 'stmt'),
            include('base'),
        ],
        'stmt': [
            (r'{', Punctuation, 'block'),
            (r';', Punctuation, '#pop'),
            include('base'),
        ],
        'base': [
            (r'#.*\n', Comment.Single),
            (r'on|off', Name.Constant),
            (r'\$[^\s;#()]+', Name.Variable),
            (r'([a-z0-9.-]+)(:)([0-9]+)',
             bygroups(Name, Punctuation, Number.Integer)),
            (r'[a-z-]+/[a-z-+]+', String),  # mimetype
            #(r'[a-zA-Z._-]+', Keyword),
            (r'[0-9]+[km]?\b', Number.Integer),
            (r'(~)(\s*)([^\s{]+)', bygroups(Punctuation, Text, String.Regex)),
            (r'[:=~]', Punctuation),
            (r'[^\s;#{}$]+', String),  # catch all
            (r'/[^\s;#]*', Name),  # pathname
            (r'\s+', Text),
        ],
    }