ThirdParty/Pygments/pygments/lexers/text.py

changeset 0
de9c2efb9d02
child 12
1d8dd9706f46
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.text
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for non-source code file types.
7
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 try:
14 set
15 except NameError:
16 from sets import Set as set
17 from bisect import bisect
18
19 from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \
20 bygroups, include, using, this, do_insertions
21 from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
22 Generic, Operator, Number, Whitespace, Literal
23 from pygments.util import get_bool_opt
24 from pygments.lexers.other import BashLexer
25
# Public API of this module: the lexer classes exported for
# ``from pygments.lexers.text import *`` and plugin discovery.
__all__ = ['IniLexer', 'SourcesListLexer', 'BaseMakefileLexer',
           'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer',
           'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
           'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
           'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
           'LighttpdConfLexer', 'NginxConfLexer']
32
33
class IniLexer(RegexLexer):
    """
    Lexer for configuration files in INI style.
    """

    name = 'INI'
    aliases = ['ini', 'cfg']
    filenames = ['*.ini', '*.cfg', '*.properties']
    mimetypes = ['text/x-ini']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'[;#].*?$', Comment),      # ';' or '#' comment lines
            (r'\[.*?\]$', Keyword),      # [section] headers
            # key = value assignments; the value is lexed as a string
            (r'(.*?)([ \t]*)(=)([ \t]*)(.*?)$',
             bygroups(Name.Attribute, Text, Operator, Text, String))
        ]
    }

    def analyse_text(text):
        # Heuristic: treat text as INI if its first line is "[...]".
        npos = text.find('\n')
        if npos < 3:
            return False
        return text[0] == '[' and text[npos-1] == ']'
59
60
class SourcesListLexer(RegexLexer):
    """
    Lexer that highlights debian sources.list files.

    *New in Pygments 0.7.*
    """

    name = 'Debian Sourcelist'
    aliases = ['sourceslist', 'sources.list']
    filenames = ['sources.list']
    # Fixed: this attribute was misspelled ``mimetype`` — the Lexer API
    # reads ``mimetypes``, so the value was silently ignored before.
    mimetypes = ['application/x-debian-sourceslist']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment),
            # a "deb" or "deb-src" keyword starts an entry
            (r'^(deb(?:-src)?)(\s+)',
             bygroups(Keyword, Text), 'distribution')
        ],
        'distribution': [
            (r'#.*?$', Comment, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),   # architecture substitution
            (r'[^\s$[]+', String),
            (r'\[', String.Other, 'escaped-distribution'),
            (r'\$', String),
            (r'\s+', Text, 'components')
        ],
        'escaped-distribution': [
            (r'\]', String.Other, '#pop'),
            (r'\$\(ARCH\)', Name.Variable),
            (r'[^\]$]+', String.Other),
            (r'\$', String.Other)
        ],
        'components': [
            (r'#.*?$', Comment, '#pop:2'),
            (r'$', Text, '#pop:2'),
            (r'\s+', Text),
            (r'\S+', Keyword.Pseudo),
        ]
    }

    def analyse_text(text):
        # Accept only if every non-blank line is a comment or a
        # deb/deb-src entry.
        for line in text.split('\n'):
            line = line.strip()
            if not (line.startswith('#') or line.startswith('deb ') or
                    line.startswith('deb-src ') or not line):
                return False
        return True
109
110
class MakefileLexer(Lexer):
    """
    Lexer for BSD and GNU make extensions (lenient enough to handle both in
    the same file even).

    *Rewritten in Pygments 0.10.*
    """

    name = 'Makefile'
    aliases = ['make', 'makefile', 'mf', 'bsdmake']
    filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile']
    mimetypes = ['text/x-makefile']

    r_special = re.compile(r'^(?:'
        # BSD Make
        r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|'
        # GNU Make
        r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)')
    r_comment = re.compile(r'^\s*@?#')

    def get_tokens_unprocessed(self, text):
        """Split out preprocessor/comment lines, lex the remainder with
        BaseMakefileLexer, and merge the results back in order."""
        insertions = []
        plain = ''
        sublexer = BaseMakefileLexer(**self.options)
        in_continuation = False
        for line in text.splitlines(True):
            if self.r_special.match(line) or in_continuation:
                # preprocessor directive (or its backslash continuation)
                insertions.append((len(plain),
                                   [(0, Comment.Preproc, line)]))
                in_continuation = line.strip().endswith('\\')
            elif self.r_comment.match(line):
                insertions.append((len(plain), [(0, Comment, line)]))
            else:
                plain += line
        for item in do_insertions(insertions,
                                  sublexer.get_tokens_unprocessed(plain)):
            yield item
147
148
class BaseMakefileLexer(RegexLexer):
    """
    Lexer for simple Makefiles (no preprocessing).

    *New in Pygments 0.10.*
    """

    # Not meant for direct selection; MakefileLexer delegates the
    # non-preprocessor lines to this lexer.
    name = 'Makefile'
    aliases = ['basemake']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            # indented recipe lines are shell code
            (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
            # $(...) function calls, possibly spanning continuation lines
            (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
            (r'\s+', Text),
            (r'#.*?\n', Comment),
            # "export VAR" form (lookahead ensures the rest is names only)
            (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
             bygroups(Keyword, Text), 'export'),
            (r'export\s+', Keyword),
            # assignment (=, :=, ?=, +=, !=); the value is shell code
            (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)',
             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
            # strings
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            # targets
            (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
             'block-header'),
            # TODO: add paren handling (grr)
        ],
        'export': [
            (r'[a-zA-Z0-9_${}-]+', Name.Variable),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
        ],
        'block-header': [
            # prerequisites after the target colon
            (r'[^,\\\n#]+', Number),
            (r',', Punctuation),
            (r'#.*?\n', Comment),
            (r'\\\n', Text), # line continuation
            (r'\\.', Text),
            # the recipe block that follows
            (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
        ],
    }
195
196
class DiffLexer(RegexLexer):
    """
    Lexer for unified or context-style diffs or patches.
    """

    name = 'Diff'
    aliases = ['diff', 'udiff']
    filenames = ['*.diff', '*.patch']
    mimetypes = ['text/x-diff', 'text/x-patch']

    tokens = {
        'root': [
            (r' .*\n', Text),                        # context line
            (r'\+.*\n', Generic.Inserted),           # added line
            (r'-.*\n', Generic.Deleted),             # removed line
            (r'!.*\n', Generic.Strong),              # context-diff change
            (r'@.*\n', Generic.Subheading),          # hunk header
            (r'([Ii]ndex|diff).*\n', Generic.Heading),
            (r'=.*\n', Generic.Heading),
            (r'.*\n', Text),                         # anything else
        ]
    }

    def analyse_text(text):
        # Recognize typical first lines of patch files.
        if text.startswith('Index: '):
            return True
        if text.startswith('diff '):
            return True
        if text.startswith('--- '):
            return 0.9
227
228
# Hunk-level commands that may appear in a darcs patch body.
DPATCH_KEYWORDS = ['hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move',
                   'replace']

class DarcsPatchLexer(RegexLexer):
    """
    DarcsPatchLexer is a lexer for the various versions of the darcs patch
    format.  Examples of this format are derived by commands such as
    ``darcs annotate --patch`` and ``darcs send``.

    *New in Pygments 0.10.*
    """
    name = 'Darcs Patch'
    aliases = ['dpatch']
    filenames = ['*.dpatch', '*.darcspatch']

    tokens = {
        'root': [
            (r'<', Operator),
            (r'>', Operator),
            (r'{', Operator),
            (r'}', Operator),
            # one-line patch header: [TAG name\nauthor**date]
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text, Operator)),
            # multi-line header: the long comment follows until ']'
            (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)',
             bygroups(Operator, Keyword, Name, Text, Name, Operator,
                      Literal.Date, Text), 'comment'),
            (r'New patches:', Generic.Heading),
            (r'Context:', Generic.Heading),
            (r'Patch bundle hash:', Generic.Heading),
            (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS),
             bygroups(Text, Keyword, Text)),
            (r'\+', Generic.Inserted, "insert"),
            (r'-', Generic.Deleted, "delete"),
            (r'.*\n', Text),
        ],
        'comment': [
            (r'[^\]].*\n', Comment),
            (r'\]', Operator, "#pop"),
        ],
        'specialText': [ # darcs add [_CODE_] special operators for clarity
            (r'\n', Text, "#pop"), # line-based
            (r'\[_[^_]*_]', Operator),
        ],
        'insert': [
            include('specialText'),
            (r'\[', Generic.Inserted),
            (r'[^\n\[]*', Generic.Inserted),
        ],
        'delete': [
            include('specialText'),
            (r'\[', Generic.Deleted),
            (r'[^\n\[]*', Generic.Deleted),
        ],
    }
284
285
class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    # Optional timestamp prefix shared by all message rules below.
    # Written in VERBOSE mode, so whitespace inside is insignificant.
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.]?)+      # Time as :/.-separated groups of 1 or 2 digits
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack: a line that is only a nick in angle brackets
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                ([^\s]+\s+.*?\n)       # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                ([^\s]+\s+)                     # Nick + Space
                (.*?\n)                         # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            # "someword:" addressing (but not URLs like http://)
            (r"[^\s]+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
345
346
class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    *New in Pygments 0.6.*
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root' : [
            (r'[\s\w]+', Text),
            # plain tag: [b] or closing [/b]
            (r'(\[)(/?[^\]\n\r=]+)(\])',
             bygroups(Keyword, Keyword.Pseudo, Keyword)),
            # tag with an argument: [url=...]
            (r'(\[)([^\]\n\r=]+)(=)([^\]\n\r]+)(\])',
             bygroups(Keyword, Keyword.Pseudo, Operator, String, Keyword)),
        ],
    }
367
368
class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),        # TeX comments run to end of line
            (r'[{}]', Name.Builtin),     # grouping braces
            (r'[&_^]', Name.Builtin),    # alignment / scripts
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),   # optional argument
            (r'\*', Keyword),               # starred variant
            (r'', Text, '#pop'),            # leave state on anything else
        ],
    }

    def analyse_text(text):
        # A document beginning with one of these commands is TeX/LaTeX.
        for prefix in ("\\documentclass", "\\input", "\\documentstyle",
                       "\\relax"):
            if text.startswith(prefix):
                return True
424
425
class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    *New in Pygments 0.6.*
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            # a request line: ".xx" possibly followed by arguments
            (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]*', Text, 'textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(..', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),  # continuation enters request mode
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # Fixed: guard the length before indexing — ``text[3]`` raised
        # IndexError for short inputs such as ".ab".
        if len(text) > 3 and text[1:3].isalnum() and text[3].isspace():
            return 0.9
479
480
class ApacheConfLexer(RegexLexer):
    """
    Lexer for configuration files following the Apache config file
    format.

    *New in Pygments 0.6.*
    """

    name = 'ApacheConf'
    aliases = ['apacheconf', 'aconf', 'apache']
    filenames = ['.htaccess', 'apache.conf', 'apache2.conf']
    mimetypes = ['text/x-apacheconf']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(#.*?)$', Comment),
            # <Section args> container tags
            (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
             bygroups(Name.Tag, Text, String, Name.Tag)),
            # directive name followed by its value
            (r'([a-zA-Z][a-zA-Z0-9]*)(\s+)',
             bygroups(Name.Builtin, Text), 'value'),
            (r'\.+', Text),
        ],
        'value': [
            (r'$', Text, '#pop'),              # end of line ends the value
            (r'[^\S\n]+', Text),
            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),  # IPv4, optional CIDR
            (r'\d+', Number),
            (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other),  # paths
            # common directive argument keywords
            (r'(on|off|none|any|all|double|email|dns|min|minimal|'
             r'os|productonly|full|emerg|alert|crit|error|warn|'
             r'notice|info|debug|registry|script|inetd|standalone|'
             r'user|group)\b', Keyword),
            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
            (r'[^\s"]+', Text)
        ]
    }
519
520
class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    *New in Pygments 0.7.*
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-zivx]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword), # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)), # Link
            (r'^----+$', Keyword), # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'}}}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'{{{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc), # slurp boring text
            (r'.', Comment.Preproc), # allow loose { or }
        ],
    }
564
565
class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    *New in Pygments 0.7.*

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language`` and
        ``.. code:: language`` directives with a lexer for the given
        language (default: ``True``). *New in Pygments 0.8.*
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        """Callback for ``.. sourcecode::`` / ``.. code::`` directives.

        Emits the directive header tokens, then lexes the indented body
        with the lexer named in the directive when one is available;
        otherwise the body is emitted as a plain String.
        """
        from pygments.lexers import get_lexer_by_name
        from pygments.util import ClassNotFound

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        # The common indentation is stripped before lexing and re-inserted
        # as plain Text tokens via do_insertions.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w-]+)(::)(?:([ \t]*)(.+))?',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
            # A reference target
            (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text,
                                                     Name.Function)),
            # Definition list
            (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text), # escape
            (r'``', String, 'literal'), # code
            (r'(`)(.+?)(`__?)',
             bygroups(Punctuation, using(this), Punctuation)), # reference
            (r'(`.+?`)(:[a-zA-Z0-9-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)), # role
            (r'(:[a-zA-Z0-9-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)), # user-defined role
            (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis
            (r'\*.+?\*', Generic.Emph), # Emphasis
            (r'\[.*?\]_', String), # Footnote or citation
            (r'<.+?>', Name.Tag), # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`\\]+', String),
            (r'\\.', String),
            (r'``', String, '#pop'),
            (r'[`\\]', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and              # has two lines
            p1 * 2 + 1 == p2 and     # they are the same length
            text[p1+1] in '-=' and   # the next line both starts and ends with
            text[p1+1] == text[p2-1]): # ...a sufficiently high header
            return 0.5
714
715
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    *New in Pygments 0.8.*
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    tokens = {
        'root': [
            # Who decided that doublequote was a good comment character??
            (r'^\s*".*', Comment),
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
            (r"'(\\\\|\\'|[^\n'])*'", String.Single),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation), # Inexact list.  Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other), # These are postprocessed below
            (r'.', Text),
        ],
    }
    def __init__(self, **options):
        # Sorted keyword tables generated by scripts/get_vimkw.py:
        # each entry is a (shortest-abbreviation, full-name) pair.
        from pygments.lexers._vimbuiltins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        # Binary-search the sorted mapping, then check the neighbors on
        # both sides: w must extend the abbreviation and be a prefix of
        # the full keyword name.
        p = bisect(mapping, (w,))
        if p > 0:
            if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \
               mapping[p-1][1][:len(w)] == w: return True
        if p < len(mapping):
            return mapping[p][0] == w[:len(mapping[p][0])] and \
                   mapping[p][1][:len(w)] == w
        return False

    def get_tokens_unprocessed(self, text):
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        # Reclassify generic Name.Other tokens using the keyword tables.
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                     self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value
796
797
class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    *New in Pygments 0.9.*
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),          # "#, fuzzy" style flags
            (r'^#:\s.*?$', Keyword.Declaration),   # "#: file:line" references
            #(r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            # "Header-Name: value" lines inside the header msgstr
            (r'^(")([\w-]*:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),                   # continuation strings
            (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
824
825
class SquidConfLexer(RegexLexer):
    """
    Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.

    *New in Pygments 0.9.*
    """

    name = 'SquidConf'
    aliases = ['squidconf', 'squid.conf', 'squid']
    filenames = ['squid.conf']
    mimetypes = ['text/x-squidconf']
    flags = re.IGNORECASE

    # Configuration directive names.
    # NOTE(review): "cahce_replacement_policy" looks like a typo for
    # "cache_replacement_policy" — verify against the squid docs before
    # changing, since it alters which words are highlighted.
    keywords = [ "acl", "always_direct", "announce_host",
                 "announce_period", "announce_port", "announce_to",
                 "anonymize_headers", "append_domain", "as_whois_server",
                 "auth_param_basic", "authenticate_children",
                 "authenticate_program", "authenticate_ttl", "broken_posts",
                 "buffered_logs", "cache_access_log", "cache_announce",
                 "cache_dir", "cache_dns_program", "cache_effective_group",
                 "cache_effective_user", "cache_host", "cache_host_acl",
                 "cache_host_domain", "cache_log", "cache_mem",
                 "cache_mem_high", "cache_mem_low", "cache_mgr",
                 "cachemgr_passwd", "cache_peer", "cache_peer_access",
                 "cahce_replacement_policy", "cache_stoplist",
                 "cache_stoplist_pattern", "cache_store_log", "cache_swap",
                 "cache_swap_high", "cache_swap_log", "cache_swap_low",
                 "client_db", "client_lifetime", "client_netmask",
                 "connect_timeout", "coredump_dir", "dead_peer_timeout",
                 "debug_options", "delay_access", "delay_class",
                 "delay_initial_bucket_level", "delay_parameters",
                 "delay_pools", "deny_info", "dns_children", "dns_defnames",
                 "dns_nameservers", "dns_testnames", "emulate_httpd_log",
                 "err_html_text", "fake_user_agent", "firewall_ip",
                 "forwarded_for", "forward_snmpd_port", "fqdncache_size",
                 "ftpget_options", "ftpget_program", "ftp_list_width",
                 "ftp_passive", "ftp_user", "half_closed_clients",
                 "header_access", "header_replace", "hierarchy_stoplist",
                 "high_response_time_warning", "high_page_fault_warning",
                 "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
                 "httpd_accel_host", "httpd_accel_port",
                 "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
                 "http_port", "http_reply_access", "icp_access",
                 "icp_hit_stale", "icp_port", "icp_query_timeout",
                 "ident_lookup", "ident_lookup_access", "ident_timeout",
                 "incoming_http_average", "incoming_icp_average",
                 "inside_firewall", "ipcache_high", "ipcache_low",
                 "ipcache_size", "local_domain", "local_ip", "logfile_rotate",
                 "log_fqdn", "log_icp_queries", "log_mime_hdrs",
                 "maximum_object_size", "maximum_single_addr_tries",
                 "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
                 "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
                 "memory_pools_limit", "memory_replacement_policy",
                 "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt",
                 "minimum_direct_hops", "minimum_object_size",
                 "minimum_retry_timeout", "miss_access", "negative_dns_ttl",
                 "negative_ttl", "neighbor_timeout", "neighbor_type_domain",
                 "netdb_high", "netdb_low", "netdb_ping_period",
                 "netdb_ping_rate", "never_direct", "no_cache",
                 "passthrough_proxy", "pconn_timeout", "pid_filename",
                 "pinger_program", "positive_dns_ttl", "prefer_direct",
                 "proxy_auth", "proxy_auth_realm", "query_icmp", "quick_abort",
                 "quick_abort", "quick_abort_max", "quick_abort_min",
                 "quick_abort_pct", "range_offset_limit", "read_timeout",
                 "redirect_children", "redirect_program",
                 "redirect_rewrites_host_header", "reference_age",
                 "reference_age", "refresh_pattern", "reload_into_ims",
                 "request_body_max_size", "request_size", "request_timeout",
                 "shutdown_lifetime", "single_parent_bypass",
                 "siteselect_timeout", "snmp_access", "snmp_incoming_address",
                 "snmp_port", "source_ping", "ssl_proxy",
                 "store_avg_object_size", "store_objects_per_bucket",
                 "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs",
                 "tcp_incoming_address", "tcp_outgoing_address",
                 "tcp_recv_bufsize", "test_reachability", "udp_hit_obj",
                 "udp_hit_obj_size", "udp_incoming_address",
                 "udp_outgoing_address", "unique_hostname", "unlinkd_program",
                 "uri_whitespace", "useragent_log", "visible_hostname",
                 "wais_relay", "wais_relay_host", "wais_relay_port",
                 ]

    # Option keywords that may follow a directive.
    opts = [ "proxy-only", "weight", "ttl", "no-query", "default",
             "round-robin", "multicast-responder", "on", "off", "all",
             "deny", "allow", "via", "parent", "no-digest", "heap", "lru",
             "realm", "children", "credentialsttl", "none", "disable",
             "offline_toggle", "diskd", "q1", "q2",
             ]

    # cachemgr action names.
    actions = [ "shutdown", "info", "parameter", "server_list",
                "client_list", r'squid\.conf',
                ]

    actions_stats = [ "objects", "vm_objects", "utilization",
                      "ipcache", "fqdncache", "dns", "redirector", "io",
                      "reply_headers", "filedescriptors", "netdb",
                      ]

    actions_log = [ "status", "enable", "disable", "clear"]

    # ACL type names.
    acls = [ "url_regex", "urlpath_regex", "referer_regex", "port",
             "proto", "req_mime_type", "rep_mime_type", "method",
             "browser", "user", "src", "dst", "time", "dstdomain", "ident",
             "snmp_community",
             ]

    ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'

    def makelistre(item_list):
        """Build a word-boundary-anchored alternation regex from a list
        of words.  (Renamed parameter: ``list`` shadowed the builtin.)"""
        return r'\b(?:'+'|'.join(item_list)+r')\b'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#', Comment, 'comment'),
            (makelistre(keywords), Keyword),
            (makelistre(opts), Name.Constant),
            # Actions
            (makelistre(actions), String),
            (r'stats/'+makelistre(actions), String),
            (r'log/'+makelistre(actions)+r'=', String),
            (makelistre(acls), Keyword),
            # IP address, optionally with /netmask or /prefixlen
            (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number),
            (r'\b\d+\b', Number),
            (r'\S+', Text),
        ],
        'comment': [
            # "# TAG: name" lines in the default config are highlighted
            (r'\s*TAG:.*', String.Escape, '#pop'),
            (r'.*', Comment, '#pop'),
        ],
    }
956
957
class DebianControlLexer(RegexLexer):
    """
    Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.

    *New in Pygments 0.9.*
    """
    name = 'Debian Control file'
    aliases = ['control']
    filenames = ['control']

    tokens = {
        'root': [
            (r'^(Description)', Keyword, 'description'),
            (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
            (r'^((Build-)?Depends)', Keyword, 'depends'),
            (r'^((?:Python-)?Version)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$',
             bygroups(Keyword, Text, Number)),
            # any other "Field: value" line
            (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
             bygroups(Keyword, Whitespace, String)),
        ],
        'maintainer': [
            # Fixed rule order: the anchored rule must come first,
            # otherwise the unanchored ``<[^>]+>`` always matched the
            # same text and the '#pop' rule was unreachable, so the
            # lexer could never leave this state.
            (r'<[^>]+>$', Generic.Strong, '#pop'),
            (r'<[^>]+>', Generic.Strong),
            (r',\n?', Text),
            (r'.', Text),
        ],
        'description': [
            (r'(.*)(Homepage)(: )([^\s]+)', bygroups(Text, String, Name, Name.Class)),
            (r':.*\n', Generic.Strong),
            (r' .*\n', Text),
            # empty match leaves the state once nothing above applies
            ('', Text, '#pop'),
        ],
        'depends': [
            (r':\s*', Text),
            # ${substvar:...} substitution variables
            (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)),
            (r'\(', Text, 'depend_vers'),
            (r',', Text),
            (r'\|', Operator),
            (r'[\s]+', Text),
            (r'[}\)]\s*$', Text, '#pop'),
            (r'[}]', Text),
            (r'[^,]$', Name.Function, '#pop'),
            (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function),
        ],
        'depend_vers': [
            (r'\),', Text, '#pop'),
            (r'\)[^,]', Text, '#pop:2'),
            # version relation, e.g. (>= 1.2-3)
            (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number))
        ]
    }
1012
1013
class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
        self.indent_stack = []           # saved enclosing indentation levels
        self.indent = -1                 # current block indentation level
        self.next_indent = 0             # candidate indent for the next block
        self.block_scalar_indent = None  # explicit indent of a block scalar, if any
1023
1024
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    *New in Pygments 0.11.*
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']


    # The helpers below are callback *factories*: they run while the class
    # body is being evaluated and the callbacks they return are stored
    # directly in the token table, so none of them takes ``self``.  Each
    # callback receives (lexer, match, context) where ``context`` is a
    # YamlLexerContext carrying the indentation state.

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            # skip zero-length matches entirely
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            # back to the pre-document state: no indentation known yet
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                # at line start the whole match is the candidate indent
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    # dedent: unwind the stack to the enclosing level
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                if context.next_indent > context.indent:
                    # spaces beyond the current level are flagged as errors
                    extra = text[context.indent:]
                    text = text[:context.indent]
            else:
                # mid-line whitespace merely extends the candidate indent
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                # entering a deeper level: remember the enclosing one
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                # an explicit indicator character also counts as indent
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            # group(1) is the optional digit of the indentation indicator
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                # the whole line is (at most) indentation
                if text:
                    yield match.start(), indent_token_class, text
            else:
                # split into indentation spaces and scalar content
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                # no explicit indent: the first non-empty line defines it,
                # unless the line dedents past the enclosing level, which
                # ends the scalar (pop content + header states)
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                # a shallower line terminates the block scalar
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            # a line that does not indent past the current level ends the
            # plain scalar (pop scalar + new-line states)
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
            context.pos = match.end()
        return callback



    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
             r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors' : [
            # a full-form tag
            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[0-9A-Za-z_-]+)?'
             r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type),
            # an anchor
            (r'&[0-9A-Za-z_-]+', Name.Label),
            # an alias
            (r'\*[0-9A-Za-z_-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[^\n\r\f\v]+', Name.Constant),
        ],

        # the content of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        # default to a YamlLexerContext so the indentation-tracking
        # callbacks above always have their state attributes available
        if context is None:
            context = YamlLexerContext(text, 0)
        return super(YamlLexer, self).get_tokens_unprocessed(text, context)
1424
1425
class LighttpdConfLexer(RegexLexer):
    """
    Lexer for `Lighttpd <http://lighttpd.net/>`_ configuration files.

    *New in Pygments 0.11.*
    """
    name = 'Lighttpd configuration file'
    aliases = ['lighty', 'lighttpd']
    filenames = []   # no characteristic filename pattern; select by alias
    mimetypes = ['text/x-lighttpd-conf']

    tokens = {
        # single flat state; rules are tried in order, first match wins
        'root': [
            (r'#.*\n', Comment.Single),
            (r'/\S*', Name), # pathname
            # bare identifiers (option names, keywords)
            (r'[a-zA-Z._-]+', Keyword),
            # IPv4 address, optionally with a /CIDR suffix
            (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number),
            (r'[0-9]+', Number),
            (r'=>|=~|\+=|==|=|\+', Operator),
            # upper-case $-prefixed names are treated as builtins
            (r'\$[A-Z]+', Name.Builtin),
            (r'[(){}\[\],]', Punctuation),
            # double-quoted string with backslash escapes
            (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double),
            (r'\s+', Text),
        ],

    }
1452
1453
class NginxConfLexer(RegexLexer):
    """
    Lexer for `Nginx <http://nginx.net/>`_ configuration files.

    *New in Pygments 0.11.*
    """
    name = 'Nginx configuration file'
    aliases = ['nginx']
    filenames = []   # no characteristic filename pattern; select by alias
    mimetypes = ['text/x-nginx-conf']

    tokens = {
        # a config is a sequence of directives: each directive name pushes
        # 'stmt', which ends either at ';' or with a '{ ... }' block
        'root': [
            (r'(include)(\s+)([^\s;]+)', bygroups(Keyword, Text, Name)),
            (r'[^\s;#]+', Keyword, 'stmt'),
            include('base'),
        ],
        'block': [
            # '}' pops both 'block' and the 'stmt' that opened it
            (r'}', Punctuation, '#pop:2'),
            (r'[^\s;#]+', Keyword.Namespace, 'stmt'),
            include('base'),
        ],
        'stmt': [
            (r'{', Punctuation, 'block'),
            (r';', Punctuation, '#pop'),
            include('base'),
        ],
        # value tokens shared by every state
        'base': [
            (r'#.*\n', Comment.Single),
            (r'on|off', Name.Constant),
            # variable reference, e.g. $host
            (r'\$[^\s;#()]+', Name.Variable),
            # host:port pair
            (r'([a-z0-9.-]+)(:)([0-9]+)',
             bygroups(Name, Punctuation, Number.Integer)),
            (r'[a-z-]+/[a-z-+]+', String), # mimetype
            #(r'[a-zA-Z._-]+', Keyword),
            # integer, optionally with a k/m size suffix
            (r'[0-9]+[km]?\b', Number.Integer),
            # '~' introduces a regex argument
            (r'(~)(\s*)([^\s{]+)', bygroups(Punctuation, Text, String.Regex)),
            (r'[:=~]', Punctuation),
            (r'[^\s;#{}$]+', String), # catch all
            (r'/[^\s;#]*', Name), # pathname
            (r'\s+', Text),
        ],
    }

eric ide

mercurial