    pygments.lexers.textfmts
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for various text formats.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
11 |
11 |
12 import re |
12 import re |
13 |
13 |
14 from pygments.lexer import RegexLexer, bygroups |
14 from pygments.lexers import guess_lexer, get_lexer_by_name |
|
15 from pygments.lexer import RegexLexer, bygroups, default, include |
15 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
16 Number, Generic, Literal |
17 Number, Generic, Literal, Punctuation |
17 from pygments.util import ClassNotFound |
18 from pygments.util import ClassNotFound |
18 |
19 |
19 __all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer'] |
20 __all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer', |
|
21 'NotmuchLexer', 'KernelLogLexer'] |
20 |
22 |
21 |
23 |
22 class IrcLogsLexer(RegexLexer): |
24 class IrcLogsLexer(RegexLexer): |
23 """ |
25 """ |
24 Lexer for IRC logs in *irssi*, *xchat* or *weechat* style. |
26 Lexer for IRC logs in *irssi*, *xchat* or *weechat* style. |
171 yield offset, Text, content |
173 yield offset, Text, content |
172 |
174 |
173 tokens = { |
175 tokens = { |
174 'root': [ |
176 'root': [ |
175 (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)' |
177 (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)' |
176 r'(HTTP)(/)(1\.[01])(\r?\n|\Z)', |
178 r'(HTTP)(/)(1\.[01]|2|3)(\r?\n|\Z)', |
177 bygroups(Name.Function, Text, Name.Namespace, Text, |
179 bygroups(Name.Function, Text, Name.Namespace, Text, |
178 Keyword.Reserved, Operator, Number, Text), |
180 Keyword.Reserved, Operator, Number, Text), |
179 'headers'), |
181 'headers'), |
180 (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|\Z)', |
182 (r'(HTTP)(/)(1\.[01]|2|3)( +)(\d{3})(?:( +)([^\r\n]+))?(\r?\n|\Z)', |
181 bygroups(Keyword.Reserved, Operator, Number, Text, Number, |
183 bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text, |
182 Text, Name.Exception, Text), |
184 Name.Exception, Text), |
183 'headers'), |
185 'headers'), |
184 ], |
186 ], |
185 'headers': [ |
187 'headers': [ |
186 (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback), |
188 (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback), |
187 (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback), |
189 (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback), |
293 (r'\S+', IncompleteTaskText), |
295 (r'\S+', IncompleteTaskText), |
294 # Tokenize whitespace not containing a newline |
296 # Tokenize whitespace not containing a newline |
295 (r'\s+', IncompleteTaskText), |
297 (r'\s+', IncompleteTaskText), |
296 ], |
298 ], |
297 } |
299 } |
|
300 |
|
301 |
|
class NotmuchLexer(RegexLexer):
    """
    For `Notmuch <https://notmuchmail.org/>`_ email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        """Sub-lex a message body: use the configured ``body_lexer`` if set,
        otherwise guess from the content, falling back to plain text when the
        requested/guessed lexer is unavailable."""
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                # Strip surrounding whitespace so guessing isn't confused by
                # leading/trailing blank lines of the body.
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        # NOTE(review): offsets yielded here are relative to the body match,
        # not to the whole document — presumably acceptable for formatting,
        # but confirm if absolute offsets are ever required.
        for item in lexer.get_tokens_unprocessed(code):
            yield item

    # Notmuch's text output delimits structure with form-feed (\f) markers:
    # \fmessage{ ... \fheader{ ... \fheader} ... \fbody{ ... \fbody} \fmessage}
    tokens = {
        'root': [
            (r'\fmessage{\s*', Keyword, ('message', 'message-attr')),
        ],
        # Attributes on the \fmessage{ line (id, depth, filename, ...).
        # default('#pop') returns to 'message' once no attribute matches.
        'message-attr': [
            (r'(\s*id:\s*)([^\s]+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage}\n', Keyword, '#pop'),
            (r'\fheader{\n', Keyword, 'header'),
            (r'\fbody{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            # Summary line: "author (date) (tags)".
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart{\n', Keyword, 'part'),
            (r'\f(part|attachment){\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody}\n', Keyword, '#pop'),
        ],
        # Comma-separated attributes on a \fpart{ / \fattachment{ line.
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)}\n', Keyword, '#pop'),
            # Parts can nest; '#push' re-enters this state for the inner part.
            (r'\f(?:part|attachment){\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            # Everything up to (but not including) the closing marker is the
            # part content, handed off to the body sub-lexer.
            (r'(?s)(.*?(?=\f(?:part|attachment)}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        # The \fmessage{ prefix is unique to notmuch output, so a match is
        # treated as a certain identification.
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        # Optional name of a lexer used for message bodies; None means guess.
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
|
383 |
|
384 |
|
class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            # Lines with an explicit "facility:level :" prefix (as produced
            # by e.g. syslog-style output) are routed to a severity state.
            # The prefix itself is emitted as Text; the lookahead keeps the
            # "[timestamp]" for the severity state to tokenize.
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit : (?=\[)', Text, 'error'),
            # Bare "[timestamp] ..." lines carry no level; guess it below.
            (r'^(?=\[)', Text, 'unknown'),
        ],
        # Heuristic severity classification for lines without a level prefix,
        # based on keywords anywhere in the line; defaults to 'info'.
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        # Shared rules for every severity state: timestamp, subsystem tag,
        # and the end-of-line pop back to 'root'.
        'base': [
            (r'\[[0-9\. ]+\] ', Number),  # "[ 12.345678] " timestamp
            (r'(?<=\] ).+?:', Keyword),   # "subsystem:" right after timestamp
            (r'\n', Text, '#pop'),
        ],
        # Each severity state renders the remaining message text with a
        # different token type.
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }