eric6/ThirdParty/Pygments/pygments/lexers/textfmts.py

changeset:  7983:54c5cfbb1e29
parent:     7701:25f42e208e08
--- a/eric6/ThirdParty/Pygments/pygments/lexers/textfmts.py	Thu Jan 14 18:06:56 2021 +0100
+++ b/eric6/ThirdParty/Pygments/pygments/lexers/textfmts.py	Thu Jan 14 18:14:15 2021 +0100
@@ -1,430 +1,430 @@
-# -*- coding: utf-8 -*-
-"""
-    pygments.lexers.textfmts
-    ~~~~~~~~~~~~~~~~~~~~~~~~
-
-    Lexers for various text formats.
-
-    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexers import guess_lexer, get_lexer_by_name
-from pygments.lexer import RegexLexer, bygroups, default, include
-from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Number, Generic, Literal, Punctuation
-from pygments.util import ClassNotFound
-
-__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
-           'NotmuchLexer', 'KernelLogLexer']
-
-
-class IrcLogsLexer(RegexLexer):
-    """
-    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
-    """
-
-    name = 'IRC logs'
-    aliases = ['irc']
-    filenames = ['*.weechatlog']
-    mimetypes = ['text/x-irclog']
-
-    flags = re.VERBOSE | re.MULTILINE
-    timestamp = r"""
-        (
-          # irssi / xchat and others
-          (?: \[|\()?                  # Opening bracket or paren for the timestamp
-            (?:                        # Timestamp
-                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
-                    (?:\d{1,4})
-                 [T ])?                # Date/time separator: T or space
-                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
-                    (?: \d?\d)
-            )
-          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
-        |
-          # weechat
-          \d{4}\s\w{3}\s\d{2}\s        # Date
-          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
-        |
-          # xchat
-          \w{3}\s\d{2}\s               # Date
-          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
-        )?
-    """
-    tokens = {
-        'root': [
-            # log start/end
-            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
-            # hack
-            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
-            # normal msgs
-            ("^" + timestamp + r"""
-                (\s*<.*?>\s*)          # Nick """,
-             bygroups(Comment.Preproc, Name.Tag), 'msg'),
-            # /me msgs
-            ("^" + timestamp + r"""
-                (\s*[*]\s+)            # Star
-                (\S+\s+.*?\n)          # Nick + rest of message """,
-             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
-            # join/part msgs
-            ("^" + timestamp + r"""
-                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
-                (\S+\s+)                     # Nick + Space
-                (.*?\n)                         # Rest of message """,
-             bygroups(Comment.Preproc, Keyword, String, Comment)),
-            (r"^.*?\n", Text),
-        ],
-        'msg': [
-            (r"\S+:(?!//)", Name.Attribute),  # Prefix
-            (r".*\n", Text, '#pop'),
-        ],
-    }
-
-
-class GettextLexer(RegexLexer):
-    """
-    Lexer for Gettext catalog files.
-
-    .. versionadded:: 0.9
-    """
-    name = 'Gettext Catalog'
-    aliases = ['pot', 'po']
-    filenames = ['*.pot', '*.po']
-    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
-
-    tokens = {
-        'root': [
-            (r'^#,\s.*?$', Keyword.Type),
-            (r'^#:\s.*?$', Keyword.Declaration),
-            # (r'^#$', Comment),
-            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
-            (r'^(")([A-Za-z-]+:)(.*")$',
-             bygroups(String, Name.Property, String)),
-            (r'^".*"$', String),
-            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
-             bygroups(Name.Variable, Text, String)),
-            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
-             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
-        ]
-    }
-
-
-class HttpLexer(RegexLexer):
-    """
-    Lexer for HTTP sessions.
-
-    .. versionadded:: 1.5
-    """
-
-    name = 'HTTP'
-    aliases = ['http']
-
-    flags = re.DOTALL
-
-    def get_tokens_unprocessed(self, text, stack=('root',)):
-        """Reset the content-type state."""
-        self.content_type = None
-        return RegexLexer.get_tokens_unprocessed(self, text, stack)
-
-    def header_callback(self, match):
-        if match.group(1).lower() == 'content-type':
-            content_type = match.group(5).strip()
-            if ';' in content_type:
-                content_type = content_type[:content_type.find(';')].strip()
-            self.content_type = content_type
-        yield match.start(1), Name.Attribute, match.group(1)
-        yield match.start(2), Text, match.group(2)
-        yield match.start(3), Operator, match.group(3)
-        yield match.start(4), Text, match.group(4)
-        yield match.start(5), Literal, match.group(5)
-        yield match.start(6), Text, match.group(6)
-
-    def continuous_header_callback(self, match):
-        yield match.start(1), Text, match.group(1)
-        yield match.start(2), Literal, match.group(2)
-        yield match.start(3), Text, match.group(3)
-
-    def content_callback(self, match):
-        content_type = getattr(self, 'content_type', None)
-        content = match.group()
-        offset = match.start()
-        if content_type:
-            from pygments.lexers import get_lexer_for_mimetype
-            possible_lexer_mimetypes = [content_type]
-            if '+' in content_type:
-                # application/calendar+xml can be treated as application/xml
-                # if there's not a better match.
-                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
-                                      content_type)
-                possible_lexer_mimetypes.append(general_type)
-
-            for i in possible_lexer_mimetypes:
-                try:
-                    lexer = get_lexer_for_mimetype(i)
-                except ClassNotFound:
-                    pass
-                else:
-                    for idx, token, value in lexer.get_tokens_unprocessed(content):
-                        yield offset + idx, token, value
-                    return
-        yield offset, Text, content
-
-    tokens = {
-        'root': [
-            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)'
-             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
-             bygroups(Name.Function, Text, Name.Namespace, Text,
-                      Keyword.Reserved, Operator, Number, Text),
-             'headers'),
-            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
-             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
-                      Name.Exception, Text),
-             'headers'),
-        ],
-        'headers': [
-            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback),
-            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
-            (r'\r?\n', Text, 'content')
-        ],
-        'content': [
-            (r'.+', content_callback)
-        ]
-    }
-
-    def analyse_text(text):
-        return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',
-                                'OPTIONS /', 'TRACE /', 'PATCH /'))
-
-
-class TodotxtLexer(RegexLexer):
-    """
-    Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format.
-
-    .. versionadded:: 2.0
-    """
-
-    name = 'Todotxt'
-    aliases = ['todotxt']
-    # *.todotxt is not a standard extension for Todo.txt files; including it
-    # makes testing easier, and also makes autodetecting file type easier.
-    filenames = ['todo.txt', '*.todotxt']
-    mimetypes = ['text/x-todo']
-
-    # Aliases mapping standard token types of Todo.txt format concepts
-    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
-    IncompleteTaskText = Text    # Incomplete tasks should look like plain text
-
-    # Priority should have most emphasis to indicate importance of tasks
-    Priority = Generic.Heading
-    # Dates should have next most emphasis because time is important
-    Date = Generic.Subheading
-
-    # Project and context should have equal weight, and be in different colors
-    Project = Generic.Error
-    Context = String
-
-    # If tag functionality is added, it should have the same weight as Project
-    # and Context, and a different color. Generic.Traceback would work well.
-
-    # Regex patterns for building up rules; dates, priorities, projects, and
-    # contexts are all atomic
-    # TODO: Make date regex more ISO 8601 compliant
-    date_regex = r'\d{4,}-\d{2}-\d{2}'
-    priority_regex = r'\([A-Z]\)'
-    project_regex = r'\+\S+'
-    context_regex = r'@\S+'
-
-    # Compound regex expressions
-    complete_one_date_regex = r'(x )(' + date_regex + r')'
-    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
-                               date_regex + r')')
-    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'
-
-    tokens = {
-        # Should parse starting at beginning of line; each line is a task
-        'root': [
-            # Complete task entry points: two total:
-            # 1. Complete task with two dates
-            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
-                                               CompleteTaskText, Date),
-             'complete'),
-            # 2. Complete task with one date
-            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
-             'complete'),
-
-            # Incomplete task entry points: six total:
-            # 1. Priority plus date
-            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
-             'incomplete'),
-            # 2. Priority only
-            (priority_regex, Priority, 'incomplete'),
-            # 3. Leading date
-            (date_regex, Date, 'incomplete'),
-            # 4. Leading context
-            (context_regex, Context, 'incomplete'),
-            # 5. Leading project
-            (project_regex, Project, 'incomplete'),
-            # 6. Non-whitespace catch-all
-            (r'\S+', IncompleteTaskText, 'incomplete'),
-        ],
-
-        # Parse a complete task
-        'complete': [
-            # Newline indicates end of task, should return to root
-            (r'\s*\n', CompleteTaskText, '#pop'),
-            # Tokenize contexts and projects
-            (context_regex, Context),
-            (project_regex, Project),
-            # Tokenize non-whitespace text
-            (r'\S+', CompleteTaskText),
-            # Tokenize whitespace not containing a newline
-            (r'\s+', CompleteTaskText),
-        ],
-
-        # Parse an incomplete task
-        'incomplete': [
-            # Newline indicates end of task, should return to root
-            (r'\s*\n', IncompleteTaskText, '#pop'),
-            # Tokenize contexts and projects
-            (context_regex, Context),
-            (project_regex, Project),
-            # Tokenize non-whitespace text
-            (r'\S+', IncompleteTaskText),
-            # Tokenize whitespace not containing a newline
-            (r'\s+', IncompleteTaskText),
-        ],
-    }
-
-
-class NotmuchLexer(RegexLexer):
-    """
-    For `Notmuch <https://notmuchmail.org/>`_ email text format.
-
-    .. versionadded:: 2.5
-
-    Additional options accepted:
-
-    `body_lexer`
-        If given, highlight the contents of the message body with the specified
-        lexer, else guess it according to the body content (default: ``None``).
-    """
-
-    name = 'Notmuch'
-    aliases = ['notmuch']
-
-    def _highlight_code(self, match):
-        code = match.group(1)
-
-        try:
-            if self.body_lexer:
-                lexer = get_lexer_by_name(self.body_lexer)
-            else:
-                lexer = guess_lexer(code.strip())
-        except ClassNotFound:
-            lexer = get_lexer_by_name('text')
-
-        yield from lexer.get_tokens_unprocessed(code)
-
-    tokens = {
-        'root': [
-            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
-        ],
-        'message-attr': [
-            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
-            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
-             bygroups(Name.Attribute, Number.Integer)),
-            (r'(\s*filename:\s*)(.+\n)',
-             bygroups(Name.Attribute, String)),
-            default('#pop'),
-        ],
-        'message': [
-            (r'\fmessage\}\n', Keyword, '#pop'),
-            (r'\fheader\{\n', Keyword, 'header'),
-            (r'\fbody\{\n', Keyword, 'body'),
-        ],
-        'header': [
-            (r'\fheader\}\n', Keyword, '#pop'),
-            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
-             bygroups(Name.Attribute, String)),
-            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
-             bygroups(Generic.Strong, Literal, Name.Tag)),
-        ],
-        'body': [
-            (r'\fpart\{\n', Keyword, 'part'),
-            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
-            (r'\fbody\}\n', Keyword, '#pop'),
-        ],
-        'part-attr': [
-            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
-            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
-             bygroups(Punctuation, Name.Attribute, String)),
-            (r'(,\s*)(Content-type:\s*)(.+\n)',
-             bygroups(Punctuation, Name.Attribute, String)),
-            default('#pop'),
-        ],
-        'part': [
-            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
-            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
-            (r'^Non-text part: .*\n', Comment),
-            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
-        ],
-    }
-
-    def analyse_text(text):
-        return 1.0 if text.startswith('\fmessage{') else 0.0
-
-    def __init__(self, **options):
-        self.body_lexer = options.get('body_lexer', None)
-        RegexLexer.__init__(self, **options)
-
-
-class KernelLogLexer(RegexLexer):
-    """
-    For Linux Kernel log ("dmesg") output.
-
-    .. versionadded:: 2.6
-    """
-    name = 'Kernel log'
-    aliases = ['kmsg', 'dmesg']
-    filenames = ['*.kmsg', '*.dmesg']
-
-    tokens = {
-        'root': [
-            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
-            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
-            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
-            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
-            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
-            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
-            (r'^(?=\[)', Text, 'unknown'),
-        ],
-        'unknown': [
-            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
-            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
-            default('info'),
-        ],
-        'base': [
-            (r'\[[0-9. ]+\] ', Number),
-            (r'(?<=\] ).+?:', Keyword),
-            (r'\n', Text, '#pop'),
-        ],
-        'debug': [
-            include('base'),
-            (r'.+\n', Comment, '#pop')
-        ],
-        'info': [
-            include('base'),
-            (r'.+\n', Text, '#pop')
-        ],
-        'warn': [
-            include('base'),
-            (r'.+\n', Generic.Strong, '#pop')
-        ],
-        'error': [
-            include('base'),
-            (r'.+\n', Generic.Error, '#pop')
-        ]
-    }
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.textfmts
+    ~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for various text formats.
+
+    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexers import guess_lexer, get_lexer_by_name
+from pygments.lexer import RegexLexer, bygroups, default, include
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Number, Generic, Literal, Punctuation
+from pygments.util import ClassNotFound
+
+__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
+           'NotmuchLexer', 'KernelLogLexer']
+
+
+class IrcLogsLexer(RegexLexer):
+    """
+    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
+    """
+
+    name = 'IRC logs'
+    aliases = ['irc']
+    filenames = ['*.weechatlog']
+    mimetypes = ['text/x-irclog']
+
+    flags = re.VERBOSE | re.MULTILINE
+    timestamp = r"""
+        (
+          # irssi / xchat and others
+          (?: \[|\()?                  # Opening bracket or paren for the timestamp
+            (?:                        # Timestamp
+                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
+                    (?:\d{1,4})
+                 [T ])?                # Date/time separator: T or space
+                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
+                    (?: \d?\d)
+            )
+          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
+        |
+          # weechat
+          \d{4}\s\w{3}\s\d{2}\s        # Date
+          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
+        |
+          # xchat
+          \w{3}\s\d{2}\s               # Date
+          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
+        )?
+    """
+    tokens = {
+        'root': [
+            # log start/end
+            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
+            # hack
+            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
+            # normal msgs
+            ("^" + timestamp + r"""
+                (\s*<.*?>\s*)          # Nick """,
+             bygroups(Comment.Preproc, Name.Tag), 'msg'),
+            # /me msgs
+            ("^" + timestamp + r"""
+                (\s*[*]\s+)            # Star
+                (\S+\s+.*?\n)          # Nick + rest of message """,
+             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
+            # join/part msgs
+            ("^" + timestamp + r"""
+                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
+                (\S+\s+)                     # Nick + Space
+                (.*?\n)                         # Rest of message """,
+             bygroups(Comment.Preproc, Keyword, String, Comment)),
+            (r"^.*?\n", Text),
+        ],
+        'msg': [
+            (r"\S+:(?!//)", Name.Attribute),  # Prefix
+            (r".*\n", Text, '#pop'),
+        ],
+    }
+
+
+class GettextLexer(RegexLexer):
+    """
+    Lexer for Gettext catalog files.
+
+    .. versionadded:: 0.9
+    """
+    name = 'Gettext Catalog'
+    aliases = ['pot', 'po']
+    filenames = ['*.pot', '*.po']
+    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
+
+    tokens = {
+        'root': [
+            (r'^#,\s.*?$', Keyword.Type),
+            (r'^#:\s.*?$', Keyword.Declaration),
+            # (r'^#$', Comment),
+            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
+            (r'^(")([A-Za-z-]+:)(.*")$',
+             bygroups(String, Name.Property, String)),
+            (r'^".*"$', String),
+            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
+             bygroups(Name.Variable, Text, String)),
+            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
+             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
+        ]
+    }
+
+
+class HttpLexer(RegexLexer):
+    """
+    Lexer for HTTP sessions.
+
+    .. versionadded:: 1.5
+    """
+
+    name = 'HTTP'
+    aliases = ['http']
+
+    flags = re.DOTALL
+
+    def get_tokens_unprocessed(self, text, stack=('root',)):
+        """Reset the content-type state."""
+        self.content_type = None
+        return RegexLexer.get_tokens_unprocessed(self, text, stack)
+
+    def header_callback(self, match):
+        if match.group(1).lower() == 'content-type':
+            content_type = match.group(5).strip()
+            if ';' in content_type:
+                content_type = content_type[:content_type.find(';')].strip()
+            self.content_type = content_type
+        yield match.start(1), Name.Attribute, match.group(1)
+        yield match.start(2), Text, match.group(2)
+        yield match.start(3), Operator, match.group(3)
+        yield match.start(4), Text, match.group(4)
+        yield match.start(5), Literal, match.group(5)
+        yield match.start(6), Text, match.group(6)
+
+    def continuous_header_callback(self, match):
+        yield match.start(1), Text, match.group(1)
+        yield match.start(2), Literal, match.group(2)
+        yield match.start(3), Text, match.group(3)
+
+    def content_callback(self, match):
+        content_type = getattr(self, 'content_type', None)
+        content = match.group()
+        offset = match.start()
+        if content_type:
+            from pygments.lexers import get_lexer_for_mimetype
+            possible_lexer_mimetypes = [content_type]
+            if '+' in content_type:
+                # application/calendar+xml can be treated as application/xml
+                # if there's not a better match.
+                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
+                                      content_type)
+                possible_lexer_mimetypes.append(general_type)
+
+            for i in possible_lexer_mimetypes:
+                try:
+                    lexer = get_lexer_for_mimetype(i)
+                except ClassNotFound:
+                    pass
+                else:
+                    for idx, token, value in lexer.get_tokens_unprocessed(content):
+                        yield offset + idx, token, value
+                    return
+        yield offset, Text, content
+
+    tokens = {
+        'root': [
+            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)'
+             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
+             bygroups(Name.Function, Text, Name.Namespace, Text,
+                      Keyword.Reserved, Operator, Number, Text),
+             'headers'),
+            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
+             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
+                      Name.Exception, Text),
+             'headers'),
+        ],
+        'headers': [
+            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback),
+            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
+            (r'\r?\n', Text, 'content')
+        ],
+        'content': [
+            (r'.+', content_callback)
+        ]
+    }
+
+    def analyse_text(text):
+        return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',
+                                'OPTIONS /', 'TRACE /', 'PATCH /'))
+
+
+class TodotxtLexer(RegexLexer):
+    """
+    Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format.
+
+    .. versionadded:: 2.0
+    """
+
+    name = 'Todotxt'
+    aliases = ['todotxt']
+    # *.todotxt is not a standard extension for Todo.txt files; including it
+    # makes testing easier, and also makes autodetecting file type easier.
+    filenames = ['todo.txt', '*.todotxt']
+    mimetypes = ['text/x-todo']
+
+    # Aliases mapping standard token types of Todo.txt format concepts
+    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
+    IncompleteTaskText = Text    # Incomplete tasks should look like plain text
+
+    # Priority should have most emphasis to indicate importance of tasks
+    Priority = Generic.Heading
+    # Dates should have next most emphasis because time is important
+    Date = Generic.Subheading
+
+    # Project and context should have equal weight, and be in different colors
+    Project = Generic.Error
+    Context = String
+
+    # If tag functionality is added, it should have the same weight as Project
+    # and Context, and a different color. Generic.Traceback would work well.
+
+    # Regex patterns for building up rules; dates, priorities, projects, and
+    # contexts are all atomic
+    # TODO: Make date regex more ISO 8601 compliant
+    date_regex = r'\d{4,}-\d{2}-\d{2}'
+    priority_regex = r'\([A-Z]\)'
+    project_regex = r'\+\S+'
+    context_regex = r'@\S+'
+
+    # Compound regex expressions
+    complete_one_date_regex = r'(x )(' + date_regex + r')'
+    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
+                               date_regex + r')')
+    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'
+
+    tokens = {
+        # Should parse starting at beginning of line; each line is a task
+        'root': [
+            # Complete task entry points: two total:
+            # 1. Complete task with two dates
+            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
+                                               CompleteTaskText, Date),
+             'complete'),
+            # 2. Complete task with one date
+            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
+             'complete'),
+
+            # Incomplete task entry points: six total:
+            # 1. Priority plus date
+            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
+             'incomplete'),
+            # 2. Priority only
+            (priority_regex, Priority, 'incomplete'),
+            # 3. Leading date
+            (date_regex, Date, 'incomplete'),
+            # 4. Leading context
+            (context_regex, Context, 'incomplete'),
+            # 5. Leading project
+            (project_regex, Project, 'incomplete'),
+            # 6. Non-whitespace catch-all
+            (r'\S+', IncompleteTaskText, 'incomplete'),
+        ],
+
+        # Parse a complete task
+        'complete': [
+            # Newline indicates end of task, should return to root
+            (r'\s*\n', CompleteTaskText, '#pop'),
+            # Tokenize contexts and projects
+            (context_regex, Context),
+            (project_regex, Project),
+            # Tokenize non-whitespace text
+            (r'\S+', CompleteTaskText),
+            # Tokenize whitespace not containing a newline
+            (r'\s+', CompleteTaskText),
+        ],
+
+        # Parse an incomplete task
+        'incomplete': [
+            # Newline indicates end of task, should return to root
+            (r'\s*\n', IncompleteTaskText, '#pop'),
+            # Tokenize contexts and projects
+            (context_regex, Context),
+            (project_regex, Project),
+            # Tokenize non-whitespace text
+            (r'\S+', IncompleteTaskText),
+            # Tokenize whitespace not containing a newline
+            (r'\s+', IncompleteTaskText),
+        ],
+    }
+
+
+class NotmuchLexer(RegexLexer):
+    """
+    For `Notmuch <https://notmuchmail.org/>`_ email text format.
+
+    .. versionadded:: 2.5
+
+    Additional options accepted:
+
+    `body_lexer`
+        If given, highlight the contents of the message body with the specified
+        lexer, else guess it according to the body content (default: ``None``).
+    """
+
+    name = 'Notmuch'
+    aliases = ['notmuch']
+
+    def _highlight_code(self, match):
+        code = match.group(1)
+
+        try:
+            if self.body_lexer:
+                lexer = get_lexer_by_name(self.body_lexer)
+            else:
+                lexer = guess_lexer(code.strip())
+        except ClassNotFound:
+            lexer = get_lexer_by_name('text')
+
+        yield from lexer.get_tokens_unprocessed(code)
+
+    tokens = {
+        'root': [
+            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
+        ],
+        'message-attr': [
+            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
+            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
+             bygroups(Name.Attribute, Number.Integer)),
+            (r'(\s*filename:\s*)(.+\n)',
+             bygroups(Name.Attribute, String)),
+            default('#pop'),
+        ],
+        'message': [
+            (r'\fmessage\}\n', Keyword, '#pop'),
+            (r'\fheader\{\n', Keyword, 'header'),
+            (r'\fbody\{\n', Keyword, 'body'),
+        ],
+        'header': [
+            (r'\fheader\}\n', Keyword, '#pop'),
+            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
+             bygroups(Name.Attribute, String)),
+            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
+             bygroups(Generic.Strong, Literal, Name.Tag)),
+        ],
+        'body': [
+            (r'\fpart\{\n', Keyword, 'part'),
+            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
+            (r'\fbody\}\n', Keyword, '#pop'),
+        ],
+        'part-attr': [
+            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
+            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
+             bygroups(Punctuation, Name.Attribute, String)),
+            (r'(,\s*)(Content-type:\s*)(.+\n)',
+             bygroups(Punctuation, Name.Attribute, String)),
+            default('#pop'),
+        ],
+        'part': [
+            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
+            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
+            (r'^Non-text part: .*\n', Comment),
+            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
+        ],
+    }
+
+    def analyse_text(text):
+        return 1.0 if text.startswith('\fmessage{') else 0.0
+
+    def __init__(self, **options):
+        self.body_lexer = options.get('body_lexer', None)
+        RegexLexer.__init__(self, **options)
+
+
+class KernelLogLexer(RegexLexer):
+    """
+    For Linux Kernel log ("dmesg") output.
+
+    .. versionadded:: 2.6
+    """
+    name = 'Kernel log'
+    aliases = ['kmsg', 'dmesg']
+    filenames = ['*.kmsg', '*.dmesg']
+
+    tokens = {
+        'root': [
+            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
+            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
+            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
+            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
+            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
+            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
+            (r'^(?=\[)', Text, 'unknown'),
+        ],
+        'unknown': [
+            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
+            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
+            default('info'),
+        ],
+        'base': [
+            (r'\[[0-9. ]+\] ', Number),
+            (r'(?<=\] ).+?:', Keyword),
+            (r'\n', Text, '#pop'),
+        ],
+        'debug': [
+            include('base'),
+            (r'.+\n', Comment, '#pop')
+        ],
+        'info': [
+            include('base'),
+            (r'.+\n', Text, '#pop')
+        ],
+        'warn': [
+            include('base'),
+            (r'.+\n', Generic.Strong, '#pop')
+        ],
+        'error': [
+            include('base'),
+            (r'.+\n', Generic.Error, '#pop')
+        ]
+    }
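
The lexers above are all reachable through the normal Pygments entry
points. A minimal sketch (assumed sample data, not part of the
changeset) showing HttpLexer: header_callback records the
Content-Type header, and content_callback then hands the JSON body to
the JSON lexer.

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.textfmts import HttpLexer

    session = (
        'POST /api/items HTTP/1.1\r\n'
        'Host: example.com\r\n'
        'Content-Type: application/json\r\n'
        '\r\n'
        '{"name": "widget", "count": 3}'
    )
    # Request line, headers and body get distinct token types; the
    # body is re-lexed as JSON because of its declared mimetype.
    print(highlight(session, HttpLexer(), TerminalFormatter()))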

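HttpLexer.analyse_text() also lets guess_lexer() pick the lexer up
automatically. A minimal sketch (assumed usage; ties between lexers
are possible, so treat this as illustrative):

    from pygments.lexers import guess_lexer

    lexer = guess_lexer('GET / HTTP/1.1\r\nHost: example.com\r\n\r\n')
    # analyse_text() matches the 'GET /' prefix, so this should
    # report 'HTTP'.
    print(lexer.name)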
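
Token streams can be inspected directly with get_tokens(). A minimal
sketch (assumed sample task) for TodotxtLexer:

    from pygments.lexers.textfmts import TodotxtLexer

    line = '(A) 2021-01-14 Call the dentist @phone +health\n'
    for token_type, value in TodotxtLexer().get_tokens(line):
        print(token_type, repr(value))
    # '(A)' comes out as Generic.Heading (Priority), the date as
    # Generic.Subheading, '@phone' as String (Context) and '+health'
    # as Generic.Error (Project).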
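
Finally, NotmuchLexer's body_lexer option can be passed through
get_lexer_by_name(). A minimal sketch (assumed usage):

    from pygments.lexers import get_lexer_by_name

    # Equivalent to NotmuchLexer(body_lexer='python'): message-part
    # bodies are highlighted as Python instead of being guessed via
    # guess_lexer().
    lexer = get_lexer_by_name('notmuch', body_lexer='python')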