ThirdParty/Pygments/pygments/lexers/parsers.py

changeset 4172
4f20dba37ab6
parent 3145
a9de05d4a22f
child 4697
c2e9bf425554
--- a/ThirdParty/Pygments/pygments/lexers/parsers.py	Wed Mar 11 18:25:37 2015 +0100
+++ b/ThirdParty/Pygments/pygments/lexers/parsers.py	Wed Mar 11 18:32:27 2015 +0100
@@ -5,33 +5,33 @@
 
     Lexers for parser generators.
 
-    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
-from __future__ import unicode_literals
-
 import re
 
 from pygments.lexer import RegexLexer, DelegatingLexer, \
     include, bygroups, using
 from pygments.token import Punctuation, Other, Text, Comment, Operator, \
-     Keyword, Name, String, Number, Whitespace
-from pygments.lexers.compiled import JavaLexer, CLexer, CppLexer, \
-    ObjectiveCLexer, DLexer
+    Keyword, Name, String, Number, Whitespace
+from pygments.lexers.jvm import JavaLexer
+from pygments.lexers.c_cpp import CLexer, CppLexer
+from pygments.lexers.objective import ObjectiveCLexer
+from pygments.lexers.d import DLexer
 from pygments.lexers.dotnet import CSharpLexer
-from pygments.lexers.agile import RubyLexer, PythonLexer, PerlLexer
-from pygments.lexers.web import ActionScriptLexer
-
+from pygments.lexers.ruby import RubyLexer
+from pygments.lexers.python import PythonLexer
+from pygments.lexers.perl import PerlLexer
 
 __all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
            'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
            'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
            'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
-           #'AntlrCLexer',
+           # 'AntlrCLexer',
            'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
-           'AntlrJavaLexer', "AntlrActionScriptLexer",
-           'TreetopLexer']
+           'AntlrJavaLexer', 'AntlrActionScriptLexer',
+           'TreetopLexer', 'EbnfLexer']
 
 
 class RagelLexer(RegexLexer):
@@ -40,7 +40,7 @@
     fragments of Ragel.  For ``.rl`` files, use RagelEmbeddedLexer instead
     (or one of the language-specific subclasses).
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Ragel'
@@ -65,29 +65,29 @@
             (r'[+-]?[0-9]+', Number.Integer),
         ],
         'literals': [
-            (r'"(\\\\|\\"|[^"])*"', String), # double quote string
-            (r"'(\\\\|\\'|[^'])*'", String), # single quote string
-            (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
-            (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
+            (r'"(\\\\|\\"|[^"])*"', String),              # double quote string
+            (r"'(\\\\|\\'|[^'])*'", String),              # single quote string
+            (r'\[(\\\\|\\\]|[^\]])*\]', String),          # square bracket literals
+            (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex),  # regular expressions
         ],
         'identifiers': [
-            (r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
+            (r'[a-zA-Z_]\w*', Name.Variable),
         ],
         'operators': [
-            (r',', Operator), # Join
-            (r'\||&|--?', Operator), # Union, Intersection and Subtraction
-            (r'\.|<:|:>>?', Operator), # Concatention
-            (r':', Operator), # Label
-            (r'->', Operator), # Epsilon Transition
-            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
-            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
-            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
-            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
-            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
-            (r'>|@|\$|%', Operator), # Transition Actions and Priorities
-            (r'\*|\?|\+|{[0-9]*,[0-9]*}', Operator), # Repetition
-            (r'!|\^', Operator), # Negation
-            (r'\(|\)', Operator), # Grouping
+            (r',', Operator),                           # Join
+            (r'\||&|--?', Operator),                    # Union, Intersection and Subtraction
+            (r'\.|<:|:>>?', Operator),                  # Concatenation
+            (r':', Operator),                           # Label
+            (r'->', Operator),                          # Epsilon Transition
+            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator),    # EOF Actions
+            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator),    # Global Error Actions
+            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator),  # Local Error Actions
+            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator),     # To-State Actions
+            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator),  # From-State Actions
+            (r'>|@|\$|%', Operator),                    # Transition Actions and Priorities
+            (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator),  # Repetition
+            (r'!|\^', Operator),                        # Negation
+            (r'\(|\)', Operator),                       # Grouping
         ],
         'root': [
             include('literals'),
@@ -97,21 +97,21 @@
             include('numbers'),
             include('identifiers'),
             include('operators'),
-            (r'{', Punctuation, 'host'),
+            (r'\{', Punctuation, 'host'),
             (r'=', Operator),
             (r';', Punctuation),
         ],
         'host': [
-            (r'(' + r'|'.join(( # keep host code in largest possible chunks
-                r'[^{}\'"/#]+', # exclude unsafe characters
-                r'[^\\][\\][{}]', # allow escaped { or }
+            (r'(' + r'|'.join((  # keep host code in largest possible chunks
+                r'[^{}\'"/#]+',  # exclude unsafe characters
+                r'[^\\]\\[{}]',  # allow escaped { or }
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"', # double quote string
-                r"'(\\\\|\\'|[^'])*'", # single quote string
-                r'//.*$\n?', # single line comment
-                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
-                r'\#.*$\n?', # ruby comment
+                r'"(\\\\|\\"|[^"])*"',  # double quote string
+                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'//.*$\n?',            # single line comment
+                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
+                r'\#.*$\n?',            # ruby comment
 
                 # regular expression: There's no reason for it to start
                 # with a * and this stops confusion with comments.
@@ -121,8 +121,8 @@
                 r'/',
             )) + r')+', Other),
 
-            (r'{', Punctuation, '#push'),
-            (r'}', Punctuation, '#pop'),
+            (r'\{', Punctuation, '#push'),
+            (r'\}', Punctuation, '#pop'),
         ],
     }
 
@@ -134,7 +134,7 @@
     This will only highlight Ragel statements. If you want host language
     highlighting then call the language-specific Ragel lexer.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Embedded Ragel'
@@ -143,17 +143,17 @@
 
     tokens = {
         'root': [
-            (r'(' + r'|'.join(( # keep host code in largest possible chunks
-                r'[^%\'"/#]+', # exclude unsafe characters
-                r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
+            (r'(' + r'|'.join((   # keep host code in largest possible chunks
+                r'[^%\'"/#]+',    # exclude unsafe characters
+                r'%(?=[^%]|$)',   # a single % sign is okay, just not 2 of them
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"', # double quote string
-                r"'(\\\\|\\'|[^'])*'", # single quote string
-                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
-                r'//.*$\n?', # single line comment
-                r'\#.*$\n?', # ruby/ragel comment
-                r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
+                r'"(\\\\|\\"|[^"])*"',  # double quote string
+                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
+                r'//.*$\n?',  # single line comment
+                r'\#.*$\n?',  # ruby/ragel comment
+                r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression
 
                 # / is safe now that we've handled regex and javadoc comments
                 r'/',
@@ -167,15 +167,15 @@
                                                      Punctuation, Text)),
 
             # Multi Line FSM.
-            (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'),
+            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
         ],
         'multi-line-fsm': [
-            (r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
+            (r'(' + r'|'.join((  # keep ragel code in largest possible chunks.
                 r'(' + r'|'.join((
-                    r'[^}\'"\[/#]', # exclude unsafe characters
-                    r'}(?=[^%]|$)', # } is okay as long as it's not followed by %
-                    r'}%(?=[^%]|$)', # ...well, one %'s okay, just not two...
-                    r'[^\\][\\][{}]', # ...and } is okay if it's escaped
+                    r'[^}\'"\[/#]',   # exclude unsafe characters
+                    r'\}(?=[^%]|$)',   # } is okay as long as it's not followed by %
+                    r'\}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
+                    r'[^\\]\\[{}]',   # ...and } is okay if it's escaped
 
                     # allow / if it's preceded with one of these symbols
                     # (ragel EOF actions)
@@ -186,35 +186,35 @@
                     r'/(?!\*)(\\\\|\\/|[^/])*/\*',
 
                     # allow / as long as it's not followed by another / or by a *
-                    r'/(?=[^/\*]|$)',
+                    r'/(?=[^/*]|$)',
 
                     # We want to match as many of these as we can in one block.
                     # Not sure if we need the + sign here,
                     # does it help performance?
-                    )) + r')+',
+                )) + r')+',
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"', # double quote string
-                r"'(\\\\|\\'|[^'])*'", # single quote string
-                r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
-                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
-                r'//.*$\n?', # single line comment
-                r'\#.*$\n?', # ruby/ragel comment
+                r'"(\\\\|\\"|[^"])*"',      # double quote string
+                r"'(\\\\|\\'|[^'])*'",      # single quote string
+                r"\[(\\\\|\\\]|[^\]])*\]",  # square bracket literal
+                r'/\*(.|\n)*?\*/',          # multi-line javadoc-style comment
+                r'//.*$\n?',                # single line comment
+                r'\#.*$\n?',                # ruby/ragel comment
             )) + r')+', using(RagelLexer)),
 
-            (r'}%%', Punctuation, '#pop'),
+            (r'\}%%', Punctuation, '#pop'),
         ]
     }
 
     def analyse_text(text):
-        return '@LANG: indep' in text or 0.1
+        return '@LANG: indep' in text
 
 
 class RagelRubyLexer(DelegatingLexer):
     """
     A lexer for `Ragel`_ in a Ruby host file.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Ragel in Ruby Host'
@@ -223,7 +223,7 @@
 
     def __init__(self, **options):
         super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
-                                              **options)
+                                             **options)
 
     def analyse_text(text):
         return '@LANG: ruby' in text
@@ -233,7 +233,7 @@
     """
     A lexer for `Ragel`_ in a C host file.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Ragel in C Host'
@@ -252,7 +252,7 @@
     """
     A lexer for `Ragel`_ in a D host file.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Ragel in D Host'
@@ -270,7 +270,7 @@
     """
     A lexer for `Ragel`_ in a CPP host file.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Ragel in CPP Host'
@@ -288,7 +288,7 @@
     """
     A lexer for `Ragel`_ in an Objective C host file.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Ragel in Objective C Host'
@@ -308,7 +308,7 @@
     """
     A lexer for `Ragel`_ in a Java host file.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'Ragel in Java Host'
@@ -329,7 +329,7 @@
     Should not be called directly, instead
     use DelegatingLexer for your target language.
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
 
     .. _ANTLR: http://www.antlr.org/
     """
@@ -338,9 +338,9 @@
     aliases = ['antlr']
     filenames = []
 
-    _id =          r'[A-Za-z][A-Za-z_0-9]*'
-    _TOKEN_REF =   r'[A-Z][A-Za-z_0-9]*'
-    _RULE_REF =    r'[a-z][A-Za-z_0-9]*'
+    _id = r'[A-Za-z]\w*'
+    _TOKEN_REF = r'[A-Z]\w*'
+    _RULE_REF = r'[a-z]\w*'
     _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
     _INT = r'[0-9]+'
 
@@ -364,17 +364,17 @@
             # tokensSpec
             (r'tokens\b', Keyword, 'tokens'),
             # attrScope
-            (r'(scope)(\s*)(' + _id + ')(\s*)({)',
+            (r'(scope)(\s*)(' + _id + ')(\s*)(\{)',
              bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                       Punctuation), 'action'),
             # exception
             (r'(catch|finally)\b', Keyword, 'exception'),
             # action
-            (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)({)',
+            (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)(\{)',
              bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                       Name.Label, Whitespace, Punctuation), 'action'),
             # rule
-            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', \
+            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
              bygroups(Keyword, Whitespace, Name.Label, Punctuation),
              ('rule-alts', 'rule-prelims')),
         ],
@@ -397,18 +397,18 @@
             (r'(throws)(\s+)(' + _id + ')',
              bygroups(Keyword, Whitespace, Name.Label)),
             (r'(,)(\s*)(' + _id + ')',
-             bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
+             bygroups(Punctuation, Whitespace, Name.Label)),  # Additional throws
             # optionsSpec
             (r'options\b', Keyword, 'options'),
             # ruleScopeSpec - scope followed by target language code or name of action
             # TODO finish implementing other possibilities for scope
             # L173 ANTLRv3.g from ANTLR book
-            (r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation),
-            'action'),
+            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
+             'action'),
             (r'(scope)(\s+)(' + _id + ')(\s*)(;)',
              bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
             # ruleAction
-            (r'(@' + _id + ')(\s*)({)',
+            (r'(@' + _id + ')(\s*)(\{)',
              bygroups(Name.Label, Whitespace, Punctuation), 'action'),
             # finished prelims, go to rule alts!
             (r':', Punctuation, '#pop')
@@ -427,9 +427,9 @@
             (r'<<([^>]|>[^>])>>', String),
             # identifiers
             # Tokens start with capital letter.
-            (r'\$?[A-Z_][A-Za-z_0-9]*', Name.Constant),
+            (r'\$?[A-Z_]\w*', Name.Constant),
             # Rules start with small letter.
-            (r'\$?[a-z_][A-Za-z_0-9]*', Name.Variable),
+            (r'\$?[a-z_]\w*', Name.Variable),
             # operators
             (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
             (r',', Punctuation),
@@ -440,32 +440,32 @@
         'tokens': [
             include('whitespace'),
             include('comments'),
-            (r'{', Punctuation),
+            (r'\{', Punctuation),
             (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
              + ')?(\s*)(;)',
              bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                       String, Whitespace, Punctuation)),
-            (r'}', Punctuation, '#pop'),
+            (r'\}', Punctuation, '#pop'),
         ],
         'options': [
             include('whitespace'),
             include('comments'),
-            (r'{', Punctuation),
+            (r'\{', Punctuation),
             (r'(' + _id + r')(\s*)(=)(\s*)(' +
-             '|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)',
+             '|'.join((_id, _STRING_LITERAL, _INT, '\*')) + ')(\s*)(;)',
              bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                       Text, Whitespace, Punctuation)),
-            (r'}', Punctuation, '#pop'),
+            (r'\}', Punctuation, '#pop'),
         ],
         'action': [
-            (r'(' + r'|'.join(( # keep host code in largest possible chunks
-                r'[^\${}\'"/\\]+', # exclude unsafe characters
+            (r'(' + r'|'.join((    # keep host code in largest possible chunks
+                r'[^${}\'"/\\]+',  # exclude unsafe characters
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"', # double quote string
-                r"'(\\\\|\\'|[^'])*'", # single quote string
-                r'//.*$\n?', # single line comment
-                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+                r'"(\\\\|\\"|[^"])*"',  # double quote string
+                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'//.*$\n?',            # single line comment
+                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
 
                 # regular expression: There's no reason for it to start
                 # with a * and this stops confusion with comments.
@@ -481,18 +481,18 @@
             (r'(\\)(%)', bygroups(Punctuation, Other)),
             (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
              bygroups(Name.Variable, Punctuation, Name.Property)),
-            (r'{', Punctuation, '#push'),
-            (r'}', Punctuation, '#pop'),
+            (r'\{', Punctuation, '#push'),
+            (r'\}', Punctuation, '#pop'),
         ],
         'nested-arg-action': [
-            (r'(' + r'|'.join(( # keep host code in largest possible chunks.
-                r'[^\$\[\]\'"/]+', # exclude unsafe characters
+            (r'(' + r'|'.join((    # keep host code in largest possible chunks.
+                r'[^$\[\]\'"/]+',  # exclude unsafe characters
 
                 # strings and comments may safely contain unsafe characters
-                r'"(\\\\|\\"|[^"])*"', # double quote string
-                r"'(\\\\|\\'|[^'])*'", # single quote string
-                r'//.*$\n?', # single line comment
-                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+                r'"(\\\\|\\"|[^"])*"',  # double quote string
+                r"'(\\\\|\\'|[^'])*'",  # single quote string
+                r'//.*$\n?',            # single line comment
+                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
 
                 # regular expression: There's no reason for it to start
                 # with a * and this stops confusion with comments.
@@ -522,11 +522,11 @@
 # so just assume they're C++.  No idea how to make Objective C work in the
 # future.
 
-#class AntlrCLexer(DelegatingLexer):
+# class AntlrCLexer(DelegatingLexer):
 #    """
 #    ANTLR with C Target
 #
-#    *New in Pygments 1.1*
+#    .. versionadded:: 1.1
 #    """
 #
 #    name = 'ANTLR With C Target'
@@ -539,11 +539,12 @@
 #    def analyse_text(text):
 #        return re.match(r'^\s*language\s*=\s*C\s*;', text)
 
+
 class AntlrCppLexer(DelegatingLexer):
     """
     `ANTLR`_ with CPP Target
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With CPP Target'
@@ -555,14 +556,14 @@
 
     def analyse_text(text):
         return AntlrLexer.analyse_text(text) and \
-               re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
+            re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
 
 
 class AntlrObjectiveCLexer(DelegatingLexer):
     """
     `ANTLR`_ with Objective-C Target
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With ObjectiveC Target'
@@ -575,14 +576,14 @@
 
     def analyse_text(text):
         return AntlrLexer.analyse_text(text) and \
-               re.search(r'^\s*language\s*=\s*ObjC\s*;', text)
+            re.search(r'^\s*language\s*=\s*ObjC\s*;', text)
 
 
 class AntlrCSharpLexer(DelegatingLexer):
     """
     `ANTLR`_ with C# Target
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With C# Target'
@@ -595,14 +596,14 @@
 
     def analyse_text(text):
         return AntlrLexer.analyse_text(text) and \
-               re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
+            re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
 
 
 class AntlrPythonLexer(DelegatingLexer):
     """
     `ANTLR`_ with Python Target
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With Python Target'
@@ -615,14 +616,14 @@
 
     def analyse_text(text):
         return AntlrLexer.analyse_text(text) and \
-               re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
+            re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
 
 
 class AntlrJavaLexer(DelegatingLexer):
     """
     `ANTLR`_ with Java Target
 
-    *New in Pygments 1.1*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With Java Target'
@@ -642,7 +643,7 @@
     """
     `ANTLR`_ with Ruby Target
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With Ruby Target'
@@ -655,14 +656,14 @@
 
     def analyse_text(text):
         return AntlrLexer.analyse_text(text) and \
-               re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
+            re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
 
 
 class AntlrPerlLexer(DelegatingLexer):
     """
     `ANTLR`_ with Perl Target
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With Perl Target'
@@ -675,14 +676,14 @@
 
     def analyse_text(text):
         return AntlrLexer.analyse_text(text) and \
-               re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
+            re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
 
 
 class AntlrActionScriptLexer(DelegatingLexer):
     """
     `ANTLR`_ with ActionScript Target
 
-    *New in Pygments 1.1.*
+    .. versionadded:: 1.1
     """
 
     name = 'ANTLR With ActionScript Target'
@@ -690,19 +691,21 @@
     filenames = ['*.G', '*.g']
 
     def __init__(self, **options):
+        from pygments.lexers.actionscript import ActionScriptLexer
         super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer,
                                                      AntlrLexer, **options)
 
     def analyse_text(text):
         return AntlrLexer.analyse_text(text) and \
-               re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
+            re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
+
 
 class TreetopBaseLexer(RegexLexer):
     """
     A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
     Not for direct use; use TreetopLexer instead.
 
-    *New in Pygments 1.6.*
+    .. versionadded:: 1.6
     """
 
     tokens = {
@@ -717,43 +720,43 @@
             include('end'),
             (r'module\b', Keyword, '#push'),
             (r'grammar\b', Keyword, 'grammar'),
-            (r'[A-Z][A-Za-z_0-9]*(?:::[A-Z][A-Za-z_0-9]*)*', Name.Namespace),
+            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
         ],
         'grammar': [
             include('space'),
             include('end'),
             (r'rule\b', Keyword, 'rule'),
             (r'include\b', Keyword, 'include'),
-            (r'[A-Z][A-Za-z_0-9]*', Name),
+            (r'[A-Z]\w*', Name),
         ],
         'include': [
             include('space'),
-            (r'[A-Z][A-Za-z_0-9]*(?:::[A-Z][A-Za-z_0-9]*)*', Name.Class, '#pop'),
+            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
         ],
         'rule': [
             include('space'),
             include('end'),
             (r'"(\\\\|\\"|[^"])*"', String.Double),
             (r"'(\\\\|\\'|[^'])*'", String.Single),
-            (r'([A-Za-z_][A-Za-z_0-9]*)(:)', bygroups(Name.Label, Punctuation)),
-            (r'[A-Za-z_][A-Za-z_0-9]*', Name),
+            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
+            (r'[A-Za-z_]\w*', Name),
             (r'[()]', Punctuation),
             (r'[?+*/&!~]', Operator),
             (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
             (r'([0-9]*)(\.\.)([0-9]*)',
              bygroups(Number.Integer, Operator, Number.Integer)),
             (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
-            (r'{', Punctuation, 'inline_module'),
+            (r'\{', Punctuation, 'inline_module'),
             (r'\.', String.Regex),
         ],
         'inline_module': [
-            (r'{', Other, 'ruby'),
-            (r'}', Punctuation, '#pop'),
+            (r'\{', Other, 'ruby'),
+            (r'\}', Punctuation, '#pop'),
             (r'[^{}]+', Other),
         ],
         'ruby': [
-            (r'{', Other, '#push'),
-            (r'}', Other, '#pop'),
+            (r'\{', Other, '#push'),
+            (r'\}', Other, '#pop'),
             (r'[^{}]+', Other),
         ],
         'space': [
@@ -765,11 +768,12 @@
         ],
     }
 
+
 class TreetopLexer(DelegatingLexer):
     """
     A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
 
-    *New in Pygments 1.6.*
+    .. versionadded:: 1.6
     """
 
     name = 'Treetop'
@@ -778,3 +782,54 @@
 
     def __init__(self, **options):
         super(TreetopLexer, self).__init__(RubyLexer, TreetopBaseLexer, **options)
+
+
+class EbnfLexer(RegexLexer):
+    """
+    Lexer for `ISO/IEC 14977 EBNF
+    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
+    grammars.
+
+    .. versionadded:: 2.0
+    """
+
+    name = 'EBNF'
+    aliases = ['ebnf']
+    filenames = ['*.ebnf']
+    mimetypes = ['text/x-ebnf']
+
+    tokens = {
+        'root': [  # outside a rule body: whitespace, comments, rule names
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            (r'=', Operator, 'production'),  # '=' enters the 'production' state
+        ],
+        'production': [  # right-hand side, left again on ';' or '.'
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            (r'"[^"]*"', String.Double),  # double-quoted text, no escape handling
+            (r"'[^']*'", String.Single),  # single-quoted text, no escape handling
+            (r'(\?[^?]*\?)', Name.Entity),  # any text enclosed in a pair of '?'
+            (r'[\[\]{}(),|]', Punctuation),  # brackets, braces, parens, ',' and '|'
+            (r'-', Operator),
+            (r';', Punctuation, '#pop'),  # ';' ends the production
+            (r'\.', Punctuation, '#pop'),  # '.' ends the production as well
+        ],
+        'whitespace': [
+            (r'\s+', Text),
+        ],
+        'comment_start': [
+            (r'\(\*', Comment.Multiline, 'comment'),  # '(*' enters the 'comment' state
+        ],
+        'comment': [
+            (r'[^*)]', Comment.Multiline),  # one char that is neither '*' nor ')'
+            include('comment_start'),  # NOTE(review): '(' already matches the rule above, so this looks unreachable - confirm
+            (r'\*\)', Comment.Multiline, '#pop'),  # '*)' leaves the 'comment' state
+            (r'[*)]', Comment.Multiline),  # a '*' or ')' that is not part of '*)'
+        ],
+        'identifier': [
+            (r'([a-zA-Z][\w \-]*)', Keyword),  # a letter, then word chars, spaces or '-'
+        ],
+    }

eric ide

mercurial