--- a/ThirdParty/Pygments/pygments/lexers/parsers.py Wed Mar 11 18:25:37 2015 +0100 +++ b/ThirdParty/Pygments/pygments/lexers/parsers.py Wed Mar 11 18:32:27 2015 +0100 @@ -5,33 +5,33 @@ Lexers for parser generators. - :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -from __future__ import unicode_literals - import re from pygments.lexer import RegexLexer, DelegatingLexer, \ include, bygroups, using from pygments.token import Punctuation, Other, Text, Comment, Operator, \ - Keyword, Name, String, Number, Whitespace -from pygments.lexers.compiled import JavaLexer, CLexer, CppLexer, \ - ObjectiveCLexer, DLexer + Keyword, Name, String, Number, Whitespace +from pygments.lexers.jvm import JavaLexer +from pygments.lexers.c_cpp import CLexer, CppLexer +from pygments.lexers.objective import ObjectiveCLexer +from pygments.lexers.d import DLexer from pygments.lexers.dotnet import CSharpLexer -from pygments.lexers.agile import RubyLexer, PythonLexer, PerlLexer -from pygments.lexers.web import ActionScriptLexer - +from pygments.lexers.ruby import RubyLexer +from pygments.lexers.python import PythonLexer +from pygments.lexers.perl import PerlLexer __all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer', 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer', 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer', 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer', - #'AntlrCLexer', + # 'AntlrCLexer', 'AntlrCSharpLexer', 'AntlrObjectiveCLexer', - 'AntlrJavaLexer', "AntlrActionScriptLexer", - 'TreetopLexer'] + 'AntlrJavaLexer', 'AntlrActionScriptLexer', + 'TreetopLexer', 'EbnfLexer'] class RagelLexer(RegexLexer): @@ -40,7 +40,7 @@ fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead (or one of the language-specific subclasses). - *New in Pygments 1.1.* + .. 
versionadded:: 1.1 """ name = 'Ragel' @@ -65,29 +65,29 @@ (r'[+-]?[0-9]+', Number.Integer), ], 'literals': [ - (r'"(\\\\|\\"|[^"])*"', String), # double quote string - (r"'(\\\\|\\'|[^'])*'", String), # single quote string - (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals - (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions + (r'"(\\\\|\\"|[^"])*"', String), # double quote string + (r"'(\\\\|\\'|[^'])*'", String), # single quote string + (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals + (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions ], 'identifiers': [ - (r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable), + (r'[a-zA-Z_]\w*', Name.Variable), ], 'operators': [ - (r',', Operator), # Join - (r'\||&|--?', Operator), # Union, Intersection and Subtraction - (r'\.|<:|:>>?', Operator), # Concatention - (r':', Operator), # Label - (r'->', Operator), # Epsilon Transition - (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions - (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions - (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions - (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions - (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions - (r'>|@|\$|%', Operator), # Transition Actions and Priorities - (r'\*|\?|\+|{[0-9]*,[0-9]*}', Operator), # Repetition - (r'!|\^', Operator), # Negation - (r'\(|\)', Operator), # Grouping + (r',', Operator), # Join + (r'\||&|--?', Operator), # Union, Intersection and Subtraction + (r'\.|<:|:>>?', Operator), # Concatenation + (r':', Operator), # Label + (r'->', Operator), # Epsilon Transition + (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions + (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions + (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions + (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions + (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions + (r'>|@|\$|%', Operator), # Transition 
Actions and Priorities + (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator), # Repetition + (r'!|\^', Operator), # Negation + (r'\(|\)', Operator), # Grouping ], 'root': [ include('literals'), @@ -97,21 +97,21 @@ include('numbers'), include('identifiers'), include('operators'), - (r'{', Punctuation, 'host'), + (r'\{', Punctuation, 'host'), (r'=', Operator), (r';', Punctuation), ], 'host': [ - (r'(' + r'|'.join(( # keep host code in largest possible chunks - r'[^{}\'"/#]+', # exclude unsafe characters - r'[^\\][\\][{}]', # allow escaped { or } + (r'(' + r'|'.join(( # keep host code in largest possible chunks + r'[^{}\'"/#]+', # exclude unsafe characters + r'[^\\]\\[{}]', # allow escaped { or } # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r'//.*$\n?', # single line comment - r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment - r'\#.*$\n?', # ruby comment + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'\#.*$\n?', # ruby comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. @@ -121,8 +121,8 @@ r'/', )) + r')+', Other), - (r'{', Punctuation, '#push'), - (r'}', Punctuation, '#pop'), + (r'\{', Punctuation, '#push'), + (r'\}', Punctuation, '#pop'), ], } @@ -134,7 +134,7 @@ This will only highlight Ragel statements. If you want host language highlighting then call the language-specific Ragel lexer. - *New in Pygments 1.1.* + .. 
versionadded:: 1.1 """ name = 'Embedded Ragel' @@ -143,17 +143,17 @@ tokens = { 'root': [ - (r'(' + r'|'.join(( # keep host code in largest possible chunks - r'[^%\'"/#]+', # exclude unsafe characters - r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them + (r'(' + r'|'.join(( # keep host code in largest possible chunks + r'[^%\'"/#]+', # exclude unsafe characters + r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment - r'//.*$\n?', # single line comment - r'\#.*$\n?', # ruby/ragel comment - r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'//.*$\n?', # single line comment + r'\#.*$\n?', # ruby/ragel comment + r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression # / is safe now that we've handled regex and javadoc comments r'/', @@ -167,15 +167,15 @@ Punctuation, Text)), # Multi Line FSM. - (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'), + (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'), ], 'multi-line-fsm': [ - (r'(' + r'|'.join(( # keep ragel code in largest possible chunks. + (r'(' + r'|'.join(( # keep ragel code in largest possible chunks. r'(' + r'|'.join(( - r'[^}\'"\[/#]', # exclude unsafe characters - r'}(?=[^%]|$)', # } is okay as long as it's not followed by % - r'}%(?=[^%]|$)', # ...well, one %'s okay, just not two... - r'[^\\][\\][{}]', # ...and } is okay if it's escaped + r'[^}\'"\[/#]', # exclude unsafe characters + r'\}(?=[^%]|$)', # } is okay as long as it's not followed by % + r'\}%(?=[^%]|$)', # ...well, one %'s okay, just not two... 
+ r'[^\\]\\[{}]', # ...and } is okay if it's escaped # allow / if it's preceded with one of these symbols # (ragel EOF actions) @@ -186,35 +186,35 @@ r'/(?!\*)(\\\\|\\/|[^/])*/\*', # allow / as long as it's not followed by another / or by a * - r'/(?=[^/\*]|$)', + r'/(?=[^/*]|$)', # We want to match as many of these as we can in one block. # Not sure if we need the + sign here, # does it help performance? - )) + r')+', + )) + r')+', # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal - r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment - r'//.*$\n?', # single line comment - r'\#.*$\n?', # ruby/ragel comment + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'//.*$\n?', # single line comment + r'\#.*$\n?', # ruby/ragel comment )) + r')+', using(RagelLexer)), - (r'}%%', Punctuation, '#pop'), + (r'\}%%', Punctuation, '#pop'), ] } def analyse_text(text): - return '@LANG: indep' in text or 0.1 + return '@LANG: indep' in text class RagelRubyLexer(DelegatingLexer): """ A lexer for `Ragel`_ in a Ruby host file. - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'Ragel in Ruby Host' @@ -223,7 +223,7 @@ def __init__(self, **options): super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer, - **options) + **options) def analyse_text(text): return '@LANG: ruby' in text @@ -233,7 +233,7 @@ """ A lexer for `Ragel`_ in a C host file. - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'Ragel in C Host' @@ -252,7 +252,7 @@ """ A lexer for `Ragel`_ in a D host file. - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'Ragel in D Host' @@ -270,7 +270,7 @@ """ A lexer for `Ragel`_ in a CPP host file. - *New in Pygments 1.1.* + .. 
versionadded:: 1.1 """ name = 'Ragel in CPP Host' @@ -288,7 +288,7 @@ """ A lexer for `Ragel`_ in an Objective C host file. - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'Ragel in Objective C Host' @@ -308,7 +308,7 @@ """ A lexer for `Ragel`_ in a Java host file. - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'Ragel in Java Host' @@ -329,7 +329,7 @@ Should not be called directly, instead use DelegatingLexer for your target language. - *New in Pygments 1.1.* + .. versionadded:: 1.1 .. _ANTLR: http://www.antlr.org/ """ @@ -338,9 +338,9 @@ aliases = ['antlr'] filenames = [] - _id = r'[A-Za-z][A-Za-z_0-9]*' - _TOKEN_REF = r'[A-Z][A-Za-z_0-9]*' - _RULE_REF = r'[a-z][A-Za-z_0-9]*' + _id = r'[A-Za-z]\w*' + _TOKEN_REF = r'[A-Z]\w*' + _RULE_REF = r'[a-z]\w*' _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\'' _INT = r'[0-9]+' @@ -364,17 +364,17 @@ # tokensSpec (r'tokens\b', Keyword, 'tokens'), # attrScope - (r'(scope)(\s*)(' + _id + ')(\s*)({)', + (r'(scope)(\s*)(' + _id + ')(\s*)(\{)', bygroups(Keyword, Whitespace, Name.Variable, Whitespace, Punctuation), 'action'), # exception (r'(catch|finally)\b', Keyword, 'exception'), # action - (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)({)', + (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)(\{)', bygroups(Name.Label, Whitespace, Punctuation, Whitespace, Name.Label, Whitespace, Punctuation), 'action'), # rule - (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', \ + (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', bygroups(Keyword, Whitespace, Name.Label, Punctuation), ('rule-alts', 'rule-prelims')), ], @@ -397,18 +397,18 @@ (r'(throws)(\s+)(' + _id + ')', bygroups(Keyword, Whitespace, Name.Label)), (r'(,)(\s*)(' + _id + ')', - bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws + bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws # optionsSpec (r'options\b', Keyword, 'options'), # ruleScopeSpec - scope followed by target language 
code or name of action # TODO finish implementing other possibilities for scope # L173 ANTLRv3.g from ANTLR book - (r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation), - 'action'), + (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation), + 'action'), (r'(scope)(\s+)(' + _id + ')(\s*)(;)', bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)), # ruleAction - (r'(@' + _id + ')(\s*)({)', + (r'(@' + _id + ')(\s*)(\{)', bygroups(Name.Label, Whitespace, Punctuation), 'action'), # finished prelims, go to rule alts! (r':', Punctuation, '#pop') @@ -427,9 +427,9 @@ (r'<<([^>]|>[^>])>>', String), # identifiers # Tokens start with capital letter. - (r'\$?[A-Z_][A-Za-z_0-9]*', Name.Constant), + (r'\$?[A-Z_]\w*', Name.Constant), # Rules start with small letter. - (r'\$?[a-z_][A-Za-z_0-9]*', Name.Variable), + (r'\$?[a-z_]\w*', Name.Variable), # operators (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator), (r',', Punctuation), @@ -440,32 +440,32 @@ 'tokens': [ include('whitespace'), include('comments'), - (r'{', Punctuation), + (r'\{', Punctuation), (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL + ')?(\s*)(;)', bygroups(Name.Label, Whitespace, Punctuation, Whitespace, String, Whitespace, Punctuation)), - (r'}', Punctuation, '#pop'), + (r'\}', Punctuation, '#pop'), ], 'options': [ include('whitespace'), include('comments'), - (r'{', Punctuation), + (r'\{', Punctuation), (r'(' + _id + r')(\s*)(=)(\s*)(' + - '|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)', + '|'.join((_id, _STRING_LITERAL, _INT, '\*')) + ')(\s*)(;)', bygroups(Name.Variable, Whitespace, Punctuation, Whitespace, Text, Whitespace, Punctuation)), - (r'}', Punctuation, '#pop'), + (r'\}', Punctuation, '#pop'), ], 'action': [ - (r'(' + r'|'.join(( # keep host code in largest possible chunks - r'[^\${}\'"/\\]+', # exclude unsafe characters + (r'(' + r'|'.join(( # keep host code in largest possible chunks + r'[^${}\'"/\\]+', # exclude unsafe characters # 
strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r'//.*$\n?', # single line comment - r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. @@ -481,18 +481,18 @@ (r'(\\)(%)', bygroups(Punctuation, Other)), (r'(\$[a-zA-Z]+)(\.?)(text|value)?', bygroups(Name.Variable, Punctuation, Name.Property)), - (r'{', Punctuation, '#push'), - (r'}', Punctuation, '#pop'), + (r'\{', Punctuation, '#push'), + (r'\}', Punctuation, '#pop'), ], 'nested-arg-action': [ - (r'(' + r'|'.join(( # keep host code in largest possible chunks. - r'[^\$\[\]\'"/]+', # exclude unsafe characters + (r'(' + r'|'.join(( # keep host code in largest possible chunks. + r'[^$\[\]\'"/]+', # exclude unsafe characters # strings and comments may safely contain unsafe characters - r'"(\\\\|\\"|[^"])*"', # double quote string - r"'(\\\\|\\'|[^'])*'", # single quote string - r'//.*$\n?', # single line comment - r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment + r'"(\\\\|\\"|[^"])*"', # double quote string + r"'(\\\\|\\'|[^'])*'", # single quote string + r'//.*$\n?', # single line comment + r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment # regular expression: There's no reason for it to start # with a * and this stops confusion with comments. @@ -522,11 +522,11 @@ # so just assume they're C++. No idea how to make Objective C work in the # future. -#class AntlrCLexer(DelegatingLexer): +# class AntlrCLexer(DelegatingLexer): # """ # ANTLR with C Target # -# *New in Pygments 1.1* +# .. 
versionadded:: 1.1 # """ # # name = 'ANTLR With C Target' @@ -539,11 +539,12 @@ # def analyse_text(text): # return re.match(r'^\s*language\s*=\s*C\s*;', text) + class AntlrCppLexer(DelegatingLexer): """ `ANTLR`_ with CPP Target - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'ANTLR With CPP Target' @@ -555,14 +556,14 @@ def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*C\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*C\s*;', text, re.M) class AntlrObjectiveCLexer(DelegatingLexer): """ `ANTLR`_ with Objective-C Target - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'ANTLR With ObjectiveC Target' @@ -575,14 +576,14 @@ def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*ObjC\s*;', text) + re.search(r'^\s*language\s*=\s*ObjC\s*;', text) class AntlrCSharpLexer(DelegatingLexer): """ `ANTLR`_ with C# Target - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'ANTLR With C# Target' @@ -595,14 +596,14 @@ def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M) class AntlrPythonLexer(DelegatingLexer): """ `ANTLR`_ with Python Target - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'ANTLR With Python Target' @@ -615,14 +616,14 @@ def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M) class AntlrJavaLexer(DelegatingLexer): """ `ANTLR`_ with Java Target - *New in Pygments 1.1* + .. versionadded:: 1.1 """ name = 'ANTLR With Java Target' @@ -642,7 +643,7 @@ """ `ANTLR`_ with Ruby Target - *New in Pygments 1.1.* + .. 
versionadded:: 1.1 """ name = 'ANTLR With Ruby Target' @@ -655,14 +656,14 @@ def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M) class AntlrPerlLexer(DelegatingLexer): """ `ANTLR`_ with Perl Target - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'ANTLR With Perl Target' @@ -675,14 +676,14 @@ def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M) class AntlrActionScriptLexer(DelegatingLexer): """ `ANTLR`_ with ActionScript Target - *New in Pygments 1.1.* + .. versionadded:: 1.1 """ name = 'ANTLR With ActionScript Target' @@ -690,19 +691,21 @@ filenames = ['*.G', '*.g'] def __init__(self, **options): + from pygments.lexers.actionscript import ActionScriptLexer super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer, AntlrLexer, **options) def analyse_text(text): return AntlrLexer.analyse_text(text) and \ - re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M) + re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M) + class TreetopBaseLexer(RegexLexer): """ A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars. Not for direct use; use TreetopLexer instead. - *New in Pygments 1.6.* + .. 
versionadded:: 1.6 """ tokens = { @@ -717,43 +720,43 @@ include('end'), (r'module\b', Keyword, '#push'), (r'grammar\b', Keyword, 'grammar'), - (r'[A-Z][A-Za-z_0-9]*(?:::[A-Z][A-Za-z_0-9]*)*', Name.Namespace), + (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace), ], 'grammar': [ include('space'), include('end'), (r'rule\b', Keyword, 'rule'), (r'include\b', Keyword, 'include'), - (r'[A-Z][A-Za-z_0-9]*', Name), + (r'[A-Z]\w*', Name), ], 'include': [ include('space'), - (r'[A-Z][A-Za-z_0-9]*(?:::[A-Z][A-Za-z_0-9]*)*', Name.Class, '#pop'), + (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'), ], 'rule': [ include('space'), include('end'), (r'"(\\\\|\\"|[^"])*"', String.Double), (r"'(\\\\|\\'|[^'])*'", String.Single), - (r'([A-Za-z_][A-Za-z_0-9]*)(:)', bygroups(Name.Label, Punctuation)), - (r'[A-Za-z_][A-Za-z_0-9]*', Name), + (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)), + (r'[A-Za-z_]\w*', Name), (r'[()]', Punctuation), (r'[?+*/&!~]', Operator), (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex), (r'([0-9]*)(\.\.)([0-9]*)', bygroups(Number.Integer, Operator, Number.Integer)), (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)), - (r'{', Punctuation, 'inline_module'), + (r'\{', Punctuation, 'inline_module'), (r'\.', String.Regex), ], 'inline_module': [ - (r'{', Other, 'ruby'), - (r'}', Punctuation, '#pop'), + (r'\{', Other, 'ruby'), + (r'\}', Punctuation, '#pop'), (r'[^{}]+', Other), ], 'ruby': [ - (r'{', Other, '#push'), - (r'}', Other, '#pop'), + (r'\{', Other, '#push'), + (r'\}', Other, '#pop'), (r'[^{}]+', Other), ], 'space': [ @@ -765,11 +768,12 @@ ], } + class TreetopLexer(DelegatingLexer): """ A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars. - *New in Pygments 1.6.* + .. 
versionadded:: 1.6 """ name = 'Treetop' @@ -778,3 +782,54 @@ def __init__(self, **options): super(TreetopLexer, self).__init__(RubyLexer, TreetopBaseLexer, **options) + + +class EbnfLexer(RegexLexer): + """ + Lexer for `ISO/IEC 14977 EBNF + <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_ + grammars. + + .. versionadded:: 2.0 + """ + + name = 'EBNF' + aliases = ['ebnf'] + filenames = ['*.ebnf'] + mimetypes = ['text/x-ebnf'] + + tokens = { + 'root': [ + include('whitespace'), + include('comment_start'), + include('identifier'), + (r'=', Operator, 'production'), + ], + 'production': [ + include('whitespace'), + include('comment_start'), + include('identifier'), + (r'"[^"]*"', String.Double), + (r"'[^']*'", String.Single), + (r'(\?[^?]*\?)', Name.Entity), + (r'[\[\]{}(),|]', Punctuation), + (r'-', Operator), + (r';', Punctuation, '#pop'), + (r'\.', Punctuation, '#pop'), + ], + 'whitespace': [ + (r'\s+', Text), + ], + 'comment_start': [ + (r'\(\*', Comment.Multiline, 'comment'), + ], + 'comment': [ + (r'[^*)]', Comment.Multiline), + include('comment_start'), + (r'\*\)', Comment.Multiline, '#pop'), + (r'[*)]', Comment.Multiline), + ], + 'identifier': [ + (r'([a-zA-Z][\w \-]*)', Keyword), + ], + }