ThirdParty/Pygments/pygments/lexers/sql.py

changeset 5713
6762afd9f963
parent 5072
aab59042fefb
child 6651
e8f3b5568b21
--- a/ThirdParty/Pygments/pygments/lexers/sql.py	Sun Apr 23 16:40:31 2017 +0200
+++ b/ThirdParty/Pygments/pygments/lexers/sql.py	Tue Apr 25 18:36:38 2017 +0200
@@ -34,34 +34,39 @@
     The ``tests/examplefiles`` contains a few test files with data to be
     parsed by these lexers.
 
-    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
 import re
 
 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
-from pygments.token import Punctuation, \
+from pygments.token import Punctuation, Whitespace, Error, \
     Text, Comment, Operator, Keyword, Name, String, Number, Generic
 from pygments.lexers import get_lexer_by_name, ClassNotFound
 from pygments.util import iteritems
 
 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
     PSEUDO_TYPES, PLPGSQL_KEYWORDS
+from pygments.lexers import _tsql_builtins
 
 
 __all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
-           'SqlLexer', 'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']
+           'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
+           'SqliteConsoleLexer', 'RqlLexer']
 
 line_re  = re.compile('.*?\n')
 
 language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)
 
+do_re = re.compile(r'\bDO\b', re.IGNORECASE) 
+
 
 def language_callback(lexer, match):
     """Parse the content of a $-string using a lexer
 
-    The lexer is chosen looking for a nearby LANGUAGE.
+    The lexer is chosen looking for a nearby LANGUAGE or assumed as
+    plpgsql if inside a DO statement and no LANGUAGE has been found.
     """
     l = None
     m = language_re.match(lexer.text[match.end():match.end()+100])
@@ -72,15 +77,26 @@
             lexer.text[max(0, match.start()-100):match.start()]))
         if m:
             l = lexer._get_lexer(m[-1].group(1))
-
+        else:
+            m = list(do_re.finditer(
+                lexer.text[max(0, match.start()-25):match.start()]))
+            if m:
+                l = lexer._get_lexer('plpgsql')
+    
+    # 1 = $, 2 = delimiter, 3 = $
+    yield (match.start(1), String, match.group(1))
+    yield (match.start(2), String.Delimiter, match.group(2))
+    yield (match.start(3), String, match.group(3))
+    # 4 = string contents
     if l:
-        yield (match.start(1), String, match.group(1))
-        for x in l.get_tokens_unprocessed(match.group(2)):
+        for x in l.get_tokens_unprocessed(match.group(4)):
             yield x
-        yield (match.start(3), String, match.group(3))
-
     else:
-        yield (match.start(), String, match.group())
+        yield (match.start(4), String, match.group(4))
+    # 5 = $, 6 = delimiter, 7 = $
+    yield (match.start(5), String, match.group(5))
+    yield (match.start(6), String.Delimiter, match.group(6))
+    yield (match.start(7), String, match.group(7))
 
 
 class PostgresBase(object):
@@ -137,7 +153,7 @@
     tokens = {
         'root': [
             (r'\s+', Text),
-            (r'--.*?\n', Comment.Single),
+            (r'--.*\n?', Comment.Single),
             (r'/\*', Comment.Multiline, 'multiline-comments'),
             (r'(' + '|'.join(s.replace(" ", "\s+")
                              for s in DATATYPES + PSEUDO_TYPES)
@@ -148,9 +164,10 @@
             (r'\$\d+', Name.Variable),
             (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
             (r'[0-9]+', Number.Integer),
-            (r"(E|U&)?'", String.Single, 'string'),
-            (r'(U&)?"', String.Name, 'quoted-ident'),  # quoted identifier
-            (r'(?s)(\$[^$]*\$)(.*?)(\1)', language_callback),
+            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
+            # quoted identifier
+            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
+            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
             (r'[a-z_]\w*', Name),
 
             # psql variable in SQL
@@ -363,7 +380,7 @@
     tokens = {
         'root': [
             (r'\s+', Text),
-            (r'--.*?\n', Comment.Single),
+            (r'--.*\n?', Comment.Single),
             (r'/\*', Comment.Multiline, 'multiline-comments'),
             (words((
                 'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER', 'AGGREGATE',
@@ -464,6 +481,62 @@
     }
 
 
+class TransactSqlLexer(RegexLexer):
+    """
+    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
+    SQL.
+
+    The list of keywords includes ODBC and keywords reserved for future use..
+    """
+
+    name = 'Transact-SQL'
+    aliases = ['tsql', 't-sql']
+    filenames = ['*.sql']
+    mimetypes = ['text/x-tsql']
+
+    # Use re.UNICODE to allow non ASCII letters in names.
+    flags = re.IGNORECASE | re.UNICODE
+    tokens = {
+        'root': [
+            (r'\s+', Whitespace),
+            (r'--(?m).*?$\n?', Comment.Single),
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (words(_tsql_builtins.OPERATORS), Operator),
+            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
+            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
+            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
+            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
+            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
+            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
+            (r'0x[0-9a-f]+', Number.Hex),
+            # Float variant 1, for example: 1., 1.e2, 1.2e3
+            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
+            # Float variant 2, for example: .1, .1e2
+            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
+            # Float variant 3, for example: 123e45
+            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
+            (r'[0-9]+', Number.Integer),
+            (r"'(''|[^'])*'", String.Single),
+            (r'"(""|[^"])*"', String.Symbol),
+            (r'[;(),.]', Punctuation),
+            # Below we use \w even for the first "real" character because
+            # tokens starting with a digit have already been recognized
+            # as Number above.
+            (r'@@\w+', Name.Builtin),
+            (r'@\w+', Name.Variable),
+            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
+            (r'#?#?\w+', Name),  # names for temp tables and anything else
+            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
+        ],
+        'multiline-comments': [
+            (r'/\*', Comment.Multiline, 'multiline-comments'),
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'[^/*]+', Comment.Multiline),
+            (r'[/*]', Comment.Multiline)
+        ]
+    }
+
+
 class MySqlLexer(RegexLexer):
     """
     Special lexer for MySQL.
@@ -477,7 +550,7 @@
     tokens = {
         'root': [
             (r'\s+', Text),
-            (r'(#|--\s+).*?\n', Comment.Single),
+            (r'(#|--\s+).*\n?', Comment.Single),
             (r'/\*', Comment.Multiline, 'multiline-comments'),
             (r'[0-9]+', Number.Integer),
             (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float),

eric ide

mercurial