eric6/ThirdParty/Pygments/pygments/lexers/stata.py

changeset 7547
21b0534faebc
parent 6942
2602857055c5
child 7701
25f42e208e08
--- a/eric6/ThirdParty/Pygments/pygments/lexers/stata.py	Tue Apr 21 19:44:19 2020 +0200
+++ b/eric6/ThirdParty/Pygments/pygments/lexers/stata.py	Tue Apr 21 19:47:10 2020 +0200
@@ -5,10 +5,11 @@
 
     Lexer for Stata
 
-    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
+import re
 from pygments.lexer import RegexLexer, include, words
 from pygments.token import Comment, Keyword, Name, Number, \
     String, Text, Operator
@@ -26,63 +27,125 @@
     """
     # Syntax based on
     # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
-    # - http://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
-    # - http://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim
+    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
+    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim
 
     name      = 'Stata'
     aliases   = ['stata', 'do']
     filenames = ['*.do', '*.ado']
     mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
+    flags     = re.MULTILINE | re.DOTALL
 
     tokens = {
         'root': [
             include('comments'),
-            include('vars-strings'),
+            include('strings'),
+            include('macros'),
             include('numbers'),
             include('keywords'),
+            include('operators'),
+            include('format'),
             (r'.', Text),
         ],
-        # Global and local macros; regular and special strings
-        'vars-strings': [
-            (r'\$[\w{]', Name.Variable.Global, 'var_validglobal'),
-            (r'`\w{0,31}\'', Name.Variable),
-            (r'"', String, 'string_dquote'),
-            (r'`"', String, 'string_mquote'),
+        # Comments are a complicated beast in Stata because they can be
+        # nested and there are a few corner cases with that. See:
+        # - github.com/kylebarron/language-stata/issues/90
+        # - statalist.org/forums/forum/general-stata-discussion/general/1448244
+        'comments': [
+            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
+            (r'^\s*\*', Comment.Single, 'comments-star'),
+            (r'/\*', Comment.Multiline, 'comments-block'),
+            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
+        ],
+        'comments-block': [
+            (r'/\*', Comment.Multiline, '#push'),
+            # This ends and restarts a comment block, but we need to catch it
+            # so that it doesn't start _another_ level of comment blocks.
+            (r'\*/\*', Comment.Multiline),
+            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
+            # Match anything else as a character inside the comment
+            (r'.', Comment.Multiline),
         ],
-        # For either string type, highlight macros as macros
-        'string_dquote': [
-            (r'"', String, '#pop'),
-            (r'\\\\|\\"|\\\n', String.Escape),
-            (r'\$', Name.Variable.Global, 'var_validglobal'),
-            (r'`', Name.Variable, 'var_validlocal'),
-            (r'[^$`"\\]+', String),
-            (r'[$"\\]', String),
+        'comments-star': [
+            (r'///.*?\n', Comment.Single,
+                ('#pop', 'comments-triple-slash')),
+            (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
+                ('#pop', 'comments-double-slash')),
+            (r'/\*', Comment.Multiline, 'comments-block'),
+            (r'.(?=\n)', Comment.Single, '#pop'),
+            (r'.', Comment.Single),
         ],
-        'string_mquote': [
+        'comments-triple-slash': [
+            (r'\n', Comment.Special, '#pop'),
+            # A // breaks out of a comment for the rest of the line
+            (r'//.*?(?=\n)', Comment.Single, '#pop'),
+            (r'.', Comment.Special),
+        ],
+        'comments-double-slash': [
+            (r'\n', Text, '#pop'),
+            (r'.', Comment.Single),
+        ],
+        # `"compound string"' and regular "string"; note the former are
+        # nested.
+        'strings': [
+            (r'`"', String, 'string-compound'),
+            (r'(?<!`)"', String, 'string-regular'),
+        ],
+        'string-compound': [
+            (r'`"', String, '#push'),
             (r'"\'', String, '#pop'),
-            (r'\\\\|\\"|\\\n', String.Escape),
-            (r'\$', Name.Variable.Global, 'var_validglobal'),
-            (r'`', Name.Variable, 'var_validlocal'),
-            (r'[^$`"\\]+', String),
-            (r'[$"\\]', String),
+            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
+            include('macros'),
+            (r'.', String)
         ],
-        'var_validglobal': [
-            (r'\{\w{0,32}\}', Name.Variable.Global, '#pop'),
-            (r'\w{1,32}', Name.Variable.Global, '#pop'),
+        'string-regular': [
+            (r'(")(?!\')|(?=\n)', String, '#pop'),
+            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
+            include('macros'),
+            (r'.', String)
         ],
-        'var_validlocal': [
-            (r'\w{0,31}\'', Name.Variable, '#pop'),
+        # A local is usually
+        #     `\w{0,31}'
+        #     `:extended macro'
+        #     `=expression'
+        #     `[rsen](results)'
+        #     `(++--)scalar(++--)'
+        #
+        # However, there are all sorts of weird rules regarding edge
+        # cases. Instead of writing 27 exceptions, we treat anything
+        # inside `' as a local.
+        #
+        # A global is more restricted, so we do follow rules. Note that only
+        # locals explicitly enclosed in ${} can be nested.
+        'macros': [
+            (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'),
+            (r'\$', Name.Variable.Global,  'macro-global-name'),
+            (r'`', Name.Variable, 'macro-local'),
         ],
-        # * only OK at line start, // OK anywhere
-        'comments': [
-            (r'^\s*\*.*$', Comment),
-            (r'//.*', Comment.Single),
-            (r'/\*.*?\*/', Comment.Multiline),
-            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
+        'macro-local': [
+            (r'`', Name.Variable, '#push'),
+            (r"'", Name.Variable, '#pop'),
+            (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'),
+            (r'\$', Name.Variable.Global, 'macro-global-name'),
+            (r'.', Name.Variable),  # fallback
+        ],
+        'macro-global-nested': [
+            (r'\$(\{|(?=[\$`]))', Name.Variable.Global, '#push'),
+            (r'\}', Name.Variable.Global, '#pop'),
+            (r'\$', Name.Variable.Global, 'macro-global-name'),
+            (r'`', Name.Variable, 'macro-local'),
+            (r'\w', Name.Variable.Global),  # fallback
+            (r'(?!\w)', Name.Variable.Global, '#pop'),
+        ],
+        'macro-global-name': [  # a follow-up macro replaces this state: pop, then push
+            (r'\$(\{|(?=[\$`]))', Name.Variable.Global, ('#pop', 'macro-global-nested')),
+            (r'\$', Name.Variable.Global, ('#pop', 'macro-global-name')),
+            (r'`', Name.Variable, ('#pop', 'macro-local')),
+            (r'\w{1,32}', Name.Variable.Global, '#pop'),
+        ],
         # Built in functions and statements
         'keywords': [
-            (words(builtins_functions, prefix = r'\b', suffix = r'\('),
+            (words(builtins_functions, prefix = r'\b', suffix = r'(?=\()'),
              Name.Function),
             (words(builtins_base, prefix = r'(^\s*|\s)', suffix = r'\b'),
              Keyword),
@@ -100,9 +163,9 @@
         ],
         # Stata formats
         'format': [
-            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Variable),
-            (r'%(21x|16H|16L|8H|8L)', Name.Variable),
-            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg).{0,32}', Name.Variable),
-            (r'%[-~]?\d{1,4}s', Name.Variable),
+            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
+            (r'%(21x|16H|16L|8H|8L)', Name.Other),
+            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
+            (r'%[-~]?\d{1,4}s', Name.Other),
         ]
     }

eric ide

mercurial