1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.stata |
|
4 ~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexer for Stata |
|
7 |
|
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 from pygments.lexer import RegexLexer, default, include, words |
|
14 from pygments.token import Comment, Keyword, Name, Number, \ |
|
15 String, Text, Operator |
|
16 |
|
17 from pygments.lexers._stata_builtins import builtins_base, builtins_functions |
|
18 |
|
19 __all__ = ['StataLexer'] |
|
20 |
|
21 |
|
class StataLexer(RegexLexer):
    """
    For `Stata <http://www.stata.com/>`_ do files.

    Recognizes the four Stata comment styles, regular and compound strings,
    local and global macros, numbers, built-in commands and functions,
    operators and display formats.

    .. versionadded:: 2.2
    """
    # Syntax based on
    # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim

    name = 'Stata'
    aliases = ['stata', 'do']
    filenames = ['*.do', '*.ado']
    mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
    # MULTILINE lets ``^`` anchor at every line start; DOTALL lets the
    # single-character ``.`` fallbacks consume newlines as well.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        # Rule groups are tried in this order; the final ``.`` rule makes
        # sure every character is consumed by some rule.
        'root': [
            include('comments'),
            include('strings'),
            include('macros'),
            include('numbers'),
            include('keywords'),
            include('operators'),
            include('format'),
            (r'.', Text),
        ],
        # Comments are a complicated beast in Stata because they can be
        # nested and there are a few corner cases with that. See:
        # - github.com/kylebarron/language-stata/issues/90
        # - statalist.org/forums/forum/general-stata-discussion/general/1448244
        'comments': [
            # ``//`` at line start or after whitespace, but not ``///``
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
            # ``*`` as the first non-blank character of a line
            (r'^\s*\*', Comment.Single, 'comments-star'),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # ``///`` at line start or after whitespace
            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash'),
        ],
        'comments-block': [
            (r'/\*', Comment.Multiline, '#push'),
            # this ends and restarts a comment block. but need to catch this so
            # that it doesn't start _another_ level of comment blocks
            (r'\*/\*', Comment.Multiline),
            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
            # Match anything else as a character inside the comment
            (r'.', Comment.Multiline),
        ],
        'comments-star': [
            # A ``///`` or ``//`` inside a star comment replaces this state
            # with the matching slash-comment state.
            (r'///.*?\n', Comment.Single,
             ('#pop', 'comments-triple-slash')),
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
             ('#pop', 'comments-double-slash')),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # The last character before the newline ends the comment
            (r'.(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Single),
        ],
        'comments-triple-slash': [
            (r'\n', Comment.Special, '#pop'),
            # A // breaks out of a comment for the rest of the line
            (r'//.*?(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Special),
        ],
        'comments-double-slash': [
            (r'\n', Text, '#pop'),
            (r'.', Comment.Single),
        ],
        # `"compound string"' and regular "string"; note the former are
        # nested.
        'strings': [
            (r'`"', String, 'string-compound'),
            (r'(?<!`)"', String, 'string-regular'),
        ],
        'string-compound': [
            (r'`"', String, '#push'),
            (r'"\'', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String),
        ],
        'string-regular': [
            # Closed by a ``"`` that is not followed by ``'``, or (without
            # consuming anything) right before a newline.
            (r'(")(?!\')|(?=\n)', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String),
        ],
        # A local is usually
        #     `\w{0,31}'
        #     `:extended macro'
        #     `=expression'
        #     `[rsen](results)'
        #     `(++--)scalar(++--)'
        #
        # However, there are all sorts of weird rules wrt edge
        # cases. Instead of writing 27 exceptions, anything inside
        # `' is a local.
        #
        # A global is more restricted, so we do follow rules. Note only
        # locals explicitly enclosed ${} can be nested.
        'macros': [
            # ``${`` or a ``$`` directly followed by another macro opener
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
        ],
        'macro-local': [
            (r'`', Name.Variable, '#push'),
            (r"'", Name.Variable, '#pop'),
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'.', Name.Variable),  # fallback
        ],
        'macro-global-nested': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'),
            (r'\}', Name.Variable.Global, '#pop'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
            (r'\w', Name.Variable.Global),  # fallback
            default('#pop'),
        ],
        # Entered right after a bare ``$``. Every rule leaves this state:
        # the transitions are ``('#pop', target)`` tuples so that the target
        # state replaces this one instead of being stacked on top of it.
        'macro-global-name': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global,
             ('#pop', 'macro-global-nested')),
            (r'\$', Name.Variable.Global,
             ('#pop', 'macro-global-name')),
            (r'`', Name.Variable,
             ('#pop', 'macro-local')),
            (r'\w{1,32}', Name.Variable.Global, '#pop'),
            # A ``$`` that is not followed by a macro name (for example a
            # regex anchor such as "abc$") ends here rather than trapping
            # the lexer in this state.
            default('#pop'),
        ],
        # Built in functions and statements
        'keywords': [
            # Functions count only when directly followed by ``(``
            (words(builtins_functions, prefix=r'\b', suffix=r'(?=\()'),
             Name.Function),
            # Commands must follow whitespace or start the line
            (words(builtins_base, prefix=r'(^\s*|\s)', suffix=r'\b'),
             Keyword),
        ],
        # http://www.stata.com/help.cgi?operators
        'operators': [
            # Two-character operators are listed ahead of their
            # one-character prefixes (``~=`` before ``~``, ``!=`` before
            # ``!``) so the longer form wins.
            (r'-|==|<=|>=|<|>|&|\||!=|~=', Operator),
            (r'\*|\+|\^|/|!|~', Operator),
        ],
        # Stata numbers
        'numbers': [
            # decimal number
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
             Number),
        ],
        # Stata formats
        'format': [
            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
            (r'%(21x|16H|16L|8H|8L)', Name.Other),
            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
            (r'%[-~]?\d{1,4}s', Name.Other),
        ],
    }
|