eric6/ThirdParty/Pygments/pygments/lexers/stata.py

changeset 8258
82b608e352ec
parent 8257
28146736bbfc
child 8259
2bbec88047dd
equal deleted inserted replaced
8257:28146736bbfc 8258:82b608e352ec
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.stata
4 ~~~~~~~~~~~~~~~~~~~~~
5
6 Lexer for Stata
7
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 from pygments.lexer import RegexLexer, default, include, words
14 from pygments.token import Comment, Keyword, Name, Number, \
15 String, Text, Operator
16
17 from pygments.lexers._stata_builtins import builtins_base, builtins_functions
18
19 __all__ = ['StataLexer']
20
21
class StataLexer(RegexLexer):
    """
    For `Stata <http://www.stata.com/>`_ do files.

    .. versionadded:: 2.2
    """
    # Syntax based on
    # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim

    name = 'Stata'
    aliases = ['stata', 'do']
    filenames = ['*.do', '*.ado']
    mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
    # MULTILINE: '^' anchors at the start of every line (needed by the
    # line-oriented comment rules).  DOTALL: the single-character '.'
    # fallbacks below also consume newlines.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            include('comments'),
            include('strings'),
            include('macros'),
            include('numbers'),
            include('keywords'),
            include('operators'),
            include('format'),
            # Anything not recognised above is emitted one character at a
            # time as plain text.
            (r'.', Text),
        ],
        # Comments are a complicated beast in Stata because they can be
        # nested and there are a few corner cases with that. See:
        # - github.com/kylebarron/language-stata/issues/90
        # - statalist.org/forums/forum/general-stata-discussion/general/1448244
        'comments': [
            # '//' only starts a comment at line start or after whitespace,
            # and must not be the beginning of '///'.
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
            # A '*' as the first non-blank character comments out the line.
            (r'^\s*\*', Comment.Single, 'comments-star'),
            (r'/\*', Comment.Multiline, 'comments-block'),
            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
        ],
        'comments-block': [
            # Block comments nest: every '/*' opens another level.
            (r'/\*', Comment.Multiline, '#push'),
            # '*/*' ends and immediately restarts a comment block; it has to
            # be caught here so that it doesn't start _another_ level of
            # comment blocks.
            (r'\*/\*', Comment.Multiline),
            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
            # Match anything else as a character inside the comment
            (r'.', Comment.Multiline),
        ],
        'comments-star': [
            # The transitions below leave the star comment first ('#pop')
            # and then enter the follow-up comment state.
            (r'///.*?\n', Comment.Single,
             ('#pop', 'comments-triple-slash')),
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
             ('#pop', 'comments-double-slash')),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # The last character before the newline closes the comment.
            (r'.(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Single),
        ],
        'comments-triple-slash': [
            (r'\n', Comment.Special, '#pop'),
            # A // breaks out of a comment for the rest of the line
            (r'//.*?(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Special),
        ],
        'comments-double-slash': [
            (r'\n', Text, '#pop'),
            (r'.', Comment.Single),
        ],
        # `"compound string"' and regular "string"; note the former are
        # nested.
        'strings': [
            (r'`"', String, 'string-compound'),
            # A '"' preceded by a backtick is a compound-string opener and
            # is handled by the rule above.
            (r'(?<!`)"', String, 'string-regular'),
        ],
        'string-compound': [
            (r'`"', String, '#push'),
            (r'"\'', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        'string-regular': [
            # Closed by a '"' that is not part of a "' terminator, or
            # (without consuming anything) at the end of the line.
            (r'(")(?!\')|(?=\n)', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        # A local is usually
        #   `\w{0,31}'
        #   `:extended macro'
        #   `=expression'
        #   `[rsen](results)'
        #   `(++--)scalar(++--)'
        #
        # However, there are all sorts of weird rules wrt edge
        # cases. Instead of writing 27 exceptions, anything inside
        # `' is a local.
        #
        # A global is more restricted, so we do follow rules. Note only
        # locals explicitly enclosed ${} can be nested.
        'macros': [
            # '${' or a '$' directly followed by another macro opener.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
        ],
        'macro-local': [
            (r'`', Name.Variable, '#push'),
            (r"'", Name.Variable, '#pop'),
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'.', Name.Variable),  # fallback
        ],
        'macro-global-nested': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'),
            (r'\}', Name.Variable.Global, '#pop'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
            (r'\w', Name.Variable.Global),  # fallback
            default('#pop'),
        ],
        'macro-global-name': [
            # A rule takes a single state-transition element; to leave this
            # state and enter another one the transition is written as one
            # tuple, as in 'comments-star' above.  A separate trailing
            # '#pop' element would not be used as a transition.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global,
             ('#pop', 'macro-global-nested')),
            (r'\$', Name.Variable.Global, ('#pop', 'macro-global-name')),
            (r'`', Name.Variable, ('#pop', 'macro-local')),
            (r'\w{1,32}', Name.Variable.Global, '#pop'),
            # A '$' that is not followed by a macro name (for instance a
            # literal dollar sign inside a string) must not trap the lexer
            # in this state; hand the next character back to the caller.
            default('#pop'),
        ],
        # Built in functions and statements
        'keywords': [
            # Function names only count when directly followed by '('.
            (words(builtins_functions, prefix=r'\b', suffix=r'(?=\()'),
             Name.Function),
            # Commands must sit at the start of a line or after whitespace.
            (words(builtins_base, prefix=r'(^\s*|\s)', suffix=r'\b'),
             Keyword),
        ],
        # http://www.stata.com/help.cgi?operators
        'operators': [
            (r'-|==|<=|>=|<|>|&|!=', Operator),
            # '~=' is listed before '~' so the two-character operator is
            # matched as one token ('==' and '!=' are handled by the rule
            # above).
            (r'\*|\+|\^|/|!|~=|~', Operator)
        ],
        # Stata numbers
        'numbers': [
            # decimal number
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
             Number),
        ],
        # Stata formats
        'format': [
            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
            (r'%(21x|16H|16L|8H|8L)', Name.Other),
            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
            (r'%[-~]?\d{1,4}s', Name.Other),
        ]
    }

eric ide

mercurial