|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.sas |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexer for SAS. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 from pygments.lexer import RegexLexer, include, words |
|
14 from pygments.token import Comment, Keyword, Name, Number, String, Text, \ |
|
15 Other, Generic |
|
16 |
|
# Public API of this module: only the lexer class is exported.
__all__ = ["SASLexer"]
|
18 |
|
19 |
|
class SASLexer(RegexLexer):
    """
    For `SAS <http://www.sas.com/>`_ files.

    All patterns are compiled with ``re.IGNORECASE | re.MULTILINE``, so
    keywords match in any letter case and ``^`` / ``$`` anchor at line
    boundaries.

    .. versionadded:: 2.2
    """
    # Syntax from syntax/sas.vim by James Kidd <james.kidd@covance.com>

    name = 'SAS'
    aliases = ['sas']
    filenames = ['*.SAS', '*.sas']
    mimetypes = ['text/x-sas', 'text/sas', 'application/x-sas']
    flags = re.IGNORECASE | re.MULTILINE

    # Macro-language names; matched only when preceded by '%'.
    builtins_macros = (
        "bquote", "nrbquote", "cmpres", "qcmpres", "compstor", "datatyp",
        "display", "do", "else", "end", "eval", "global", "goto", "if",
        "index", "input", "keydef", "label", "left", "length", "let",
        "local", "lowcase", "macro", "mend", "nrquote",
        "nrstr", "put", "qleft", "qlowcase", "qscan",
        "qsubstr", "qsysfunc", "qtrim", "quote", "qupcase", "scan",
        "str", "substr", "superq", "syscall", "sysevalf", "sysexec",
        "sysfunc", "sysget", "syslput", "sysprod", "sysrc", "sysrput",
        "then", "to", "trim", "unquote", "until", "upcase", "verify",
        "while", "window"
    )

    # Control-flow words; matched as whole words.
    builtins_conditionals = (
        "do", "if", "then", "else", "end", "until", "while"
    )

    # Statement names; matched as whole words.
    builtins_statements = (
        "abort", "array", "attrib", "by", "call", "cards", "cards4",
        "catname", "continue", "datalines", "datalines4", "delete", "delim",
        "delimiter", "display", "dm", "drop", "endsas", "error", "file",
        "filename", "footnote", "format", "goto", "in", "infile", "informat",
        "input", "keep", "label", "leave", "length", "libname", "link",
        "list", "lostcard", "merge", "missing", "modify", "options", "output",
        "out", "page", "put", "redirect", "remove", "rename", "replace",
        "retain", "return", "select", "set", "skip", "startsas", "stop",
        "title", "update", "waitsas", "where", "window", "x", "systask"
    )

    # SQL words; matched as whole words.
    builtins_sql = (
        "add", "and", "alter", "as", "cascade", "check", "create",
        "delete", "describe", "distinct", "drop", "foreign", "from",
        "group", "having", "index", "insert", "into", "in", "key", "like",
        "message", "modify", "msgtype", "not", "null", "on", "or",
        "order", "primary", "references", "reset", "restrict", "select",
        "set", "table", "unique", "update", "validate", "view", "where"
    )

    # Function names; matched only when directly followed by '('.
    builtins_functions = (
        "abs", "addr", "airy", "arcos", "arsin", "atan", "attrc",
        "attrn", "band", "betainv", "blshift", "bnot", "bor",
        "brshift", "bxor", "byte", "cdf", "ceil", "cexist", "cinv",
        "close", "cnonct", "collate", "compbl", "compound",
        "compress", "cos", "cosh", "css", "curobs", "cv", "daccdb",
        "daccdbsl", "daccsl", "daccsyd", "dacctab", "dairy", "date",
        "datejul", "datepart", "datetime", "day", "dclose", "depdb",
        "depdbsl", "depsl", "depsyd",
        "deptab", "dequote", "dhms", "dif", "digamma",
        "dim", "dinfo", "dnum", "dopen", "doptname", "doptnum",
        "dread", "dropnote", "dsname", "erf", "erfc", "exist", "exp",
        "fappend", "fclose", "fcol", "fdelete", "fetch", "fetchobs",
        "fexist", "fget", "fileexist", "filename", "fileref",
        "finfo", "finv", "fipname", "fipnamel", "fipstate", "floor",
        "fnonct", "fnote", "fopen", "foptname", "foptnum", "fpoint",
        "fpos", "fput", "fread", "frewind", "frlen", "fsep", "fuzz",
        "fwrite", "gaminv", "gamma", "getoption", "getvarc", "getvarn",
        "hbound", "hms", "hosthelp", "hour", "ibessel", "index",
        "indexc", "indexw", "input", "inputc", "inputn", "int",
        "intck", "intnx", "intrr", "irr", "jbessel", "juldate",
        "kurtosis", "lag", "lbound", "left", "length", "lgamma",
        "libname", "libref", "log", "log10", "log2", "logpdf", "logpmf",
        "logsdf", "lowcase", "max", "mdy", "mean", "min", "minute",
        "mod", "month", "mopen", "mort", "n", "netpv", "nmiss",
        "normal", "note", "npv", "open", "ordinal", "pathname",
        "pdf", "peek", "peekc", "pmf", "point", "poisson", "poke",
        "probbeta", "probbnml", "probchi", "probf", "probgam",
        "probhypr", "probit", "probnegb", "probnorm", "probt",
        "put", "putc", "putn", "qtr", "quote", "ranbin", "rancau",
        "ranexp", "rangam", "range", "rank", "rannor", "ranpoi",
        "rantbl", "rantri", "ranuni", "repeat", "resolve", "reverse",
        "rewind", "right", "round", "saving", "scan", "sdf", "second",
        "sign", "sin", "sinh", "skewness", "soundex", "spedis",
        "sqrt", "std", "stderr", "stfips", "stname", "stnamel",
        "substr", "sum", "symget", "sysget", "sysmsg", "sysprod",
        "sysrc", "system", "tan", "tanh", "time", "timepart", "tinv",
        "tnonct", "today", "translate", "tranwrd", "trigamma",
        "trim", "trimn", "trunc", "uniform", "upcase", "uss", "var",
        "varfmt", "varinfmt", "varlabel", "varlen", "varname",
        "varnum", "varray", "varrayx", "vartype", "verify", "vformat",
        "vformatd", "vformatdx", "vformatn", "vformatnx", "vformatw",
        "vformatwx", "vformatx", "vinarray", "vinarrayx", "vinformat",
        "vinformatd", "vinformatdx", "vinformatn", "vinformatnx",
        "vinformatw", "vinformatwx", "vinformatx", "vlabel",
        "vlabelx", "vlength", "vlengthx", "vname", "vnamex", "vtype",
        "vtypex", "weekday", "year", "yyq", "zipfips", "zipname",
        "zipnamel", "zipstate"
    )

    tokens = {
        'root': [
            include('comments'),
            include('proc-data'),
            include('cards-datalines'),
            include('logs'),
            include('general'),
            # Anything not recognised above is plain text, one character
            # at a time; '.' does not match a newline, so handle it
            # explicitly as well.
            (r'.', Text),
            (r'\n', Text),
        ],
        # SAS is multi-line regardless, but * is ended by ;
        'comments': [
            (r'^\s*\*.*?;', Comment),
            (r'/\*.*?\*/', Comment),
            (r'^\s*\*(.|\n)*?;', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
        ],
        # Special highlight for proc, data, quit, run
        'proc-data': [
            (r'(^|;)\s*(proc \w+|data|run|quit)[\s;]',
             Keyword.Reserved),
        ],
        # Special highlight cards and datalines
        'cards-datalines': [
            (r'^\s*(datalines|cards)\s*;\s*$', Keyword, 'data'),
        ],
        'data': [
            # Raw data runs up to the FIRST line that holds only ';'.
            # The match must be non-greedy: a greedy one would swallow
            # all code up to the last such line in the file.
            (r'(.|\n)*?^\s*;\s*$', Other, '#pop'),
        ],
        # Special highlight for put NOTE|ERROR|WARNING (order matters)
        'logs': [
            (r'\n?^\s*%?put ', Keyword, 'log-messages'),
        ],
        'log-messages': [
            (r'NOTE(:|-).*', Generic, '#pop'),
            (r'WARNING(:|-).*', Generic.Emph, '#pop'),
            (r'ERROR(:|-).*', Generic.Error, '#pop'),
            include('general'),
            # Ordinary text after "put" is not an error: emit it as Text
            # and leave this state at the end of the line.
            (r'.', Text),
            (r'\n', Text, '#pop'),
        ],
        'general': [
            include('keywords'),
            include('vars-strings'),
            include('special'),
            include('numbers'),
        ],
        # Keywords, statements, functions, macros
        'keywords': [
            (words(builtins_statements, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(builtins_sql, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(builtins_conditionals, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(builtins_macros, prefix=r'%', suffix=r'\b'),
             Name.Builtin),
            (words(builtins_functions, prefix=r'\b', suffix=r'\('),
             Name.Builtin),
        ],
        # Strings and user-defined variables and macros (order matters)
        'vars-strings': [
            (r'&[a-z_]\w{0,31}\.?', Name.Variable),
            (r'%[a-z_]\w{0,31}', Name.Function),
            (r'\'', String, 'string_squote'),
            (r'"', String, 'string_dquote'),
        ],
        'string_squote': [
            (r'\'', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # AFAIK, macro variables are not evaluated in single quotes
            # (r'&', Name.Variable, 'validvar'),
            (r'[^$\'\\]+', String),
            (r'[$\'\\]', String),
        ],
        'string_dquote': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # Only treat '&' as the start of a macro variable when a name
            # actually follows; otherwise 'validvar' has nothing to match
            # and the lexer would emit Error tokens.
            (r'&(?=[a-z_])', Name.Variable, 'validvar'),
            (r'[^$&"\\]+', String),
            # A lone '&' (e.g. "a & b") is ordinary string content.
            (r'[$&"\\]', String),
        ],
        'validvar': [
            (r'[a-z_]\w{0,31}\.?', Name.Variable, '#pop'),
        ],
        # SAS numbers and special variables
        'numbers': [
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)(E[+-]?[0-9]+)?i?\b',
             Number),
        ],
        'special': [
            # Word boundaries keep these from matching inside longer
            # identifiers (e.g. the "null" in "nullable").
            (r'\b(null|missing|_all_|_automatic_|_character_|_n_|'
             r'_infile_|_name_|_null_|_numeric_|_user_|_webout_)\b',
             Keyword.Constant),
        ],
        # 'operators': [
        #     (r'(-|=|<=|>=|<|>|<>|&|!=|'
        #      r'\||\*|\+|\^|/|!|~|~=)', Operator)
        # ],
    }