|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.archetype |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexer for Archetype-related syntaxes, including: |
|
7 |
|
8 - ODIN syntax <https://github.com/openEHR/odin> |
|
9 - ADL syntax <http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf> |
|
10 - cADL sub-syntax of ADL |
|
11 |
|
12 For uses of this syntax, see the openEHR archetypes <http://www.openEHR.org/ckm> |
|
13 |
|
14 Contributed by Thomas Beale <https://github.com/wolandscat>, |
|
15 <https://bitbucket.org/thomas_beale>. |
|
16 |
|
17 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. |
|
18 :license: BSD, see LICENSE for details. |
|
19 """ |
|
20 |
|
21 from pygments.lexer import RegexLexer, include, bygroups, using, default |
|
22 from pygments.token import Text, Comment, Name, Literal, Number, String, \ |
|
23 Punctuation, Keyword, Operator, Generic |
|
24 |
|
# Public lexers of this module.  The shared base class ``AtomsLexer`` is
# deliberately not listed here.
__all__ = [
    'OdinLexer',
    'CadlLexer',
    'AdlLexer',
]
|
26 |
|
27 |
|
class AtomsLexer(RegexLexer):
    """
    Lexer for Values used in ADL and ODIN.

    This class only provides token states; it declares no ``root`` state
    and no ``name``/``aliases``.  The ODIN, cADL and ADL lexers in this
    module subclass it and pull its states in with ``include(...)`` or by
    pushing them onto the state stack.

    .. versionadded:: 2.1
    """

    tokens = {
        # ----- pseudo-states for inclusion -----
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            # '--' starts a comment that runs to the end of the line
            (r'[ \t]*--.*$', Comment),
        ],
        'archetype_id': [
            (r'[ \t]*([a-zA-Z]\w+(\.[a-zA-Z]\w+)*::)?[a-zA-Z]\w+(-[a-zA-Z]\w+){2}'
             r'\.\w+[\w-]*\.v\d+(\.\d+){,2}((-[a-z]+)(\.\d+)?)?', Name.Decorator),
        ],
        'date_constraints': [
            # ISO 8601-based date/time constraints
            (r'[Xx?YyMmDdHhSs\d]{2,4}([:-][Xx?YyMmDdHhSs\d]{2}){2}', Literal.Date),
            # ISO 8601-based duration constraints + optional trailing slash
            (r'(P[YyMmWwDd]+(T[HhMmSs]+)?|PT[HhMmSs]+)/?', Literal.Date),
        ],
        'ordered_values': [
            # ISO 8601 date with optional 'T' ligature
            (r'\d{4}-\d{2}-\d{2}T?', Literal.Date),
            # ISO 8601 time
            (r'\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{4}|Z)?', Literal.Date),
            # ISO 8601 duration
            (r'P((\d*(\.\d+)?[YyMmWwDd]){1,3}(T(\d*(\.\d+)?[HhMmSs]){,3})?|'
             r'T(\d*(\.\d+)?[HhMmSs]){,3})', Literal.Date),
            (r'[+-]?(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            # ``\d*`` rather than ``(\d+)*``: both accept the same text, but
            # the nested quantifier backtracks exponentially on a long run
            # of digits that is not followed by '.', and this rule is tried
            # on every integer before the Number.Integer rule below.
            (r'[+-]?\d*\.\d+%?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[+-]?\d+%?', Number.Integer),
        ],
        'values': [
            include('ordered_values'),
            (r'([Tt]rue|[Ff]alse)', Literal),
            (r'"', String, 'string'),
            # single-quoted character, possibly an escape sequence
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            # scheme prefix; the rest is consumed by the 'uri' state
            (r'[a-z][a-z0-9+.-]*:', Literal, 'uri'),
            # term code
            (r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)(\w[\w-]*)(\])',
             bygroups(Punctuation, Name.Decorator, Punctuation, Name.Decorator,
                      Punctuation)),
            (r'\|', Punctuation, 'interval'),
            # list continuation
            (r'\.\.\.', Punctuation),
        ],
        'constraint_values': [
            # '[terminology::' opens a code constraint that is finished in
            # the 'adl14_code_constraint' state
            (r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)',
             bygroups(Punctuation, Name.Decorator, Punctuation), 'adl14_code_constraint'),
            # ADL 1.4 ordinal constraint
            (r'(\d*)(\|)(\[\w[\w-]*::\w[\w-]*\])((?:[,;])?)',
             bygroups(Number, Punctuation, Name.Decorator, Punctuation)),
            include('date_constraints'),
            include('values'),
        ],

        # ----- real states -----
        'string': [
            ('"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            # all other characters
            (r'[^\\"]+', String),
            # stray backslash
            (r'\\', String),
        ],
        'uri': [
            # effective URI terminators
            (r'[,>\s]', Punctuation, '#pop'),
            (r'[^>\s,]+', Literal),
        ],
        'interval': [
            # the closing '|' ends the interval
            (r'\|', Punctuation, '#pop'),
            include('ordered_values'),
            (r'\.\.', Punctuation),
            (r'[<>=] *', Punctuation),
            # handle +/-
            (r'\+/-', Punctuation),
            (r'\s+', Text),
        ],
        'any_code': [
            include('archetype_id'),
            # if it is a code
            (r'[a-z_]\w*[0-9.]+(@[^\]]+)?', Name.Decorator),
            # if it is tuple with attribute names
            (r'[a-z_]\w*', Name.Class),
            # if it is an integer, i.e. Xpath child index
            (r'[0-9]+', Text),
            (r'\|', Punctuation, 'code_rubric'),
            (r'\]', Punctuation, '#pop'),
            # handle use_archetype statement
            (r'\s*,\s*', Punctuation),
        ],
        'code_rubric': [
            # text between a pair of '|' characters
            (r'\|', Punctuation, '#pop'),
            (r'[^|]+', String),
        ],
        'adl14_code_constraint': [
            (r'\]', Punctuation, '#pop'),
            (r'\|', Punctuation, 'code_rubric'),
            # a code, optionally followed by a ';' or ',' separator
            (r'(\w[\w-]*)([;,]?)', bygroups(Name.Decorator, Punctuation)),
            include('whitespace'),
        ],
    }
|
137 |
|
138 |
|
class OdinLexer(AtomsLexer):
    """
    Lexer for ODIN syntax.

    Adds a ``root`` state and the ODIN-specific ``path``, ``key`` and
    ``type_cast`` states to the value states inherited from
    ``AtomsLexer``.

    .. versionadded:: 2.1
    """

    name = 'ODIN'
    aliases = ['odin']
    filenames = ['*.odin']
    mimetypes = ['text/odin']

    tokens = {
        'root': [
            include('whitespace'),
            # boolean literals are tried ahead of the generic value rules
            (r'([Tt]rue|[Ff]alse)', Literal),
            include('values'),
            # a leading '/' opens an x-ref path
            (r'/', Punctuation, 'path'),
            # an x-ref path may also begin directly with a key
            (r'\[', Punctuation, 'key'),
            # attribute name
            (r'[a-z_]\w*', Name.Class),
            (r'=', Operator),
            # '(' opens a type cast
            (r'\(', Punctuation, 'type_cast'),
            # remaining single-character punctuation
            (r',', Punctuation),
            (r'<', Punctuation),
            (r'>', Punctuation),
            (r';', Punctuation),
        ],
        'path': [
            # '>' ends the path
            (r'>', Punctuation, '#pop'),
            # attribute name segment
            (r'[a-z_]\w*', Name.Class),
            (r'/', Punctuation),
            (r'\[', Punctuation, 'key'),
            # a comma or any whitespace also ends the path
            (r'\s*,\s*', Punctuation, '#pop'),
            (r'\s+', Text, '#pop'),
        ],
        'key': [
            # any value may be used as a key; ']' closes it
            include('values'),
            (r'\]', Punctuation, '#pop'),
        ],
        'type_cast': [
            # everything up to the closing ')' is the type name
            (r'\)', Punctuation, '#pop'),
            (r'[^)]+', Name.Class),
        ],
    }
|
186 |
|
187 |
|
class CadlLexer(AtomsLexer):
    """
    Lexer for cADL syntax.

    Adds a ``root`` state and a ``path`` state on top of the value and
    code states inherited from ``AtomsLexer``.

    .. versionadded:: 2.1
    """
    name = 'cADL'
    aliases = ['cadl']
    filenames = ['*.cadl']

    tokens = {
        'path': [
            # attribute name
            (r'[a-z_]\w*', Name.Class),
            (r'/', Punctuation),
            (r'\[', Punctuation, 'any_code'),
            # whitespace ends the path
            (r'\s+', Punctuation, '#pop'),
        ],
        'root': [
            include('whitespace'),
            # keywords; each rule also consumes the single non-word
            # character that follows the keyword
            (r'(cardinality|existence|occurrences|group|include|exclude|'
             r'allow_archetype|use_archetype|use_node)\W', Keyword.Type),
            (r'(and|or|not|there_exists|xor|implies|for_all)\W', Keyword.Type),
            (r'(after|before|closed)\W', Keyword.Type),
            # NOTE(review): this rule is shadowed -- 'not' is already
            # matched by the Keyword.Type rule two lines up, so it is never
            # reached.  Left in place; confirm which token type is wanted.
            (r'(not)\W', Operator),
            (r'(matches|is_in)\W', Operator),
            # is_in / not is_in char
            # (U+2208 ELEMENT OF, U+2209 NOT AN ELEMENT OF)
            (u'(\u2208|\u2209)', Operator),
            # there_exists / not there_exists / for_all / and / or / xor / not
            # (U+2203, U+2204, U+2200, U+2227, U+2228, U+22BB, U+223C).
            # The last alternative has to be spelled ``\u223C`` (TILDE
            # OPERATOR): in this non-raw literal ``\223C`` is the octal
            # escape ``\223`` (U+0093) followed by a literal 'C'.
            (u'(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\u223C)',
             Operator),
            # regex in slot or as string constraint, delimited by /.../
            (r'(\{)(\s*/[^}]+/\s*)(\})',
             bygroups(Punctuation, String.Regex, Punctuation)),
            # regex in slot or as string constraint, delimited by ^...^
            (r'(\{)(\s*\^[^}]+\^\s*)(\})',
             bygroups(Punctuation, String.Regex, Punctuation)),
            (r'/', Punctuation, 'path'),
            # for cardinality etc
            (r'(\{)((?:\d+\.\.)?(?:\d+|\*))'
             r'((?:\s*;\s*(?:ordered|unordered|unique)){,2})(\})',
             bygroups(Punctuation, Number, Number, Punctuation)),
            # [{ is start of a tuple value
            (r'\[\{', Punctuation),
            (r'\}\]', Punctuation),
            (r'\{', Punctuation),
            (r'\}', Punctuation),
            include('constraint_values'),
            # type name
            (r'[A-Z]\w+(<[A-Z]\w+([A-Za-z_<>]*)>)?', Name.Class),
            # attribute name
            (r'[a-z_]\w*', Name.Class),
            (r'\[', Punctuation, 'any_code'),
            # NOTE(review): the last alternative is ``>]?`` ('>' optionally
            # followed by ']'); this may be a typo -- confirm the intent
            # before changing it.
            (r'(~|//|\\\\|\+|-|/|\*|\^|!=|=|<=|>=|<|>]?)', Operator),
            (r'\(', Punctuation),
            (r'\)', Punctuation),
            # for lists of values
            (r',', Punctuation),
            (r'"', String, 'string'),
            # for assumed value
            (r';', Punctuation),
        ],
    }
|
251 |
|
252 |
|
class AdlLexer(AtomsLexer):
    """
    Lexer for ADL syntax.

    The ``root`` state recognises the section headings that start in the
    first column and pushes a per-section state: ``odin_section`` hands
    its lines to ``OdinLexer``, while ``cadl_section`` and
    ``rules_section`` hand theirs to ``CadlLexer``.  The parenthesised
    list that root opens on ``(`` is lexed by the ``metadata`` state.

    .. versionadded:: 2.1
    """

    name = 'ADL'
    aliases = ['adl']
    filenames = ['*.adl', '*.adls', '*.adlf', '*.adlx']

    tokens = {
        # replaces the inherited 'whitespace' state for this lexer
        'whitespace': [
            # blank line ends
            (r'\s*\n', Text),
            # comment-only line
            (r'^[ \t]*--.*$', Comment),
        ],
        'odin_section': [
            # repeating the following two rules from the root state enable multi-line
            # strings that start in the first column to be dealt with
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n', Generic.Heading),
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            # empty or indented lines are delegated to the ODIN lexer
            (r'^([ \t]*|[ \t]+.*)\n', using(OdinLexer)),
            # line that closes a string with '"' and ends in '>'
            (r'^([^"]*")(>[ \t]*\n)', bygroups(String, Punctuation)),
            # template overlay delimiter
            (r'^----------*\n', Text, '#pop'),
            # any other line starting in the first column is string text
            (r'^.*\n', String),
            default('#pop'),
        ],
        'cadl_section': [
            # empty or indented lines are delegated to the cADL lexer
            (r'^([ \t]*|[ \t]+.*)\n', using(CadlLexer)),
            default('#pop'),
        ],
        'rules_section': [
            # only indented lines belong to the rules section
            (r'^[ \t]+.*\n', using(CadlLexer)),
            default('#pop'),
        ],
        'metadata': [
            (r'\)', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'([Tt]rue|[Ff]alse)', Literal),
            # Guids -- must be tried before the numeric rule below.
            # Otherwise a GUID that starts with a digit is split up by the
            # numeric and word rules, and its first '-' (which no rule
            # here matches) drops out of the state via default('#pop').
            # Plain numbers and version ids are unaffected: this pattern
            # needs at least three '-' separated groups.
            (r'(\d|[a-fA-F])+(-(\d|[a-fA-F])+){3,}', Literal),
            # numbers and version ids
            (r'\d+(\.\d+)*', Literal),
            (r'\w+', Name.Class),
            (r'"', String, 'string'),
            (r'=', Operator),
            (r'[ \t]+', Text),
            default('#pop'),
        ],
        'root': [
            # artefact-type headings
            (r'^(archetype|template_overlay|operational_template|template|'
             r'speciali[sz]e)', Generic.Heading),
            # headings of sections lexed as ODIN
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n',
             Generic.Heading, 'odin_section'),
            # headings of sections lexed as cADL
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            (r'^(rules)[ \t]*\n', Generic.Heading, 'rules_section'),
            include('archetype_id'),
            # '(' opens the metadata list
            (r'[ \t]*\(', Punctuation, 'metadata'),
            include('whitespace'),
        ],
    }