ThirdParty/Pygments/pygments/lexers/archetype.py

changeset 4697
c2e9bf425554
child 5713
6762afd9f963
equal deleted inserted replaced
4696:bf4d19a7cade 4697:c2e9bf425554
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.archetype
4 ~~~~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexer for Archetype-related syntaxes, including:
7
8 - ODIN syntax <https://github.com/openEHR/odin>
9 - ADL syntax <http://www.openehr.org/releases/trunk/architecture/am/adl2.pdf>
10 - cADL sub-syntax of ADL
11
12 For uses of this syntax, see the openEHR archetypes <http://www.openEHR.org/ckm>
13
14 Contributed by Thomas Beale <https://github.com/wolandscat>,
15 <https://bitbucket.org/thomas_beale>.
16
17 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
18 :license: BSD, see LICENSE for details.
19 """
20
21 from pygments.lexer import RegexLexer, include, bygroups, using, default
22 from pygments.token import Text, Comment, Name, Literal, Number, String, \
23 Punctuation, Keyword, Operator, Generic
24
25 __all__ = ['OdinLexer', 'CadlLexer', 'AdlLexer']
26
27
class AtomsLexer(RegexLexer):
    """
    Lexer for Values used in ADL and ODIN.

    This class only provides shared states: it defines no ``root`` state and
    no ``name``/``aliases``. The first group of states is meant to be pulled
    in with ``include()``; the second group is entered through explicit
    state transitions and left again with ``#pop``.

    .. versionadded:: 2.1
    """

    tokens = {
        # ----- pseudo-states for inclusion -----
        'whitespace': [
            (r'\n', Text),
            (r'\s+', Text),
            # '--' starts a comment that runs to the end of the line
            (r'[ \t]*--.*$', Comment),
        ],
        'archetype_id': [
            (r'[ \t]*([a-zA-Z]\w+(\.[a-zA-Z]\w+)*::)?[a-zA-Z]\w+(-[a-zA-Z]\w+){2}'
             r'\.\w+[\w-]*\.v\d+(\.\d+){,2}((-[a-z]+)(\.\d+)?)?', Name.Decorator),
        ],
        'date_constraints': [
            # ISO 8601-based date/time constraints
            (r'[Xx?YyMmDdHhSs\d]{2,4}([:-][Xx?YyMmDdHhSs\d]{2}){2}', Literal.Date),
            # ISO 8601-based duration constraints + optional trailing slash
            (r'(P[YyMmWwDd]+(T[HhMmSs]+)?|PT[HhMmSs]+)/?', Literal.Date),
        ],
        'ordered_values': [
            # ISO 8601 date with optional 'T' ligature
            (r'\d{4}-\d{2}-\d{2}T?', Literal.Date),
            # ISO 8601 time
            (r'\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{4}|Z)?', Literal.Date),
            # ISO 8601 duration
            (r'P((\d*(\.\d+)?[YyMmWwDd]){1,3}(T(\d*(\.\d+)?[HhMmSs]){,3})?|'
             r'T(\d*(\.\d+)?[HhMmSs]){,3})', Literal.Date),
            # float with exponent
            (r'[+-]?(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+', Number.Float),
            # plain float, optionally a percentage.  The integer part is
            # written as \d* rather than (\d+)*: both accept the same text,
            # but the nested quantifier backtracks exponentially on a long
            # run of digits that is not followed by a '.'.
            (r'[+-]?\d*\.\d+%?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            # integer, optionally a percentage
            (r'[+-]?\d+%?', Number.Integer),
        ],
        'values': [
            include('ordered_values'),
            (r'([Tt]rue|[Ff]alse)', Literal),
            (r'"', String, 'string'),
            # single-quoted character literal
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
            # URI scheme; the rest of the URI is consumed by the 'uri' state
            (r'[a-z][a-z0-9+.-]*:', Literal, 'uri'),
            # term code
            (r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)(\w[\w-]*)(\])',
             bygroups(Punctuation, Name.Decorator, Punctuation, Name.Decorator,
                      Punctuation)),
            (r'\|', Punctuation, 'interval'),
            # list continuation
            (r'\.\.\.', Punctuation),
        ],
        'constraint_values': [
            # opening of a '[terminology::' code constraint; the codes that
            # follow are handled by the 'adl14_code_constraint' state
            (r'(\[)(\w[\w-]*(?:\([^)\n]+\))?)(::)',
             bygroups(Punctuation, Name.Decorator, Punctuation),
             'adl14_code_constraint'),
            # ADL 1.4 ordinal constraint
            (r'(\d*)(\|)(\[\w[\w-]*::\w[\w-]*\])((?:[,;])?)',
             bygroups(Number, Punctuation, Name.Decorator, Punctuation)),
            include('date_constraints'),
            include('values'),
        ],

        # ----- real states -----
        'string': [
            ('"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            # all other characters
            (r'[^\\"]+', String),
            # stray backslash
            (r'\\', String),
        ],
        'uri': [
            # effective URI terminators
            (r'[,>\s]', Punctuation, '#pop'),
            (r'[^>\s,]+', Literal),
        ],
        'interval': [
            # closing '|' ends the interval
            (r'\|', Punctuation, '#pop'),
            include('ordered_values'),
            (r'\.\.', Punctuation),
            (r'[<>=] *', Punctuation),
            # handle +/-
            (r'\+/-', Punctuation),
            (r'\s+', Text),
        ],
        'any_code': [
            include('archetype_id'),
            # if it is a code
            (r'[a-z_]\w*[0-9.]+(@[^\]]+)?', Name.Decorator),
            # if it is tuple with attribute names
            (r'[a-z_]\w*', Name.Class),
            # if it is an integer, i.e. Xpath child index
            (r'[0-9]+', Text),
            (r'\|', Punctuation, 'code_rubric'),
            (r'\]', Punctuation, '#pop'),
            # handle use_archetype statement
            (r'\s*,\s*', Punctuation),
        ],
        'code_rubric': [
            # everything up to the next '|' is the rubric text
            (r'\|', Punctuation, '#pop'),
            (r'[^|]+', String),
        ],
        'adl14_code_constraint': [
            (r'\]', Punctuation, '#pop'),
            (r'\|', Punctuation, 'code_rubric'),
            # a code, optionally followed by a ';' or ',' separator
            (r'(\w[\w-]*)([;,]?)', bygroups(Name.Decorator, Punctuation)),
            include('whitespace'),
        ],
    }
137
138
class OdinLexer(AtomsLexer):
    """
    Lexer for ODIN syntax.

    Builds on the value states inherited from `AtomsLexer` and adds the
    states needed for ODIN attribute assignments, type casts and
    cross-reference paths.

    .. versionadded:: 2.1
    """

    name = 'ODIN'
    aliases = ['odin']
    filenames = ['*.odin']
    mimetypes = ['text/odin']

    tokens = {
        # Entry state.
        'root': [
            include('whitespace'),
            (r'([Tt]rue|[Ff]alse)', Literal),
            include('values'),
            # a leading '/' opens a cross-reference path
            (r'/', Punctuation, 'path'),
            # a cross-reference path may also begin directly with a key
            (r'\[', Punctuation, 'key'),
            # name of an attribute
            (r'[a-z_]\w*', Name.Class),
            (r'=', Operator),
            # '(' introduces a type cast
            (r'\(', Punctuation, 'type_cast'),
            (r',', Punctuation),
            (r'<', Punctuation),
            (r'>', Punctuation),
            (r';', Punctuation),
        ],

        # Inside a cross-reference path; left on '>', ',' or whitespace.
        'path': [
            (r'>', Punctuation, '#pop'),
            # name of an attribute
            (r'[a-z_]\w*', Name.Class),
            (r'/', Punctuation),
            (r'\[', Punctuation, 'key'),
            (r'\s*,\s*', Punctuation, '#pop'),
            (r'\s+', Text, '#pop'),
        ],

        # Between '[' and ']': any value may act as the key.
        'key': [
            include('values'),
            (r'\]', Punctuation, '#pop'),
        ],

        # Between '(' and ')': everything is treated as the type name.
        'type_cast': [
            (r'\)', Punctuation, '#pop'),
            (r'[^)]+', Name.Class),
        ],
    }
186
187
class CadlLexer(AtomsLexer):
    """
    Lexer for cADL syntax.

    Adds a ``root`` state and a ``path`` state on top of the value and code
    states inherited from `AtomsLexer`.

    .. versionadded:: 2.1
    """

    name = 'cADL'
    aliases = ['cadl']
    filenames = ['*.cadl']

    tokens = {
        'path': [
            # attribute name
            (r'[a-z_]\w*', Name.Class),
            (r'/', Punctuation),
            (r'\[', Punctuation, 'any_code'),
            # whitespace ends the path
            (r'\s+', Punctuation, '#pop'),
        ],
        'root': [
            include('whitespace'),
            # Keyword rules consume the single non-word character that
            # follows the keyword (the trailing \W) as part of the token.
            (r'(cardinality|existence|occurrences|group|include|exclude|'
             r'allow_archetype|use_archetype|use_node)\W', Keyword.Type),
            (r'(and|or|not|there_exists|xor|implies|for_all)\W', Keyword.Type),
            (r'(after|before|closed)\W', Keyword.Type),
            # NOTE: 'not' is already matched by the Keyword.Type rule two
            # lines up (rules are tried in order), so this rule never fires.
            # It is kept so the rule table stays as it was.
            (r'(not)\W', Operator),
            (r'(matches|is_in)\W', Operator),
            # is_in / not is_in char
            (u'(\u2208|\u2209)', Operator),
            # there_exists / not there_exists / for_all / and / or / xor /
            # tilde operator.  The last alternative used to be written
            # '\223C', which in a non-raw string is the octal escape \223
            # followed by a literal 'C' and therefore never matched U+223C.
            (u'(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\u223C)',
             Operator),
            # regex in slot or as string constraint, delimited by /.../
            (r'(\{)(\s*/[^}]+/\s*)(\})',
             bygroups(Punctuation, String.Regex, Punctuation)),
            # regex in slot or as string constraint, delimited by ^...^
            (r'(\{)(\s*\^[^}]+\^\s*)(\})',
             bygroups(Punctuation, String.Regex, Punctuation)),
            (r'/', Punctuation, 'path'),
            # for cardinality etc
            (r'(\{)((?:\d+\.\.)?(?:\d+|\*))'
             r'((?:\s*;\s*(?:ordered|unordered|unique)){,2})(\})',
             bygroups(Punctuation, Number, Number, Punctuation)),
            # [{ is start of a tuple value
            (r'\[\{', Punctuation),
            (r'\}\]', Punctuation),
            (r'\{', Punctuation),
            (r'\}', Punctuation),
            include('constraint_values'),
            # type name
            (r'[A-Z]\w+(<[A-Z]\w+([A-Za-z_<>]*)>)?', Name.Class),
            # attribute name
            (r'[a-z_]\w*', Name.Class),
            (r'\[', Punctuation, 'any_code'),
            # NOTE(review): the final alternative '>]?' also matches '>]';
            # this looks unintended but is left unchanged -- confirm.
            (r'(~|//|\\\\|\+|-|/|\*|\^|!=|=|<=|>=|<|>]?)', Operator),
            (r'\(', Punctuation),
            (r'\)', Punctuation),
            # for lists of values
            (r',', Punctuation),
            (r'"', String, 'string'),
            # for assumed value
            (r';', Punctuation),
        ],
    }
251
252
class AdlLexer(AtomsLexer):
    """
    Lexer for ADL syntax.

    The ``root`` state recognises the section headings of an archetype and
    switches to a per-section state; the section bodies are delegated line
    by line to `OdinLexer` or `CadlLexer` via ``using()``.

    .. versionadded:: 2.1
    """

    name = 'ADL'
    aliases = ['adl']
    filenames = ['*.adl', '*.adls', '*.adlf', '*.adlx']

    tokens = {
        # Replaces the 'whitespace' state inherited from AtomsLexer.
        'whitespace': [
            # blank line ends
            (r'\s*\n', Text),
            # comment-only line
            (r'^[ \t]*--.*$', Comment),
        ],
        'odin_section': [
            # repeating the following two rules from the root state enable multi-line
            # strings that start in the first column to be dealt with
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n', Generic.Heading),
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            # blank or indented line: hand it to the ODIN lexer
            (r'^([ \t]*|[ \t]+.*)\n', using(OdinLexer)),
            # line that closes a string started earlier and then closes the
            # enclosing '<...>' block
            (r'^([^"]*")(>[ \t]*\n)', bygroups(String, Punctuation)),
            # template overlay delimiter
            (r'^----------*\n', Text, '#pop'),
            # any other line starting in the first column is string content
            (r'^.*\n', String),
            default('#pop'),
        ],
        'cadl_section': [
            # blank or indented line: hand it to the cADL lexer
            (r'^([ \t]*|[ \t]+.*)\n', using(CadlLexer)),
            default('#pop'),
        ],
        'rules_section': [
            # indented line: hand it to the cADL lexer
            (r'^[ \t]+.*\n', using(CadlLexer)),
            default('#pop'),
        ],
        'metadata': [
            (r'\)', Punctuation, '#pop'),
            (r';', Punctuation),
            (r'([Tt]rue|[Ff]alse)', Literal),
            # Guids.  This rule must be tried before the number rule below:
            # otherwise the leading decimal digits of a GUID are consumed as
            # a number, and a GUID whose first group is all digits leaves a
            # '-' that no rule in this state matches.  Version ids are not
            # affected because at least three '-' separated groups are
            # required here.
            (r'(\d|[a-fA-F])+(-(\d|[a-fA-F])+){3,}', Literal),
            # numbers and version ids
            (r'\d+(\.\d+)*', Literal),
            (r'\w+', Name.Class),
            (r'"', String, 'string'),
            (r'=', Operator),
            (r'[ \t]+', Text),
            # anything else ends the metadata block
            default('#pop'),
        ],
        'root': [
            # artefact type keywords at the start of a line
            (r'^(archetype|template_overlay|operational_template|template|'
             r'speciali[sz]e)', Generic.Heading),
            # section headings whose body is ODIN
            (r'^(language|description|ontology|terminology|annotations|'
             r'component_terminologies|revision_history)[ \t]*\n',
             Generic.Heading, 'odin_section'),
            # section headings whose body is cADL
            (r'^(definition)[ \t]*\n', Generic.Heading, 'cadl_section'),
            (r'^(rules)[ \t]*\n', Generic.Heading, 'rules_section'),
            include('archetype_id'),
            # '(' opens the metadata list
            (r'[ \t]*\(', Punctuation, 'metadata'),
            include('whitespace'),
        ],
    }

eric ide

mercurial