|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.grammar_notation |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for grammar notations like BNF. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import RegexLexer, bygroups, include, this, using, words |
|
15 from pygments.token import Comment, Keyword, Literal, Name, Number, \ |
|
16 Operator, Punctuation, String, Text |
|
17 |
|
# Public names of this module: the three lexer classes defined below.
__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer']
|
19 |
|
20 |
|
class BnfLexer(RegexLexer):
    """
    This lexer is for grammar notations which are similar to
    original BNF.

    In order to maximize the number of notations this lexer can
    handle, the following design decisions were made:

    * We don't distinguish `Terminal Symbol`.

    * We do assume that `NonTerminal Symbol` are always enclosed
      with arrow brackets.

    * We do assume that `NonTerminal Symbol` may include
      any printable ASCII characters (0x20-0x7E, space included)
      except arrow brackets.
      This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * We do assume that target notation doesn't support comment.

    * We don't distinguish any operators and punctuation except
      `::=`.

    Though these decisions might result in rather minimal highlighting
    and you might be disappointed, they are reasonable for us.

    .. versionadded:: 2.1
    """

    name = 'BNF'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']

    tokens = {
        'root': [
            # nonterminal: '<' name '>'.  The character class spans
            # ' '..';', '=' and '?'..'~', i.e. printable ASCII minus
            # '<' and '>', so the name cannot contain a bracket itself.
            (r'(<)([ -;=?-~]+)(>)',
             bygroups(Punctuation, Name.Class, Punctuation)),

            # the only operator
            (r'::=', Operator),

            # fallback
            # Consume a whole run of characters that cannot start either
            # rule above ('<' or ':') as a single token -- for performance.
            (r'[^<>:]+', Text),
            # Anything left (a stray '<', '>' or ':') is emitted one
            # character at a time.
            (r'.', Text),
        ],
    }
|
67 |
|
68 |
|
class AbnfLexer(RegexLexer):
    """
    Lexer for `IETF 7405 ABNF
    <http://www.ietf.org/rfc/rfc7405.txt>`_
    (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_)
    grammars.

    Rule names follow the RFC 5234 definition
    ``rulename = ALPHA *(ALPHA / DIGIT / "-")``, so a single letter is a
    valid rule name, and a name that merely starts with a core rule name
    followed by a hyphen (e.g. ``DIGIT-list``) is an ordinary rule name,
    not a core rule.

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Rule names highlighted as keywords (see the `Core Rule' entry in
    # the token table below).
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP')

    tokens = {
        'root': [
            # comment
            (r';.*$', Comment.Single),

            # quoted
            # double quote itself in this state, it is as '%x22'.
            # An optional %s / %i prefix is accepted in front of the
            # quoted string.
            (r'(%[si])?"[^"]*"', Literal),

            # binary (but i have never seen...)
            (r'%b[01]+\-[01]+\b', Literal),          # range
            (r'%b[01]+(\.[01]+)*\b', Literal),       # concat

            # decimal
            (r'%d[0-9]+\-[0-9]+\b', Literal),        # range
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),     # concat

            # hexadecimal
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),     # range
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),  # concat

            # repetition (<a>*<b>element) including nRule
            # Longest form first so that "2*3" is one token.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # Strictly speaking, these are not keyword but
            # are called `Core Rule'.
            # The (?!-) guard keeps a hyphenated user rule such as
            # "DIGIT-list" from being split into a keyword plus leftovers:
            # '\b' alone matches between "DIGIT" and "-".
            (words(_core_rules, suffix=r'\b(?!-)'), Keyword),

            # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
            # '*' rather than '+': a one-letter rule name is valid.
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # operators
            (r'(=/|=|/)', Operator),

            # punctuation
            (r'[\[\]()]', Punctuation),

            # fallback
            (r'\s+', Text),
            (r'.', Text),
        ],
    }
|
134 |
|
135 |
|
class JsgfLexer(RegexLexer):
    """
    For `JSpeech Grammar Format <https://www.w3.org/TR/jsgf/>`_
    grammars.

    .. versionadded:: 2.2
    """
    name = 'JSGF'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    # MULTILINE lets the '^' anchors in the 'documentation comment' rules
    # match at the start of every line; UNICODE applies to \w and \s.
    flags = re.MULTILINE | re.UNICODE

    tokens = {
        # Entry state: comments are tried first, then everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],
        'comments': [
            # '/**' not immediately followed by '/' opens a documentation
            # comment; '/**/' is left to the plain block-comment rule below.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # Ordinary block comment, matched non-greedily up to the first '*/'.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # Line comment, up to the end of the line.
            (r'//.*', Comment.Single),
        ],
        # Grammar tokens proper.  Kept separate from 'comments' so that the
        # 'example' state can reuse these rules without the comment rules.
        'non-comments': [
            # '#JSGF' header: only at the very start of the text being
            # lexed, running up to (not including) the first ';'.
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Text),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # Slash-delimited run emitted as a float.
            # NOTE(review): presumably JSGF weights such as /0.5/ -- the
            # pattern itself accepts any non-slash text; confirm.
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            # 'grammar' switches to a state that lexes the dotted name
            # following it.
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # <NULL> and <VOID> are highlighted as builtins; this rule must
            # come before the generic '<' rule below.
            (r'(<)(NULL|VOID)(>)',
             bygroups(Punctuation, Name.Builtin, Punctuation)),
            (r'<', Punctuation, 'rulename'),
            # Plain text: a word, or a run of characters that cannot start
            # any of the rules above.
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],
        # Inside "...": ends at the closing quote, with backslash escapes.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],
        # Inside {...}: ends at the closing brace, with backslash escapes.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],
        # After the 'grammar' keyword: dot-separated name segments up to
        # the terminating ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],
        # Inside <...>.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            # NOTE(review): presumably the wildcard of import names such
            # as <pkg.*> -- confirm.
            (r'\*', Punctuation),
            (r'\s+', Text),
            # A segment followed by '.' is a qualifier ...
            (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
            # ... and a segment with no '.' after it is the rule name itself.
            (r'[^.>]+', Name.Constant),
        ],
        # Inside /** ... */.
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # '@example' / '@see' at the start of a line (optionally after
            # the leading '*'): the text up to the next line-leading '@'
            # tag or the closing '*/' is re-lexed by this same lexer,
            # starting in the 'example' state.
            (r'(^\s*\*?\s*)(@(?:example|see)\s+)'
             r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
             bygroups(Comment.Multiline, Comment.Special,
                      using(this, state='example'))),
            # Any other line-leading '@tag'.
            (r'(^\s*\*?\s*)(@\S*)',
             bygroups(Comment.Multiline, Comment.Special)),
            # Everything else is comment text: a run free of '*', newline
            # and '@', otherwise one character at a time.
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],
        # Body of an '@example' / '@see' tag inside a documentation comment.
        'example': [
            # A newline followed by the leading '*' of the next comment
            # line stays comment text.
            (r'\n\s*\*', Comment.Multiline),
            include('non-comments'),
            # Anything the grammar rules do not match is comment text.
            (r'.', Comment.Multiline),
        ],
    }