|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.grammar_notation |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for grammar notations like BNF. |
|
7 |
|
8 :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 from pygments.lexer import RegexLexer, bygroups, words |
|
13 from pygments.token import Punctuation, Text, Comment, Operator, \ |
|
14 Keyword, Name, Literal |
|
15 |
|
# Public API of this module: the names exported by a star-import.
__all__ = ['BnfLexer', 'AbnfLexer']
|
17 |
|
18 |
|
class BnfLexer(RegexLexer):
    """
    Minimal lexer for grammar notations that resemble the original BNF.

    To remain usable for as many BNF dialects as possible, only two
    constructs are recognised:

    * A nonterminal symbol, i.e. one or more characters enclosed in
      angle brackets.  Any printable ASCII character (0x20 through 0x7E,
      so spaces are accepted) other than ``<`` and ``>`` may appear
      between the brackets, which accommodates
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The definition operator ``::=``.

    Everything else -- terminal symbols, other operators and
    punctuation -- is emitted as plain text, and the notation is assumed
    to have no comment syntax.  The resulting highlighting is
    deliberately sparse.

    .. versionadded:: 2.1
    """

    name = "BNF"
    aliases = ["bnf"]
    filenames = ["*.bnf"]
    mimetypes = ["text/x-bnf"]

    tokens = {
        "root": [
            # <nonterminal>: the brackets are punctuation, the name a class
            (
                r"(<)([ -;=?-~]+)(>)",
                bygroups(Punctuation, Name.Class, Punctuation),
            ),
            # the one and only operator
            (r"::=", Operator),
            # Fallback.  The first rule swallows long runs that cannot start
            # any rule above in a single match (purely for performance); the
            # second consumes a stray '<', '>' or ':' one character at a time.
            (r"[^<>:]+", Text),
            (r".", Text),
        ],
    }
|
65 |
|
66 |
|
class AbnfLexer(RegexLexer):
    """
    Lexer for `IETF 7405 ABNF
    <http://www.ietf.org/rfc/rfc7405.txt>`_
    (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_)
    grammars.

    Comments, quoted strings, numeric terminal values (``%b``, ``%d``,
    ``%x``), repetition prefixes, the core rules, rule names, the
    ``=``, ``=/`` and ``/`` operators and grouping/option brackets are
    highlighted; anything else is emitted as plain text.

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Rule names highlighted as keywords (the ABNF "core rules").
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP')

    tokens = {
        'root': [
            # comment: from ';' to the end of the line
            (r';.*$', Comment.Single),

            # quoted string, optionally prefixed with %s or %i;
            # a double quote cannot appear inside the quotes, it has to
            # be written as '%x22'.
            (r'(%[si])?"[^"]*"', Literal),

            # binary terminal values
            (r'%b[01]+\-[01]+\b', Literal),          # range
            (r'%b[01]+(\.[01]+)*\b', Literal),       # concat

            # decimal terminal values
            (r'%d[0-9]+\-[0-9]+\b', Literal),        # range
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),     # concat

            # hexadecimal terminal values
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),      # range
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),   # concat

            # repetition (<a>*<b>element) including nRule;
            # the longest forms are tried first.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # Strictly speaking, these are not keywords but
            # are called `Core Rule'.
            (words(_core_rules, suffix=r'\b'), Keyword),

            # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
            # The tail is '*' rather than '+', so that single-letter
            # rule names are recognised as well.
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # operators
            (r'(=/|=|/)', Operator),

            # punctuation
            (r'[\[\]()]', Punctuation),

            # fallback
            (r'\s+', Text),
            (r'.', Text),
        ],
    }