|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.tnt |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexer for Typographic Number Theory. |
|
7 |
|
8 :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import Lexer |
|
15 from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \ |
|
16 Punctuation, Error |
|
17 |
|
18 __all__ = ['TNTLexer'] |
|
19 |
|
20 |
|
21 class TNTLexer(Lexer): |
|
22 """ |
|
23 Lexer for Typographic Number Theory, as described in the book |
|
24 Gödel, Escher, Bach, by Douglas R. Hofstadter, |
|
25 or as summarized here: |
|
26 https://github.com/Kenny2github/language-tnt/blob/master/README.md#summary-of-tnt |
|
27 |
|
28 .. versionadded:: 2.7 |
|
29 """ |
|
30 |
|
31 name = 'Typographic Number Theory' |
|
32 aliases = ['tnt'] |
|
33 filenames = ['*.tnt'] |
|
34 |
|
35 cur = [] |
|
36 |
|
37 LOGIC = set('⊃→]&∧^|∨Vv') |
|
38 OPERATORS = set('+.⋅*') |
|
39 VARIABLES = set('abcde') |
|
40 PRIMES = set("'′") |
|
41 NEGATORS = set('~!') |
|
42 QUANTIFIERS = set('AE∀∃') |
|
43 NUMBERS = set('0123456789') |
|
44 WHITESPACE = set('\t \v\n') |
|
45 |
|
46 RULES = re.compile('''(?xi) |
|
47 joining | separation | double-tilde | fantasy\\ rule |
|
48 | carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment |
|
49 | contrapositive | De\\ Morgan | switcheroo |
|
50 | specification | generalization | interchange |
|
51 | existence | symmetry | transitivity |
|
52 | add\\ S | drop\\ S | induction |
|
53 | axiom\\ ([1-5]) | premise | push | pop |
|
54 ''') |
|
55 LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*') |
|
56 COMMENT = re.compile(r'\[[^\n\]]+\]') |
|
57 |
|
58 def whitespace(self, start, text, required=False): |
|
59 """Tokenize whitespace.""" |
|
60 end = start |
|
61 try: |
|
62 while text[end] in self.WHITESPACE: |
|
63 end += 1 |
|
64 except IndexError: |
|
65 end = len(text) |
|
66 if required: |
|
67 assert end != start |
|
68 if end != start: |
|
69 self.cur.append((start, Text, text[start:end])) |
|
70 return end |
|
71 |
|
72 def variable(self, start, text): |
|
73 """Tokenize a variable.""" |
|
74 assert text[start] in self.VARIABLES |
|
75 end = start+1 |
|
76 while text[end] in self.PRIMES: |
|
77 end += 1 |
|
78 self.cur.append((start, Name.Variable, text[start:end])) |
|
79 return end |
|
80 |
|
81 def term(self, start, text): |
|
82 """Tokenize a term.""" |
|
83 if text[start] == 'S': # S...S(...) or S...0 |
|
84 end = start+1 |
|
85 while text[end] == 'S': |
|
86 end += 1 |
|
87 self.cur.append((start, Number.Integer, text[start:end])) |
|
88 return self.term(end, text) |
|
89 if text[start] == '0': # the singleton 0 |
|
90 self.cur.append((start, Number.Integer, text[start])) |
|
91 return start+1 |
|
92 if text[start] in self.VARIABLES: # a''... |
|
93 return self.variable(start, text) |
|
94 if text[start] == '(': # (...+...) |
|
95 self.cur.append((start, Punctuation, text[start])) |
|
96 start = self.term(start+1, text) |
|
97 assert text[start] in self.OPERATORS |
|
98 self.cur.append((start, Operator, text[start])) |
|
99 start = self.term(start+1, text) |
|
100 assert text[start] == ')' |
|
101 self.cur.append((start, Punctuation, text[start])) |
|
102 return start+1 |
|
103 raise AssertionError # no matches |
|
104 |
|
105 def formula(self, start, text): |
|
106 """Tokenize a formula.""" |
|
107 if text[start] in '[]': # fantasy push or pop |
|
108 self.cur.append((start, Keyword, text[start])) |
|
109 return start+1 |
|
110 if text[start] in self.NEGATORS: # ~<...> |
|
111 end = start+1 |
|
112 while text[end] in self.NEGATORS: |
|
113 end += 1 |
|
114 self.cur.append((start, Operator, text[start:end])) |
|
115 return self.formula(end, text) |
|
116 if text[start] in self.QUANTIFIERS: # Aa:<...> |
|
117 self.cur.append((start, Keyword.Declaration, text[start])) |
|
118 start = self.variable(start+1, text) |
|
119 assert text[start] == ':' |
|
120 self.cur.append((start, Punctuation, text[start])) |
|
121 return self.formula(start+1, text) |
|
122 if text[start] == '<': # <...&...> |
|
123 self.cur.append((start, Punctuation, text[start])) |
|
124 start = self.formula(start+1, text) |
|
125 assert text[start] in self.LOGIC |
|
126 self.cur.append((start, Operator, text[start])) |
|
127 start = self.formula(start+1, text) |
|
128 assert text[start] == '>' |
|
129 self.cur.append((start, Punctuation, text[start])) |
|
130 return start+1 |
|
131 # ...=... |
|
132 start = self.term(start, text) |
|
133 assert text[start] == '=' |
|
134 self.cur.append((start, Operator, text[start])) |
|
135 start = self.term(start+1, text) |
|
136 return start |
|
137 |
|
138 def rule(self, start, text): |
|
139 """Tokenize a rule.""" |
|
140 match = self.RULES.match(text, start) |
|
141 assert match is not None |
|
142 groups = sorted(match.regs[1:]) # exclude whole match |
|
143 for group in groups: |
|
144 if group[0] >= 0: # this group matched |
|
145 self.cur.append((start, Keyword, text[start:group[0]])) |
|
146 self.cur.append((group[0], Number.Integer, |
|
147 text[group[0]:group[1]])) |
|
148 if group[1] != match.end(): |
|
149 self.cur.append((group[1], Keyword, |
|
150 text[group[1]:match.end()])) |
|
151 break |
|
152 else: |
|
153 self.cur.append((start, Keyword, text[start:match.end()])) |
|
154 return match.end() |
|
155 |
|
156 def lineno(self, start, text): |
|
157 """Tokenize a line marker.""" |
|
158 end = start |
|
159 while text[end] not in self.NUMBERS: |
|
160 end += 1 |
|
161 self.cur.append((start, Punctuation, text[start])) |
|
162 self.cur.append((start+1, Text, text[start+1:end])) |
|
163 start = end |
|
164 match = self.LINENOS.match(text, start) |
|
165 assert match is not None |
|
166 assert text[match.end()] == ')' |
|
167 self.cur.append((match.start(), Number.Integer, match.group(0))) |
|
168 self.cur.append((match.end(), Punctuation, text[match.end()])) |
|
169 return match.end() + 1 |
|
170 |
|
171 def error_till_line_end(self, start, text): |
|
172 """Mark everything from ``start`` to the end of the line as Error.""" |
|
173 end = start |
|
174 try: |
|
175 while text[end] != '\n': # there's whitespace in rules |
|
176 end += 1 |
|
177 except IndexError: |
|
178 end = len(text) |
|
179 if end != start: |
|
180 self.cur.append((start, Error, text[start:end])) |
|
181 end = self.whitespace(end, text) |
|
182 return end |
|
183 |
|
184 def get_tokens_unprocessed(self, text): |
|
185 """Returns a list of TNT tokens.""" |
|
186 self.cur = [] |
|
187 start = end = self.whitespace(0, text) |
|
188 while start <= end < len(text): |
|
189 # try line number |
|
190 while text[end] in self.NUMBERS: |
|
191 end += 1 |
|
192 if end != start: # actual number present |
|
193 self.cur.append((start, Number.Integer, text[start:end])) |
|
194 # whitespace is required after a line number |
|
195 orig = len(self.cur) |
|
196 try: |
|
197 start = end = self.whitespace(end, text, True) |
|
198 except AssertionError: |
|
199 del self.cur[orig:] |
|
200 start = end = self.error_till_line_end(end, text) |
|
201 continue |
|
202 # at this point it could be a comment |
|
203 match = self.COMMENT.match(text, start) |
|
204 if match is not None: |
|
205 self.cur.append((start, Comment, text[start:match.end()])) |
|
206 start = end = match.end() |
|
207 # anything after the closing bracket is invalid |
|
208 start = end = self.error_till_line_end(start, text) |
|
209 # do not attempt to process the rest |
|
210 continue |
|
211 del match |
|
212 # one formula, possibly containing subformulae |
|
213 orig = len(self.cur) |
|
214 try: |
|
215 start = end = self.formula(start, text) |
|
216 except AssertionError: # not well-formed |
|
217 del self.cur[orig:] |
|
218 while text[end] not in self.WHITESPACE: |
|
219 end += 1 |
|
220 self.cur.append((start, Error, text[start:end])) |
|
221 start = end |
|
222 # skip whitespace after formula |
|
223 orig = len(self.cur) |
|
224 try: |
|
225 start = end = self.whitespace(end, text, True) |
|
226 except AssertionError: |
|
227 del self.cur[orig:] |
|
228 start = end = self.error_till_line_end(start, text) |
|
229 continue |
|
230 # rule proving this formula a theorem |
|
231 orig = len(self.cur) |
|
232 try: |
|
233 start = end = self.rule(start, text) |
|
234 except AssertionError: |
|
235 del self.cur[orig:] |
|
236 start = end = self.error_till_line_end(start, text) |
|
237 continue |
|
238 # skip whitespace after rule |
|
239 start = end = self.whitespace(end, text) |
|
240 # line marker |
|
241 if text[start] == '(': |
|
242 orig = len(self.cur) |
|
243 try: |
|
244 start = end = self.lineno(start, text) |
|
245 except AssertionError: |
|
246 del self.cur[orig:] |
|
247 start = end = self.error_till_line_end(start, text) |
|
248 continue |
|
249 start = end = self.whitespace(start, text) |
|
250 return self.cur |