1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.tnt |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexer for Typographic Number Theory. |
|
7 |
|
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import Lexer |
|
15 from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \ |
|
16 Punctuation, Error |
|
17 |
|
18 __all__ = ['TNTLexer'] |
|
19 |
|
20 |
|
class TNTLexer(Lexer):
    """
    Lexer for Typographic Number Theory, as described in the book
    Gödel, Escher, Bach, by Douglas R. Hofstadter,
    or as summarized here:
    https://github.com/Kenny2github/language-tnt/blob/master/README.md#summary-of-tnt

    This lexer is a small recursive-descent parser rather than a regex state
    machine: helper methods consume text and append ``(index, tokentype,
    value)`` triples to ``self.cur``.  Malformed input is signalled internally
    by ``AssertionError`` (a construct did not match) or ``IndexError`` (text
    ran out mid-construct); the driver catches both and re-tokenizes the
    offending span as ``Error``.

    .. versionadded:: 2.7
    """

    name = 'Typographic Number Theory'
    aliases = ['tnt']
    filenames = ['*.tnt']

    # Accumulator for (index, tokentype, value) triples.  Shadowed by an
    # instance attribute in __init__; the class-level list only exists so the
    # attribute is always present.
    cur = []

    # Character classes.  Each set accepts both the ASCII stand-ins and the
    # proper Unicode symbols used in printed TNT.
    LOGIC = set('⊃→]&∧^|∨Vv')
    OPERATORS = set('+.⋅*')
    VARIABLES = set('abcde')
    PRIMES = set("'′")
    NEGATORS = set('~!')
    QUANTIFIERS = set('AE∀∃')
    NUMBERS = set('0123456789')
    WHITESPACE = set('\t \v\n')

    # All rule names that may justify a derivation line.  Two rules embed a
    # number in the rule text itself; those are the capture groups, which
    # rule() highlights as Number.Integer within the Keyword span.
    RULES = re.compile('''(?xi)
        joining | separation | double-tilde | fantasy\\ rule
        | carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
        | contrapositive | De\\ Morgan | switcheroo
        | specification | generalization | interchange
        | existence | symmetry | transitivity
        | add\\ S | drop\\ S | induction
        | axiom\\ ([1-5]) | premise | push | pop
    ''')
    # Comma/"and"-separated list of referenced line numbers, e.g. "1, 2 and 3".
    LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
    # A bracketed comment; may not span lines or nest.
    COMMENT = re.compile(r'\[[^\n\]]+\]')

    def __init__(self, *args, **kwargs):
        Lexer.__init__(self, *args, **kwargs)
        # Per-instance token accumulator (do not share the class-level list).
        self.cur = []

    def whitespace(self, start, text, required=False):
        """Tokenize whitespace starting at ``start``; return the end index.

        If ``required`` is true, raise ``AssertionError`` when no whitespace
        is present (callers use this to reject e.g. a rule glued to its
        formula).
        """
        end = start
        try:
            while text[end] in self.WHITESPACE:
                end += 1
        except IndexError:
            end = len(text)
        if required:
            assert end != start
        if end != start:
            self.cur.append((start, Text, text[start:end]))
        return end

    def variable(self, start, text):
        """Tokenize a variable (one of a-e plus any number of primes)."""
        assert text[start] in self.VARIABLES
        end = start+1
        while text[end] in self.PRIMES:
            end += 1
        self.cur.append((start, Name.Variable, text[start:end]))
        return end

    def term(self, start, text):
        """Tokenize a term; return the index just past it.

        Grammar: S...S<term>, the numeral 0, a variable, or a
        parenthesized ``(<term><op><term>)``.  Raises AssertionError when
        no production matches.
        """
        if text[start] == 'S':  # S...S(...) or S...0
            end = start+1
            while text[end] == 'S':
                end += 1
            self.cur.append((start, Number.Integer, text[start:end]))
            return self.term(end, text)
        if text[start] == '0':  # the singleton 0
            self.cur.append((start, Number.Integer, text[start]))
            return start+1
        if text[start] in self.VARIABLES:  # a''...
            return self.variable(start, text)
        if text[start] == '(':  # (...+...)
            self.cur.append((start, Punctuation, text[start]))
            start = self.term(start+1, text)
            assert text[start] in self.OPERATORS
            self.cur.append((start, Operator, text[start]))
            start = self.term(start+1, text)
            assert text[start] == ')'
            self.cur.append((start, Punctuation, text[start]))
            return start+1
        raise AssertionError  # no matches

    def formula(self, start, text):
        """Tokenize a formula; return the index just past it.

        Grammar: negated formula, quantified formula ``Aa:<...>``,
        compound ``<...&...>``, or atomic equality ``<term>=<term>``.
        """
        if text[start] in self.NEGATORS:  # ~<...>
            end = start+1
            while text[end] in self.NEGATORS:
                end += 1
            self.cur.append((start, Operator, text[start:end]))
            return self.formula(end, text)
        if text[start] in self.QUANTIFIERS:  # Aa:<...>
            self.cur.append((start, Keyword.Declaration, text[start]))
            start = self.variable(start+1, text)
            assert text[start] == ':'
            self.cur.append((start, Punctuation, text[start]))
            return self.formula(start+1, text)
        if text[start] == '<':  # <...&...>
            self.cur.append((start, Punctuation, text[start]))
            start = self.formula(start+1, text)
            assert text[start] in self.LOGIC
            self.cur.append((start, Operator, text[start]))
            start = self.formula(start+1, text)
            assert text[start] == '>'
            self.cur.append((start, Punctuation, text[start]))
            return start+1
        # ...=...
        start = self.term(start, text)
        assert text[start] == '='
        self.cur.append((start, Operator, text[start]))
        start = self.term(start+1, text)
        return start

    def rule(self, start, text):
        """Tokenize a rule annotation; return the index just past it.

        If the matched rule embeds a line/axiom number (a capture group of
        RULES), that number is emitted as Number.Integer with the
        surrounding words as Keyword; otherwise the whole match is Keyword.
        """
        match = self.RULES.match(text, start)
        assert match is not None
        groups = sorted(match.regs[1:])  # exclude whole match
        for group in groups:
            if group[0] >= 0:  # this group matched
                self.cur.append((start, Keyword, text[start:group[0]]))
                self.cur.append((group[0], Number.Integer,
                                 text[group[0]:group[1]]))
                if group[1] != match.end():
                    self.cur.append((group[1], Keyword,
                                     text[group[1]:match.end()]))
                break
        else:
            self.cur.append((start, Keyword, text[start:match.end()]))
        return match.end()

    def lineno(self, start, text):
        """Tokenize a parenthesized line referral, e.g. ``(lines 1 and 2)``.

        ``start`` points at the opening '('; everything up to the first
        digit is emitted as prose (Text), then LINENOS must match and be
        immediately followed by the closing ')'.
        """
        end = start
        # Scan to the first digit; IndexError here (no digit before EOF)
        # propagates to the driver, which marks the span as Error.
        while text[end] not in self.NUMBERS:
            end += 1
        self.cur.append((start, Punctuation, text[start]))
        self.cur.append((start+1, Text, text[start+1:end]))
        start = end
        match = self.LINENOS.match(text, start)
        assert match is not None
        assert text[match.end()] == ')'
        self.cur.append((match.start(), Number.Integer, match.group(0)))
        self.cur.append((match.end(), Punctuation, text[match.end()]))
        return match.end() + 1

    def error_till_line_end(self, start, text):
        """Mark everything from ``start`` to the end of the line as Error."""
        end = start
        try:
            while text[end] != '\n':  # there's whitespace in rules
                end += 1
        except IndexError:
            end = len(text)
        if end != start:
            self.cur.append((start, Error, text[start:end]))
        end = self.whitespace(end, text)
        return end

    def get_tokens_unprocessed(self, text):
        """Returns a list of TNT tokens.

        Each source line is: optional line number, then either a comment,
        a fantasy push/pop bracket, or a formula followed by its justifying
        rule and an optional line referral.  Whenever a sub-parser fails
        (AssertionError) its partial tokens are rolled back via ``orig``
        and the rest of the line becomes Error.
        """
        self.cur = []
        start = end = self.whitespace(0, text)
        while start <= end < len(text):
            try:
                # try line number
                while text[end] in self.NUMBERS:
                    end += 1
                if end != start:  # actual number present
                    self.cur.append((start, Number.Integer, text[start:end]))
                    # whitespace is required after a line number
                    orig = len(self.cur)
                    try:
                        start = end = self.whitespace(end, text, True)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(end, text)
                        continue
                # at this point it could be a comment
                match = self.COMMENT.match(text, start)
                if match is not None:
                    self.cur.append((start, Comment, text[start:match.end()]))
                    start = end = match.end()
                    # anything after the closing bracket is invalid
                    start = end = self.error_till_line_end(start, text)
                    # do not attempt to process the rest
                    continue
                del match
                if text[start] in '[]':  # fantasy push or pop
                    self.cur.append((start, Keyword, text[start]))
                    start += 1
                    end += 1
                else:
                    # one formula, possibly containing subformulae
                    orig = len(self.cur)
                    try:
                        start = end = self.formula(start, text)
                    except AssertionError:  # not well-formed
                        del self.cur[orig:]
                        while text[end] not in self.WHITESPACE:
                            end += 1
                        self.cur.append((start, Error, text[start:end]))
                        start = end
                # skip whitespace after formula
                orig = len(self.cur)
                try:
                    start = end = self.whitespace(end, text, True)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # rule proving this formula a theorem
                orig = len(self.cur)
                try:
                    start = end = self.rule(start, text)
                except AssertionError:
                    del self.cur[orig:]
                    start = end = self.error_till_line_end(start, text)
                    continue
                # skip whitespace after rule
                start = end = self.whitespace(end, text)
                # line marker
                if text[start] == '(':
                    orig = len(self.cur)
                    try:
                        start = end = self.lineno(start, text)
                    except AssertionError:
                        del self.cur[orig:]
                        start = end = self.error_till_line_end(start, text)
                        continue
                    start = end = self.whitespace(start, text)
            except IndexError:
                # Ran off the end of the text mid-construct: roll back the
                # tokens of the failed construct (if any rollback point was
                # set this iteration) and flag the remainder as Error.
                try:
                    del self.cur[orig:]
                except NameError:
                    pass  # if orig was never defined, fine
                self.error_till_line_end(start, text)
        return self.cur
|