eric6/ThirdParty/Pygments/pygments/lexers/textfmts.py

changeset 6942:2602857055c5 (parent 6651:e8f3b5568b21, child 7547:21b0534faebc)
# -*- coding: utf-8 -*-
"""
    pygments.lexers.textfmts
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for various text formats.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, bygroups
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Generic, Literal
from pygments.util import ClassNotFound

__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer']


class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }


class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
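
    # Illustrative catalog lines (assumed samples) the rules below target:
    #   #: src/main.c:42        source reference  -> Keyword.Declaration
    #   #, fuzzy, c-format      flags             -> Keyword.Type
    #   # translator comment                      -> Comment.Single
    #   msgid "Hello, %s!"
    #   msgstr "Bonjour, %s !"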

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }


class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    .. versionadded:: 1.5
    """

    name = 'HTTP'
    aliases = ['http']

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01])(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number,
                      Text, Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',
                                'OPTIONS /', 'TRACE /', 'PATCH /'))
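
# A minimal usage sketch (illustration only, not part of the Pygments API).
# The hypothetical helper below assumes a lexer is registered for the
# 'application/json' mimetype; in that case HttpLexer delegates the response
# body to that lexer via content_callback above.
def _demo_http_content_sniffing():
    """Return the token stream for a small HTTP response with a JSON body."""
    sample = ('HTTP/1.1 200 OK\r\n'
              'Content-Type: application/json\r\n'
              '\r\n'
              '{"count": 1}')
    # Body tokens come from the mimetype-selected lexer, not from HttpLexer.
    return list(HttpLexer().get_tokens_unprocessed(sample))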


class TodotxtLexer(RegexLexer):
    """
    Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format.

    .. versionadded:: 2.0
    """

    name = 'Todotxt'
    aliases = ['todotxt']
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types to Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'
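
    # Illustrative task lines (assumed samples) the compound patterns above
    # are meant to match at the start of a line:
    #   complete_two_date_regex:  "x 2019-01-03 2019-01-01 file taxes"
    #   complete_one_date_regex:  "x 2019-01-03 file taxes"
    #   priority_date_regex:      "(A) 2019-01-02 call the bank"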

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
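

# A minimal, self-contained usage sketch (illustration only): highlight one
# Todo.txt entry with the lexer above. The formatter choice is arbitrary.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    todo_sample = '(A) 2019-01-02 Call the bank @phone +budget\n'
    print(highlight(todo_sample, TodotxtLexer(), TerminalFormatter()))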
