|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.cmdline |
|
4 ~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Command line interface. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 from __future__ import print_function |
|
13 |
|
14 import os |
|
15 import sys |
|
16 import getopt |
|
17 from textwrap import dedent |
|
18 |
|
19 from pygments import __version__, highlight |
|
20 from pygments.util import ClassNotFound, OptionError, docstring_headline, \ |
|
21 guess_decode, guess_decode_from_terminal, terminal_encoding |
|
22 from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \ |
|
23 load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename |
|
24 from pygments.lexers.special import TextLexer |
|
25 from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter |
|
26 from pygments.formatters import get_all_formatters, get_formatter_by_name, \ |
|
27 load_formatter_from_file, get_formatter_for_filename, find_formatter_class |
|
28 from pygments.formatters.terminal import TerminalFormatter |
|
29 from pygments.formatters.terminal256 import Terminal256Formatter |
|
30 from pygments.filters import get_all_filters, find_filter_class |
|
31 from pygments.styles import get_all_styles, get_style_by_name |
|
32 |
|
33 |
|
34 USAGE = """\ |
|
35 Usage: %s [-l <lexer> | -g] [-F <filter>[:<options>]] [-f <formatter>] |
|
36 [-O <options>] [-P <option=value>] [-s] [-v] [-x] [-o <outfile>] [<infile>] |
|
37 |
|
38 %s -S <style> -f <formatter> [-a <arg>] [-O <options>] [-P <option=value>] |
|
39 %s -L [<which> ...] |
|
40 %s -N <filename> |
|
41 %s -H <type> <name> |
|
42 %s -h | -V |
|
43 |
|
44 Highlight the input file and write the result to <outfile>. |
|
45 |
|
46 If no input file is given, use stdin, if -o is not given, use stdout. |
|
47 |
|
48 If -s is passed, lexing will be done in "streaming" mode, reading and |
|
49 highlighting one line at a time. This will only work properly with |
|
50 lexers that have no constructs spanning multiple lines! |
|
51 |
|
52 <lexer> is a lexer name (query all lexer names with -L). If -l is not |
|
53 given, the lexer is guessed from the extension of the input file name |
|
54 (this obviously doesn't work if the input is stdin). If -g is passed, |
|
55 attempt to guess the lexer from the file contents, or pass through as |
|
56 plain text if this fails (this can work for stdin). |
|
57 |
|
58 Likewise, <formatter> is a formatter name, and will be guessed from |
|
59 the extension of the output file name. If no output file is given, |
|
60 the terminal formatter will be used by default. |
|
61 |
|
62 The additional option -x allows custom lexers and formatters to be |
|
63 loaded from a .py file relative to the current working directory. For |
|
64 example, ``-l ./customlexer.py -x``. By default, this option expects a |
|
65 file with a class named CustomLexer or CustomFormatter; you can also |
|
66 specify your own class name with a colon (``-l ./lexer.py:MyLexer``). |
|
67 Users should be very careful not to use this option with untrusted files, |
|
68 because it will import and run them. |
|
69 |
|
70 With the -O option, you can give the lexer and formatter a comma- |
|
71 separated list of options, e.g. ``-O bg=light,python=cool``. |
|
72 |
|
73 The -P option adds lexer and formatter options like the -O option, but |
|
74 you can only give one option per -P. That way, the option value may |
|
75 contain commas and equals signs, which it can't with -O, e.g. |
|
76 ``-P "heading=Pygments, the Python highlighter". |
|
77 |
|
78 With the -F option, you can add filters to the token stream, you can |
|
79 give options in the same way as for -O after a colon (note: there must |
|
80 not be spaces around the colon). |
|
81 |
|
82 The -O, -P and -F options can be given multiple times. |
|
83 |
|
84 With the -S option, print out style definitions for style <style> |
|
85 for formatter <formatter>. The argument given by -a is formatter |
|
86 dependent. |
|
87 |
|
88 The -L option lists lexers, formatters, styles or filters -- set |
|
89 `which` to the thing you want to list (e.g. "styles"), or omit it to |
|
90 list everything. |
|
91 |
|
92 The -N option guesses and prints out a lexer name based solely on |
|
93 the given filename. It does not take input or highlight anything. |
|
94 If no specific lexer can be determined "text" is returned. |
|
95 |
|
96 The -H option prints detailed help for the object <name> of type <type>, |
|
97 where <type> is one of "lexer", "formatter" or "filter". |
|
98 |
|
99 The -s option processes lines one at a time until EOF, rather than |
|
100 waiting to process the entire file. This only works for stdin, and |
|
101 is intended for streaming input such as you get from 'tail -f'. |
|
102 Example usage: "tail -f sql.log | pygmentize -s -l sql" |
|
103 |
|
104 The -v option prints a detailed traceback on unhandled exceptions, |
|
105 which is useful for debugging and bug reports. |
|
106 |
|
107 The -h option prints this help. |
|
108 The -V option prints the package version. |
|
109 """ |
|
110 |
|
111 |
|
112 def _parse_options(o_strs): |
|
113 opts = {} |
|
114 if not o_strs: |
|
115 return opts |
|
116 for o_str in o_strs: |
|
117 if not o_str.strip(): |
|
118 continue |
|
119 o_args = o_str.split(',') |
|
120 for o_arg in o_args: |
|
121 o_arg = o_arg.strip() |
|
122 try: |
|
123 o_key, o_val = o_arg.split('=', 1) |
|
124 o_key = o_key.strip() |
|
125 o_val = o_val.strip() |
|
126 except ValueError: |
|
127 opts[o_arg] = True |
|
128 else: |
|
129 opts[o_key] = o_val |
|
130 return opts |
|
131 |
|
132 |
|
133 def _parse_filters(f_strs): |
|
134 filters = [] |
|
135 if not f_strs: |
|
136 return filters |
|
137 for f_str in f_strs: |
|
138 if ':' in f_str: |
|
139 fname, fopts = f_str.split(':', 1) |
|
140 filters.append((fname, _parse_options([fopts]))) |
|
141 else: |
|
142 filters.append((f_str, {})) |
|
143 return filters |
|
144 |
|
145 |
|
146 def _print_help(what, name): |
|
147 try: |
|
148 if what == 'lexer': |
|
149 cls = get_lexer_by_name(name) |
|
150 print("Help on the %s lexer:" % cls.name) |
|
151 print(dedent(cls.__doc__)) |
|
152 elif what == 'formatter': |
|
153 cls = find_formatter_class(name) |
|
154 print("Help on the %s formatter:" % cls.name) |
|
155 print(dedent(cls.__doc__)) |
|
156 elif what == 'filter': |
|
157 cls = find_filter_class(name) |
|
158 print("Help on the %s filter:" % name) |
|
159 print(dedent(cls.__doc__)) |
|
160 return 0 |
|
161 except (AttributeError, ValueError): |
|
162 print("%s not found!" % what, file=sys.stderr) |
|
163 return 1 |
|
164 |
|
165 |
|
166 def _print_list(what): |
|
167 if what == 'lexer': |
|
168 print() |
|
169 print("Lexers:") |
|
170 print("~~~~~~~") |
|
171 |
|
172 info = [] |
|
173 for fullname, names, exts, _ in get_all_lexers(): |
|
174 tup = (', '.join(names)+':', fullname, |
|
175 exts and '(filenames ' + ', '.join(exts) + ')' or '') |
|
176 info.append(tup) |
|
177 info.sort() |
|
178 for i in info: |
|
179 print(('* %s\n %s %s') % i) |
|
180 |
|
181 elif what == 'formatter': |
|
182 print() |
|
183 print("Formatters:") |
|
184 print("~~~~~~~~~~~") |
|
185 |
|
186 info = [] |
|
187 for cls in get_all_formatters(): |
|
188 doc = docstring_headline(cls) |
|
189 tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and |
|
190 '(filenames ' + ', '.join(cls.filenames) + ')' or '') |
|
191 info.append(tup) |
|
192 info.sort() |
|
193 for i in info: |
|
194 print(('* %s\n %s %s') % i) |
|
195 |
|
196 elif what == 'filter': |
|
197 print() |
|
198 print("Filters:") |
|
199 print("~~~~~~~~") |
|
200 |
|
201 for name in get_all_filters(): |
|
202 cls = find_filter_class(name) |
|
203 print("* " + name + ':') |
|
204 print(" %s" % docstring_headline(cls)) |
|
205 |
|
206 elif what == 'style': |
|
207 print() |
|
208 print("Styles:") |
|
209 print("~~~~~~~") |
|
210 |
|
211 for name in get_all_styles(): |
|
212 cls = get_style_by_name(name) |
|
213 print("* " + name + ':') |
|
214 print(" %s" % docstring_headline(cls)) |
|
215 |
|
216 |
|
def main_inner(popts, args, usage):
    """Run one ``pygmentize`` invocation from already-parsed options.

    *popts* is a list of ``(option, value)`` pairs and *args* the list of
    remaining positional arguments (both as produced by ``getopt``);
    *usage* is the usage text printed on request or on misuse.

    Returns the process exit status: 0 on success, 1 when a lexer,
    formatter, filter or file could not be obtained, and 2 when the
    combination of options/arguments is invalid.

    An output file opened for ``-o`` is closed again once highlighting
    has finished or failed; standard output is never closed.
    """
    opts = {}
    O_opts = []
    P_opts = []
    F_opts = []
    # -O, -P and -F may be repeated, so collect every occurrence; ``opts``
    # only remembers the last value of each option.
    for opt, arg in popts:
        if opt == '-O':
            O_opts.append(arg)
        elif opt == '-P':
            P_opts.append(arg)
        elif opt == '-F':
            F_opts.append(arg)
        opts[opt] = arg

    if opts.pop('-h', None) is not None:
        print(usage)
        return 0

    if opts.pop('-V', None) is not None:
        print('Pygments version %s, (c) 2006-2017 by Georg Brandl.' % __version__)
        return 0

    # handle ``pygmentize -L``
    L_opt = opts.pop('-L', None)
    if L_opt is not None:
        # -L cannot be combined with any other option
        if opts:
            print(usage, file=sys.stderr)
            return 2

        # print version
        main(['', '-V'])
        if not args:
            args = ['lexer', 'formatter', 'filter', 'style']
        for arg in args:
            # accept plural forms such as "lexers"
            _print_list(arg.rstrip('s'))
        return 0

    # handle ``pygmentize -H``
    H_opt = opts.pop('-H', None)
    if H_opt is not None:
        if opts or len(args) != 2:
            print(usage, file=sys.stderr)
            return 2

        what, name = args  # pylint: disable=unbalanced-tuple-unpacking
        if what not in ('lexer', 'formatter', 'filter'):
            print(usage, file=sys.stderr)
            return 2

        return _print_help(what, name)

    # parse -O options
    parsed_opts = _parse_options(O_opts)
    opts.pop('-O', None)

    # parse -P options (one option each; the value is taken verbatim)
    for p_opt in P_opts:
        try:
            name, value = p_opt.split('=', 1)
        except ValueError:
            parsed_opts[p_opt] = True
        else:
            parsed_opts[name] = value
    opts.pop('-P', None)

    # encodings: the specific option wins over the generic ``encoding``
    inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
    outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))

    # handle ``pygmentize -N``
    infn = opts.pop('-N', None)
    if infn is not None:
        lexer = find_lexer_class_for_filename(infn)
        if lexer is None:
            lexer = TextLexer

        print(lexer.aliases[0])
        return 0

    # handle ``pygmentize -S``
    S_opt = opts.pop('-S', None)
    a_opt = opts.pop('-a', None)
    if S_opt is not None:
        f_opt = opts.pop('-f', None)
        if not f_opt:
            print(usage, file=sys.stderr)
            return 2
        if opts or args:
            print(usage, file=sys.stderr)
            return 2

        try:
            parsed_opts['style'] = S_opt
            fmter = get_formatter_by_name(f_opt, **parsed_opts)
        except ClassNotFound as err:
            print(err, file=sys.stderr)
            return 1

        print(fmter.get_style_defs(a_opt or ''))
        return 0

    # if no -S is given, -a is not allowed
    if a_opt is not None:
        print(usage, file=sys.stderr)
        return 2

    # parse -F options
    F_opts = _parse_filters(F_opts)
    opts.pop('-F', None)

    allow_custom_lexer_formatter = False
    # -x: allow custom (eXternal) lexers and formatters
    if opts.pop('-x', None) is not None:
        allow_custom_lexer_formatter = True

    # select lexer
    lexer = None

    # given by name?
    lexername = opts.pop('-l', None)
    if lexername:
        # custom lexer, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in lexername:
            try:
                if ':' in lexername:
                    filename, name = lexername.rsplit(':', 1)
                    lexer = load_lexer_from_file(filename, name,
                                                 **parsed_opts)
                else:
                    lexer = load_lexer_from_file(lexername, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                lexer = get_lexer_by_name(lexername, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    # read input code
    code = None

    if args:
        if len(args) > 1:
            print(usage, file=sys.stderr)
            return 2

        if '-s' in opts:
            print('Error: -s option not usable when input file specified',
                  file=sys.stderr)
            return 2

        infn = args[0]
        try:
            with open(infn, 'rb') as infp:
                code = infp.read()
        except Exception as err:
            print('Error: cannot read infile:', err, file=sys.stderr)
            return 1
        if not inencoding:
            code, inencoding = guess_decode(code)

        # do we have to guess the lexer?
        if not lexer:
            try:
                lexer = get_lexer_for_filename(infn, code, **parsed_opts)
            except ClassNotFound as err:
                if '-g' in opts:
                    try:
                        lexer = guess_lexer(code, **parsed_opts)
                    except ClassNotFound:
                        lexer = TextLexer(**parsed_opts)
                else:
                    print('Error:', err, file=sys.stderr)
                    return 1
            except OptionError as err:
                print('Error:', err, file=sys.stderr)
                return 1

    elif '-s' not in opts:  # treat stdin as full file (-s support is later)
        # read code from terminal, always in binary mode since we want to
        # decode ourselves and be tolerant with it
        if sys.version_info > (3,):
            # Python 3: we have to use .buffer to get a binary stream
            code = sys.stdin.buffer.read()
        else:
            code = sys.stdin.read()
        if not inencoding:
            code, inencoding = guess_decode_from_terminal(code, sys.stdin)
            # else the lexer will do the decoding
        if not lexer:
            try:
                lexer = guess_lexer(code, **parsed_opts)
            except ClassNotFound:
                lexer = TextLexer(**parsed_opts)

    else:  # -s option needs a lexer with -l
        if not lexer:
            print('Error: when using -s a lexer has to be selected with -l',
                  file=sys.stderr)
            return 2

    # process filters
    for fname, fopts in F_opts:
        try:
            lexer.add_filter(fname, **fopts)
        except ClassNotFound as err:
            print('Error:', err, file=sys.stderr)
            return 1

    # select formatter
    outfn = opts.pop('-o', None)
    fmter = opts.pop('-f', None)
    if fmter:
        # custom formatter, located relative to user's cwd
        if allow_custom_lexer_formatter and '.py' in fmter:
            try:
                if ':' in fmter:
                    fmter_file, fmtername = fmter.rsplit(':', 1)
                    fmter = load_formatter_from_file(fmter_file, fmtername,
                                                     **parsed_opts)
                else:
                    fmter = load_formatter_from_file(fmter, **parsed_opts)
            except ClassNotFound as err:
                print('Error:', err, file=sys.stderr)
                return 1
        else:
            try:
                fmter = get_formatter_by_name(fmter, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1

    if outfn:
        if not fmter:
            try:
                fmter = get_formatter_for_filename(outfn, **parsed_opts)
            except (OptionError, ClassNotFound) as err:
                print('Error:', err, file=sys.stderr)
                return 1
        try:
            outfile = open(outfn, 'wb')
        except Exception as err:
            print('Error: cannot open outfile:', err, file=sys.stderr)
            return 1
    else:
        if not fmter:
            if '256' in os.environ.get('TERM', ''):
                fmter = Terminal256Formatter(**parsed_opts)
            else:
                fmter = TerminalFormatter(**parsed_opts)
        if sys.version_info > (3,):
            # Python 3: we have to use .buffer to get a binary stream
            outfile = sys.stdout.buffer
        else:
            outfile = sys.stdout

    # determine output encoding if not explicitly selected
    if not outencoding:
        if outfn:
            # output file? use lexer encoding for now (can still be None)
            fmter.encoding = inencoding
        else:
            # else use terminal encoding
            fmter.encoding = terminal_encoding(sys.stdout)

    # provide coloring under Windows, if possible
    if not outfn and sys.platform in ('win32', 'cygwin') and \
       fmter.name in ('Terminal', 'Terminal256'):  # pragma: no cover
        # unfortunately colorama doesn't support binary streams on Py3
        if sys.version_info > (3,):
            from pygments.util import UnclosingTextIOWrapper
            outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
            fmter.encoding = None
        try:
            import colorama.initialise
        except ImportError:
            pass
        else:
            outfile = colorama.initialise.wrap_stream(
                outfile, convert=None, strip=None, autoreset=False, wrap=True)

    # When using the LaTeX formatter and the option `escapeinside` is
    # specified, we need a special lexer which collects escaped text
    # before running the chosen language lexer.
    escapeinside = parsed_opts.get('escapeinside', '')
    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
        left = escapeinside[0]
        right = escapeinside[1]
        lexer = LatexEmbeddedLexer(left, right, lexer)

    # ... and do it!
    try:
        if '-s' not in opts:
            # process whole input as per normal...
            highlight(code, lexer, fmter, outfile)
            return 0

        # line by line processing of stdin (eg: for 'tail -f')...
        try:
            while 1:
                if sys.version_info > (3,):
                    # Python 3: we have to use .buffer to get a binary stream
                    line = sys.stdin.buffer.readline()
                else:
                    line = sys.stdin.readline()
                if not line:
                    break
                if not inencoding:
                    line = guess_decode_from_terminal(line, sys.stdin)[0]
                highlight(line, lexer, fmter, outfile)
                # flush after every line so streamed output appears promptly
                if hasattr(outfile, 'flush'):
                    outfile.flush()
            return 0
        except KeyboardInterrupt:  # pragma: no cover
            return 0
    finally:
        # Close only the file we opened ourselves for -o; when writing to
        # the terminal ``outfile`` is (a wrapper around) stdout and must
        # stay open.
        if outfn:
            outfile.close()
|
533 |
|
534 |
|
def main(args=sys.argv):
    """
    Command line entry point: parse *args* and hand over to ``main_inner``.

    *args* is an argv-style list whose first element is the program name
    (it is substituted into the usage text).  Returns 2 when the options
    cannot be parsed, otherwise the status returned by ``main_inner``.

    If ``main_inner`` raises, a short error summary is written to stderr
    and 1 is returned -- unless ``-v`` was given, in which case a banner is
    printed and the exception is re-raised with its full traceback.
    """
    usage = USAGE % ((args[0],) * 6)

    try:
        popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:vhVHgsx")
    except getopt.GetoptError:
        print(usage, file=sys.stderr)
        return 2

    try:
        return main_inner(popts, args, usage)
    except Exception:
        verbose = '-v' in [flag for flag, _ in popts]
        if verbose:
            banner = (
                '',
                '*' * 65,
                'An unhandled exception occurred while highlighting.',
                'Please report the whole traceback to the issue tracker at',
                '<https://bitbucket.org/birkenfeld/pygments-main/issues>.',
                '*' * 65,
                '',
            )
            for text in banner:
                print(text, file=sys.stderr)
            raise

        import traceback
        tb_lines = traceback.format_exception(*sys.exc_info())
        # the last entry is the "ExceptionType: message" line
        summary = tb_lines[-1].strip()
        if len(tb_lines) >= 3:
            # extract relevant file and position info
            location = tb_lines[-2].split('\n')[0].strip()[1:]
            summary += '\n (f%s)' % location
        report = (
            '',
            '*** Error while highlighting:',
            summary,
            '*** If this is a bug you want to report, please rerun with -v.',
        )
        for text in report:
            print(text, file=sys.stderr)
        return 1