|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.textedit |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for languages related to text processing. |
|
7 |
|
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 from bisect import bisect |
|
14 |
|
15 from pygments.lexer import RegexLexer, include, default, bygroups, using, this |
|
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
17 Number, Punctuation |
|
18 |
|
19 from pygments.lexers.python import PythonLexer |
|
20 |
|
21 __all__ = ['AwkLexer', 'VimLexer'] |
|
22 |
|
23 |
|
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        # Helper state: whitespace and '#' line comments; included by the
        # states below so comments are handled uniformly.
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Entered after tokens that can legally be followed by a regex
        # literal (operators, keywords, opening punctuation), where a '/'
        # starts a regex rather than a division operator.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            # A '/' that did not match a complete regex above is malformed;
            # hand the rest of the line to 'badregex'.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        # Recovery state for a malformed regex: resume at the next newline.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # At the start of a line (after optional whitespace) a '/' can
            # only begin a regex, never a division.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            # Builtin functions (arithmetic, string manipulation and I/O).
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            # Builtin variables (gawk's set, e.g. ARGC, NF, NR, FS).
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            # Anything else word-like, including '$'-prefixed field refs.
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
        ]
    }
|
76 |
|
77 |
|
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    # Matches ':py' and every longer abbreviation up to ':python'.
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # Embedded Python, heredoc form (":python << EOF ... EOF"); the
            # \6 backreference re-matches the terminator captured in (.*).
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # Embedded Python, single-line form (":python print 'x'").
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            # A double quote opening a line starts a comment.
            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
            (r'"(\\\\|\\"|[^\n"])*"', String.Double),
            (r"'(''|[^\n'])*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Rough operator set; reads OK.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # Reclassified in get_tokens_unprocessed
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        # The builtin keyword tables are large, so import them only when a
        # lexer instance is actually created.
        from pygments.lexers._vim_builtins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto
        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        Return True if *w* could be an (abbreviated) spelling of an entry
        in *mapping*, a sorted list of ``(shortest_form, full_name)`` pairs.

        VimL allows nearly every keyword to be abbreviated -- :ab, :abbre
        and :abbreviate all invoke 'ab[breviate]' -- so rather than building
        awful regexps like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        the lexer matches `\b\w+\b` and probes the sorted tables here.  See
        `scripts/get_vimkw.py` for how the lists are extracted.
        """
        probe = bisect(mapping, (w,))
        # Only the entries straddling the insertion point can match: *w* is
        # a hit when it extends the short form and is itself a prefix of
        # the full name.
        for cand in (probe - 1, probe):
            if 0 <= cand < len(mapping):
                short, full = mapping[cand][0], mapping[cand][1]
                if short == w[:len(short)] and full[:len(w)] == w:
                    return True
        return False

    def get_tokens_unprocessed(self, text):
        """Reclassify generic ``Name.Other`` words as commands, options or
        autocommand events using the builtin tables."""
        # TODO: builtins are only subsequent tokens on lines and 'keywords'
        # only happen at the beginning except for :au ones.
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            if token is not Name.Other:
                yield index, token, value
            elif self.is_in(value, self._cmd):
                yield index, Keyword, value
            elif self.is_in(value, self._opt) or self.is_in(value, self._aut):
                yield index, Name.Builtin, value
            else:
                yield index, Text, value