ThirdParty/Pygments/pygments/lexers/textedit.py

changeset 4172
4f20dba37ab6
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.textedit
4 ~~~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for languages related to text processing.
7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 from bisect import bisect
14
15 from pygments.lexer import RegexLexer, include, default, bygroups, using, this
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation
18
19 from pygments.lexers.python import PythonLexer
20
21 __all__ = ['AwkLexer', 'VimLexer']
22
23
class AwkLexer(RegexLexer):
    """
    For Awk scripts.

    .. versionadded:: 1.5
    """

    name = 'Awk'
    aliases = ['awk', 'gawk', 'mawk', 'nawk']
    filenames = ['*.awk']
    mimetypes = ['application/x-awk']

    tokens = {
        # Shared by the other states via include().
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single)
        ],
        # Pushed by the 'root' rules below that name it as their new state.
        # Tries to read a complete /regex/ literal and always pops again.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'\B', String.Regex, '#pop'),
            # A slash that does not begin a complete regex literal:
            # swap this state for 'badregex'.
            (r'(?=/)', Text, ('#pop', 'badregex')),
            default('#pop')
        ],
        # The only rule here is the newline that pops the state.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=|])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
            (r'function\b', Keyword.Declaration, 'slashstartsregex'),
            (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
             r'length|match|split|sprintf|sub|substr|tolower|toupper|close|'
             r'fflush|getline|next|nextfile|print|printf|strftime|systime|'
             r'delete|system)\b', Keyword.Reserved),
            (r'(ARGC|ARGIND|ARGV|BEGIN|CONVFMT|ENVIRON|END|ERRNO|FIELDWIDTHS|'
             r'FILENAME|FNR|FS|IGNORECASE|NF|NR|OFMT|OFS|ORFS|RLENGTH|RS|'
             r'RSTART|RT|SUBSEP)\b', Name.Builtin),
            (r'[$a-zA-Z_]\w*', Name.Other),
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            # String literals.  The three alternatives are mutually
            # exclusive (escaped backslash | backslash + any other char |
            # plain char that is neither quote nor backslash), so every
            # input position can be consumed in exactly one way.  If a
            # backslash could also match as a plain character, an
            # unterminated string full of backslashes would backtrack
            # exponentially.
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
76
77
class VimLexer(RegexLexer):
    """
    Lexer for VimL script files.

    .. versionadded:: 0.8
    """
    name = 'VimL'
    aliases = ['vim']
    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
    mimetypes = ['text/x-vim']
    flags = re.MULTILINE

    # Matches 'py' and every longer prefix of 'python' ('pyt', 'pyth', ...).
    _python = r'py(?:t(?:h(?:o(?:n)?)?)?)?'

    tokens = {
        'root': [
            # "python << MARKER ... MARKER": the text between the marker
            # (group 6) and its repetition is handed to PythonLexer.
            (r'^([ \t:]*)(' + _python + r')([ \t]*)(<<)([ \t]*)(.*)((?:\n|.)*)(\6)',
             bygroups(using(this), Keyword, Text, Operator, Text, Text,
                      using(PythonLexer), Text)),
            # One-line form: the rest of the line is handed to PythonLexer.
            (r'^([ \t:]*)(' + _python + r')([ \t])(.*)',
             bygroups(using(this), Keyword, Text, using(PythonLexer))),

            (r'^\s*".*', Comment),

            (r'[ \t]+', Text),
            # TODO: regexes can have other delims
            # In the two patterns below the alternatives are mutually
            # exclusive (escaped backslash | backslash + other char | plain
            # char that is not the delimiter, a backslash or a newline).
            # If a backslash could also match as a plain character, a line
            # with an opening delimiter, many backslashes and no closing
            # delimiter would backtrack exponentially.
            (r'/(\\\\|\\[^\\\n]|[^\n/\\])*/', String.Regex),
            (r'"(\\\\|\\[^\\\n]|[^\n"\\])*"', String.Double),
            (r"'(''|[^\n'])*'", String.Single),

            # Who decided that doublequote was a good comment character??
            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
            (r'-?\d+', Number),
            (r'#[0-9a-f]{6}', Number.Hex),
            (r'^:', Punctuation),
            (r'[()<>+=!|,~-]', Punctuation),  # Inexact list. Looks decent.
            (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b',
             Keyword),
            (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin),
            (r'\b\w+\b', Name.Other),  # These are postprocessed below
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        """Store the Vim builtin tables on the instance, then initialise
        the base lexer with ``options``."""
        # Imported here rather than at module level, as in the original.
        from pygments.lexers._vim_builtins import command, option, auto
        self._cmd = command
        self._opt = option
        self._aut = auto

        RegexLexer.__init__(self, **options)

    def is_in(self, w, mapping):
        r"""
        It's kind of difficult to decide if something might be a keyword
        in VimL because it allows you to abbreviate them.  In fact,
        'ab[breviate]' is a good example.  :ab, :abbre, or :abbreviate are
        valid ways to call it so rather than making really awful regexps
        like::

            \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b

        we match `\b\w+\b` and then call is_in() on those tokens.  See
        `scripts/get_vimkw.py` for how the lists are extracted.

        ``w`` is the word to test.  ``mapping`` is indexed as a sequence of
        ``(short, full)`` string pairs and searched with ``bisect``, so it
        has to be sorted.  Returns True when ``w`` starts with ``short``
        and is itself a prefix of ``full`` for one of the two entries
        adjacent to the insertion point of ``(w,)``.
        """
        p = bisect(mapping, (w,))
        # A 1-tuple sorts before any pair sharing its first element, so an
        # entry whose short form equals ``w`` sits at ``p`` and an entry
        # whose short form is a proper prefix of ``w`` can sit at ``p - 1``.
        if p > 0:
            short, full = mapping[p - 1][0], mapping[p - 1][1]
            if w.startswith(short) and full.startswith(w):
                return True
        if p < len(mapping):
            short, full = mapping[p][0], mapping[p][1]
            return w.startswith(short) and full.startswith(w)
        return False

    def get_tokens_unprocessed(self, text):
        """Yield ``(index, token, value)`` triples from the base lexer,
        replacing every ``Name.Other`` token with ``Keyword`` (Vim
        command), ``Name.Builtin`` (option or autocmd entry) or ``Text``
        (anything else)."""
        # TODO: builtins are only subsequent tokens on lines
        #       and 'keywords' only happen at the beginning except
        #       for :au ones
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name.Other:
                if self.is_in(value, self._cmd):
                    yield index, Keyword, value
                elif self.is_in(value, self._opt) or \
                        self.is_in(value, self._aut):
                    yield index, Name.Builtin, value
                else:
                    yield index, Text, value
            else:
                yield index, token, value

eric ide

mercurial