eric6/ThirdParty/Pygments/pygments/lexers/r.py

changeset 6942:2602857055c5
parent    6651:e8f3b5568b21
child     7547:21b0534faebc
# -*- coding: utf-8 -*-
"""
    pygments.lexers.r
    ~~~~~~~~~~~~~~~~~

    Lexers for the R/S languages.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, include, do_insertions, bygroups
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic

__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']


line_re = re.compile('.*?\n')


class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.
    """

    name = 'RConsole'
    aliases = ['rconsole', 'rout']
    filenames = ['*.Rout']

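    # An R console transcript mixes prompt lines with output lines, e.g.:
    #
    #   > x <- c(1, 2,
    #   + 3)
    #   > mean(x)
    #   [1] 2
    #
    # Lines starting with '>' or '+' carry code preceded by a prompt; the
    # code is collected and handed to SLexer, everything else is R output.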
    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        current_code_block = ''
        insertions = []

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>') or line.startswith('+'):
                # Colorize the prompt as such,
                # then put rest of line into current_code_block
                insertions.append((len(current_code_block),
                                   [(0, Generic.Prompt, line[:2])]))
                current_code_block += line[2:]
            else:
                # We have reached a non-prompt line!
                # If we have stored prompt lines, need to process them first.
                if current_code_block:
                    # Weave together the prompts and highlight code.
                    for item in do_insertions(
                            insertions, slexer.get_tokens_unprocessed(current_code_block)):
                        yield item
                    # Reset vars for next code block.
                    current_code_block = ''
                    insertions = []
                # Now process the actual line itself, this is output from R.
                yield match.start(), Generic.Output, line

        # If we happen to end on a code block with nothing after it, need to
        # process the last code block. This is neither elegant nor DRY so
        # should be changed.
        if current_code_block:
            for item in do_insertions(
                    insertions, slexer.get_tokens_unprocessed(current_code_block)):
                yield item


class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']

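    # A valid name is either a backtick-quoted string (which may contain
    # backslash escapes) or an identifier starting with a letter, or with
    # '.' or '_' followed by a non-digit.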
    valid_name = r'(?:`[^`\\]*(?:\\.[^`\\]*)*`)|(?:(?:[a-zA-Z]|[_.][^0-9])[\w_.]*)'
    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls:
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #     include('statements'),
        #     ('\{', Punctuation, '#push'),
        #     ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
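        # A '<-' assignment preceded by a name character, ']', ')' or
        # whitespace (and not part of '<--') is a reasonable hint of R/S code.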
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11


class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # To account for verbatim / LaTeX-like / and R-like areas
    # would require parsing.
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets
            (r'[{}]', Name.Builtin),
            # everything else
            (r'[^\\%\n{}]+', Text),
            (r'.', Text),
        ]
    }
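
A quick way to exercise these lexers outside of eric6 is through the regular Pygments API. The snippet below is a minimal sketch: the sample R code and transcript strings are made up for illustration, and TerminalFormatter is just one of the available formatters.

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.r import RConsoleLexer, SLexer

    # Plain R/S source goes through SLexer.
    code = "x <- c(1, 2, 3)\nmean(x)  # arithmetic mean\n"
    print(highlight(code, SLexer(), TerminalFormatter()))

    # A console transcript (e.g. *.Rout output from R CMD BATCH) goes through
    # RConsoleLexer, which colours the prompts and delegates the code to SLexer.
    transcript = "> mean(c(1, 2, 3))\n[1] 2\n"
    print(highlight(transcript, RConsoleLexer(), TerminalFormatter()))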
