|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.r |
|
4 ~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for the R/S languages. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import Lexer, RegexLexer, include, do_insertions, bygroups |
|
15 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
16 Number, Punctuation, Generic |
|
17 |
|
18 __all__ = ['RConsoleLexer', 'SLexer', 'RdLexer'] |
|
19 |
|
20 |
|
# Matches a single line including its trailing newline (non-greedy so each
# finditer() hit is exactly one line); used to walk console transcripts.
line_re = re.compile('.*?\n')
|
22 |
|
23 |
|
class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.
    """

    name = 'RConsole'
    aliases = ['rconsole', 'rout']
    filenames = ['*.Rout']

    def get_tokens_unprocessed(self, text):
        """Yield (index, token, value) triples for an R console transcript.

        Lines starting with a prompt ('>' or '+') are stripped of the
        prompt, accumulated, and highlighted as R source via SLexer;
        everything else is emitted verbatim as Generic.Output.
        """
        slexer = SLexer(**self.options)

        pending_code = ''
        prompt_insertions = []

        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith(('>', '+')):
                # The two-character prompt gets its own token; the rest of
                # the line joins the code block awaiting highlighting.
                prompt_insertions.append(
                    (len(pending_code), [(0, Generic.Prompt, line[:2])]))
                pending_code += line[2:]
                continue

            # A non-prompt (output) line.  Any buffered prompt lines must
            # be highlighted and woven back together with their prompts
            # before the output token is emitted.
            if pending_code:
                for item in do_insertions(
                        prompt_insertions,
                        slexer.get_tokens_unprocessed(pending_code)):
                    yield item
                pending_code = ''
                prompt_insertions = []
            yield match.start(), Generic.Output, line

        # The transcript may end on a code block with no trailing output;
        # flush it the same way.
        if pending_code:
            for item in do_insertions(
                    prompt_insertions,
                    slexer.get_tokens_unprocessed(pending_code)):
                yield item
|
68 |
|
69 |
|
class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']

    # An R identifier is either a backquoted name (backslash escapes
    # allowed inside) or a name starting with a letter, or with '.'/'_'
    # not immediately followed by a digit.
    # BUG FIX: the original class was [a-zA-z]; the range A-z also
    # matched '[', '\', ']', '^', '_' and '`', so e.g. '^x' started a
    # Name token.  Corrected to [a-zA-Z].
    valid_name = r'(?:`[^`\\]*(?:\\.[^`\\]*)*`)|(?:(?:[a-zA-Z]|[_.][^0-9])[\w_.]*)'
    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            # Reserved words; the lookahead prevents matching inside a
            # longer identifier such as 'if2' or 'for.each'.
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            # %...% covers R's user-defined infix operators (%in%, %*%, ...).
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            # T/F are mutable aliases for TRUE/FALSE, hence only Pseudo.
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number ([Li] suffix: integer / imaginary literals)
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls: a name immediately (modulo whitespace) before '('
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """Weak positive signal when the R assignment arrow '<-' appears
        (but not the '<<-' super-assignment already handled by the prefix)."""
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11
|
155 |
|
156 |
|
class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # To account for verbatim / LaTeX-like / and R-like areas
    # would require parsing.  A single flat state is used instead;
    # rule order matters: escapes before comments before macros.
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments ('%' to end of line, LaTeX-style)
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros (any other backslash-word)
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros (#ifdef/#ifndef/#endif lines)
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets delimit macro arguments
            (r'[{}]', Name.Builtin),
            # everything else: consume runs of plain text in one token
            (r'[^\\%\n{}]+', Text),
            # single-char fallback so no input can stall the lexer
            (r'.', Text),
        ]
    }