1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.php |
|
4 ~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for PHP and related languages. |
|
7 |
|
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import Lexer, RegexLexer, include, bygroups, default, \ |
|
15 using, this, words, do_insertions |
|
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
17 Number, Punctuation, Other, Generic |
|
18 from pygments.util import get_bool_opt, get_list_opt, shebang_matches |
|
19 |
|
# Public lexer classes exported by this module.
__all__ = ['ZephirLexer', 'PsyshConsoleLexer', 'PhpLexer']

# Matches a single line of input *including* its terminating newline.
# The pattern is compiled without re.DOTALL, so ``.`` stops at the newline;
# any text after the last newline of the input is not matched by it.
line_re = re.compile('.*?\n')
|
23 |
|
24 |
|
class ZephirLexer(RegexLexer):
    """
    For `Zephir language <http://zephir-lang.com/>`_ source code.

    Zephir is a compiled high level language aimed
    to the creation of C-extensions for PHP.

    The rule table distinguishes a ``/`` that starts a regex literal from a
    ``/`` used as an operator by entering the ``slashstartsregex`` state after
    operators, opening punctuation and keywords.

    .. versionadded:: 2.0
    """

    name = 'Zephir'
    aliases = ['zephir']
    filenames = ['*.zep']

    # NOTE(review): these two lists are not referenced by any rule in this
    # class; the keyword rules below spell their alternatives out inline.
    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    flags = re.DOTALL | re.MULTILINE

    tokens = {
        # Whitespace plus ``//`` line comments and ``/* ... */`` block comments.
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        # Entered where a following ``/`` may begin a regex literal.  Every
        # rule except whitespace/comments pops back, so the state consumes at
        # most one significant token.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            # A complete ``/.../`` literal with optional g/i/m flags.
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            # A lone slash that did not form a regex literal.
            (r'/', Operator, '#pop'),
            default('#pop')
        ],
        # NOTE(review): no rule in this class transitions into 'badregex'.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # Zero-width match at the start of a line that begins with
            # whitespace or a slash, so a leading ``/`` is tried as a regex.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            (r'[})\].]', Punctuation),
            # NOTE(review): 'fetch' is listed twice in this alternation; the
            # second occurrence is redundant.
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
            # NOTE(review): 'char' is listed twice here, and 'extends' is
            # already consumed by the Keyword rule above.
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            # NOTE(review): 'this' can never match here because the Keyword
            # rule above already matches it.
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            # Identifiers may start with ``$`` and may contain backslashes.
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            # Float and hex are tried before the plain integer rule so that
            # the integer rule does not swallow their leading digits.
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
|
88 |
|
89 |
|
class PsyshConsoleLexer(Lexer):
    """
    Lexer for `PsySH`_ console sessions, for example:

    .. sourcecode:: psysh

        >>> $greeting = function($name): string {
        ...     return "Hello, {$name}";
        ... };
        => Closure($name): string {#2371 …3}
        >>> $greeting('World')
        => "Hello, World"

    Lines starting with a ``>>> `` or ``... `` prompt are highlighted as PHP
    code; every other line is emitted as console output.

    .. _PsySH: https://psysh.org/
    .. versionadded:: 2.7
    """
    name = 'PsySH console session for PHP'
    aliases = ['psysh']

    def __init__(self, **options):
        # Force inline mode: the option is forwarded to the PhpLexer below so
        # that code typed at the prompt needs no opening ``<?php`` tag.
        options['startinline'] = True
        super().__init__(**options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` triples for a console session.

        Consecutive prompt lines are accumulated into one PHP snippet, which
        is lexed with `PhpLexer` once a non-prompt line (or the end of the
        input) is reached; the prompts are spliced back in via
        `do_insertions`.
        """
        php = PhpLexer(**self.options)
        pending_code = ''
        prompt_insertions = []

        for line_match in line_re.finditer(text):
            line = line_match.group()

            # Width of the prompt at the start of this line; 0 means the
            # line is program output rather than input.
            if line.startswith(('>>> ', '... ')):
                prompt_width = 4
            elif line.rstrip() == '...':
                # Bare continuation prompt with no code after it.
                prompt_width = 3
            else:
                prompt_width = 0

            if prompt_width:
                prompt_insertions.append(
                    (len(pending_code),
                     [(0, Generic.Prompt, line[:prompt_width])]))
                pending_code += line[prompt_width:]
                continue

            # Output line: first flush any code collected so far.
            if pending_code:
                yield from do_insertions(
                    prompt_insertions,
                    php.get_tokens_unprocessed(pending_code))
                pending_code = ''
                prompt_insertions = []
            yield line_match.start(), Generic.Output, line

        # Flush code that was not followed by an output line.
        if pending_code:
            yield from do_insertions(
                prompt_insertions,
                php.get_tokens_unprocessed(pending_code))
|
137 |
|
138 |
|
class PhpLexer(RegexLexer):
    """
    For `PHP <http://www.php.net/>`_ source code.
    For PHP embedded in HTML, use the `HtmlPhpLexer`.

    Additional options accepted:

    `startinline`
        If given and ``True`` the lexer starts highlighting with
        php code (i.e.: no starting ``<?php`` required).  The default
        is ``False``.
    `funcnamehighlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabledmodules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted
        except the special ``'unknown'`` module that includes functions
        that are known to php but are undocumented.

        To get a list of allowed modules have a look into the
        `_php_builtins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._php_builtins import MODULES
            >>> MODULES.keys()
            ['PHP Options/Info', 'Zip', 'dba', ...]

        In fact the names of those modules match the module names from
        the php documentation.
    """

    name = 'PHP'
    aliases = ['php', 'php3', 'php4', 'php5']
    filenames = ['*.php', '*.php[345]', '*.inc']
    mimetypes = ['text/x-php']

    # Note that a backslash is included in the following two patterns
    # PHP uses a backslash as a namespace separator
    _ident_char = r'[\\\w]|[^\x00-\x7f]'
    _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])'
    _ident_end = r'(?:' + _ident_char + ')*'
    _ident_inner = _ident_begin + _ident_end

    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    tokens = {
        # Outside of PHP tags everything is passed through as ``Other``.
        'root': [
            (r'<\?(php)?', Comment.Preproc, 'php'),
            (r'[^<]+', Other),
            (r'<', Other)
        ],
        'php': [
            (r'\?>', Comment.Preproc, '#pop'),
            # heredoc / nowdoc: ``<<<`` + optionally quoted label, the body,
            # then the same label again (back-reference \3)
            (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
             bygroups(String, String, String.Delimiter, String, String.Delimiter,
                      Punctuation, Text)),
            (r'\s+', Text),
            (r'#.*?\n', Comment.Single),
            (r'//.*?\n', Comment.Single),
            # put the empty comment here, it is otherwise seen as
            # the start of a docstring
            (r'/\*\*/', Comment.Multiline),
            (r'/\*\*.*?\*/', String.Doc),
            (r'/\*.*?\*/', Comment.Multiline),
            (r'(->|::)(\s*)(' + _ident_inner + ')',
             bygroups(Operator, Text, Name.Attribute)),
            (r'[~!%^&*+=|:.<>/@-]+', Operator),
            (r'\?', Operator),  # don't add to the charclass above!
            (r'[\[\]{}();,]+', Punctuation),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # anonymous function: ``function`` directly followed by ``(``
            (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
            (r'(function)(\s+)(&?)(\s*)',
             bygroups(Keyword, Text, Operator, Text), 'functionname'),
            (r'(const)(\s+)(' + _ident_inner + ')',
             bygroups(Keyword, Text, Name.Constant)),
            (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
             r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
             r'FALSE|print|for|require|continue|foreach|require_once|'
             r'declare|return|default|static|do|switch|die|stdClass|'
             r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
             r'virtual|endfor|include_once|while|endforeach|global|'
             r'endif|list|endswitch|new|endwhile|not|'
             r'array|E_ALL|NULL|final|php_user_filter|interface|'
             r'implements|public|private|protected|abstract|clone|try|'
             r'catch|throw|this|use|namespace|trait|yield|'
             r'finally)\b', Keyword),
            (r'(true|false|null)\b', Keyword.Constant),
            include('magicconstants'),
            (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable),
            (r'\$+' + _ident_inner, Name.Variable),
            (_ident_inner, Name.Other),
            (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
            (r'\d+e[+-]?[0-9]+', Number.Float),
            (r'0[0-7]+', Number.Oct),
            (r'0x[a-f0-9]+', Number.Hex),
            # Binary literals must be tried before the generic integer rule:
            # rules are matched in order, so ``\d+`` would otherwise consume
            # the leading ``0`` and leave ``b101`` to be lexed as a name.
            (r'0b[01]+', Number.Bin),
            (r'\d+', Number.Integer),
            (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
            (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
            (r'"', String.Double, 'string'),
        ],
        'magicfuncs': [
            # source: http://php.net/manual/en/language.oop5.magic.php
            (words((
                '__construct', '__destruct', '__call', '__callStatic', '__get', '__set',
                '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke',
                '__set_state', '__clone', '__debugInfo',), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicconstants': [
            # source: http://php.net/manual/en/language.constants.predefined.php
            (words((
                '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__',
                '__TRAIT__', '__METHOD__', '__NAMESPACE__',),
                suffix=r'\b'),
             Name.Constant),
        ],
        'classname': [
            (_ident_inner, Name.Class, '#pop')
        ],
        'functionname': [
            include('magicfuncs'),
            (_ident_inner, Name.Function, '#pop'),
            default('#pop')
        ],
        # Inside a double-quoted string: escapes and variable interpolation.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'[^{$"\\]+', String.Double),
            (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
            (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?',
             String.Interpol),
            (r'(\{\$\{)(.*?)(\}\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\{)(\$.*?)(\})',
             bygroups(String.Interpol, using(this, _startinline=True),
                      String.Interpol)),
            (r'(\$\{)(\S+)(\})',
             bygroups(String.Interpol, Name.Variable, String.Interpol)),
            # stray ``$``, ``{`` or backslash that started nothing above
            (r'[${\\]', String.Double)
        ],
    }

    def __init__(self, **options):
        """
        Read the lexer options and collect the builtin function names.

        Recognised options are ``funcnamehighlighting``, ``disabledmodules``
        and ``startinline`` (see the class docstring), plus the private
        ``_startinline`` which overrides ``startinline`` and is removed from
        *options* before they are handed to the base class.
        """
        self.funcnamehighlighting = get_bool_opt(
            options, 'funcnamehighlighting', True)
        self.disabledmodules = get_list_opt(
            options, 'disabledmodules', ['unknown'])
        self.startinline = get_bool_opt(options, 'startinline', False)

        # private option argument for the lexer itself
        if '_startinline' in options:
            self.startinline = options.pop('_startinline')

        # collect activated functions in a set
        self._functions = set()
        if self.funcnamehighlighting:
            # imported lazily, only when builtin highlighting is enabled
            from pygments.lexers._php_builtins import MODULES
            for key, value in MODULES.items():
                if key not in self.disabledmodules:
                    self._functions.update(value)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` triples for *text*.

        Lexing starts in the ``php`` state when ``startinline`` is set.
        ``Name.Other`` tokens whose value is a known builtin function name
        are re-tagged as ``Name.Builtin``; all other tokens pass through
        unchanged.
        """
        stack = ['root']
        if self.startinline:
            stack.append('php')
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Other:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
            yield index, token, value

    # NOTE(review): declared without ``self``, as in the original; presumably
    # the lexer metaclass wraps ``analyse_text`` as a static analyser -- confirm.
    def analyse_text(text):
        """
        Rate how likely *text* is PHP.

        Returns ``True`` when the shebang line mentions ``php``; otherwise
        ``0.3`` if a ``<?`` that is not followed by ``xml`` occurs anywhere
        in the text, else ``0.0``.
        """
        if shebang_matches(text, r'php'):
            return True
        rv = 0.0
        if re.search(r'<\?(?!xml)', text):
            rv += 0.3
        return rv
|