eric6/Plugins/CheckerPlugins/CodeStyleChecker/Miscellaneous/eradicate.py

changeset 8168
bdb0258faf42
parent 7980
2c3f14a3c595
equal deleted inserted replaced
8167:cdc1b6692766 8168:bdb0258faf42
1 # Copyright (C) 2012-2018 Steven Myint
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining
4 # a copy of this software and associated documentation files (the
5 # "Software"), to deal in the Software without restriction, including
6 # without limitation the rights to use, copy, modify, merge, publish,
7 # distribute, sublicense, and/or sell copies of the Software, and to
8 # permit persons to whom the Software is furnished to do so, subject to
9 # the following conditions:
10 #
11 # The above copyright notice and this permission notice shall be included
12 # in all copies or substantial portions of the Software.
13 #
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22 """Removes commented-out Python code."""
23
24 from __future__ import print_function
25 from __future__ import unicode_literals
26
27 import difflib
28 import io
29 import os
30 import re
31 import tokenize
32
33 __version__ = '2.0.0'
34
35
class Eradicator(object):
    """Eradicate commented-out code from Python sources.

    The heuristics classify a ``#`` comment as commented-out code when it
    looks compilable (or matches well-known code fragments), while a
    configurable whitelist protects directive comments such as ``noqa`` or
    ``TODO`` markers from removal.
    """

    # Line consisting solely of brackets/whitespace (tail of a multiline
    # literal or call).
    BRACKET_REGEX = re.compile(r'^[()\[\]{}\s]+$')
    # PEP 263 encoding declaration comment.
    CODING_COMMENT_REGEX = re.compile(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')
    # Function/method definition with a return value annotation.
    DEF_STATEMENT_REGEX = re.compile(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$")
    # "for ... in ...:" statement header.
    FOR_STATEMENT_REGEX = re.compile(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$")
    # Hash immediately followed by a digit, e.g. an issue reference "#999".
    HASH_NUMBER = re.compile(r'#[0-9]')
    # Assignment whose right-hand side opens a bracket and continues below.
    MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$')
    # A '"key": value,' fragment of a dictionary literal.
    PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$')
    # Leading "print"/"return" keyword (stripped before the compile check).
    PRINT_RETURN_REGEX = re.compile(r'^(print|return)\b\s*')
    # "with ... as ...:" statement header.
    WITH_STATEMENT_REGEX = re.compile(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$")

    # Substrings whose presence makes a comment a candidate for code at all.
    CODE_INDICATORS = ['(', ')', '[', ']', '{', '}', ':', '=', '%',
                       'print', 'return', 'break', 'continue', 'import']
    # Keyword statements that cannot be compiled in isolation.
    CODE_KEYWORDS = [r'elif\s+.*', 'else', 'try', 'finally', r'except\s+.*']
    CODE_KEYWORDS_AGGR = CODE_KEYWORDS + [r'if\s+.*']
    # Characters stripped from the front of a comment before analysis.
    WHITESPACE_HASH = ' \t\v\n#'

    # Comment markers that must never be treated as commented-out code.
    DEFAULT_WHITELIST = (
        r'pylint',
        r'pyright',
        r'noqa',
        r'type:\s*ignore',
        r'fmt:\s*(on|off)',
        r'TODO',
        r'FIXME',
        r'XXX'
    )
    WHITELIST_REGEX = re.compile(r'|'.join(DEFAULT_WHITELIST), flags=re.IGNORECASE)

    def comment_contains_code(self, line, aggressive=True):
        """Return True if the comment line probably contains code.

        ``line`` is a full source line (leading whitespace allowed);
        non-comment lines always yield False.  With ``aggressive`` enabled,
        additional heuristics are applied which may produce false positives.
        """
        line = line.lstrip()
        if not line.startswith('#'):
            return False

        line = line.lstrip(self.WHITESPACE_HASH).strip()

        # Ignore non-comment related hashes. For example, "# Issue #999".
        if self.HASH_NUMBER.search(line):
            return False

        # Ignore whitelisted comments
        if self.WHITELIST_REGEX.search(line):
            return False

        if self.CODING_COMMENT_REGEX.match(line):
            return False

        # Check that this is possibly code.
        for symbol in self.CODE_INDICATORS:
            if symbol in line:
                break
        else:
            return False

        if self.multiline_case(line, aggressive=aggressive):
            return True

        # Keyword statements like "else:" are code but cannot be compiled
        # stand-alone, so match them explicitly.
        for symbol in self.CODE_KEYWORDS_AGGR if aggressive else self.CODE_KEYWORDS:
            if re.match(r'^\s*' + symbol + r'\s*:\s*$', line):
                return True

        # Strip a leading print/return so the remaining expression compiles.
        line = self.PRINT_RETURN_REGEX.sub('', line)

        if self.PARTIAL_DICTIONARY_REGEX.match(line):
            return True

        # Final arbiter: if it compiles as Python, treat it as code.
        try:
            compile(line, '<string>', 'exec')
        except (SyntaxError, TypeError, UnicodeDecodeError):
            return False
        else:
            return True

    def multiline_case(self, line, aggressive=True):
        """Return True if line is probably part of some multiline code."""
        if aggressive:
            for ending in ')]}':
                if line.endswith(ending + ':'):
                    return True

                if line.strip() == ending + ',':
                    return True

            # Check whether a function/method definition with return value
            # annotation
            if self.DEF_STATEMENT_REGEX.search(line):
                return True

            # Check whether a with statement
            if self.WITH_STATEMENT_REGEX.search(line):
                return True

            # Check whether a for statement
            if self.FOR_STATEMENT_REGEX.search(line):
                return True

        if line.endswith('\\'):
            return True

        if self.MULTILINE_ASSIGNMENT_REGEX.match(line):
            return True

        if self.BRACKET_REGEX.match(line):
            return True

        return False

    def commented_out_code_line_numbers(self, source, aggressive=True):
        """Yield line numbers (1-based) of commented-out code in source."""
        sio = io.StringIO(source)
        try:
            for token in tokenize.generate_tokens(sio.readline):
                token_type = token[0]
                start_row = token[2][0]
                line = token[4]

                # Only full-line comments are candidates; trailing comments
                # on code lines are left alone.
                if (token_type == tokenize.COMMENT and
                        line.lstrip().startswith('#') and
                        self.comment_contains_code(line, aggressive)):
                    yield start_row
        except (tokenize.TokenError, IndentationError):
            # Unparsable source: report nothing rather than fail.
            pass

    def filter_commented_out_code(self, source, aggressive=True):
        """Yield the lines of source with commented-out code removed."""
        marked_lines = list(self.commented_out_code_line_numbers(source,
                                                                 aggressive))
        sio = io.StringIO(source)
        previous_line = ''
        for line_number, line in enumerate(sio.readlines(), start=1):
            # Keep a marked line if it continues a backslash continuation,
            # since removing it would break the preceding statement.
            if (line_number not in marked_lines or
                    previous_line.rstrip().endswith('\\')):
                yield line
            previous_line = line

    def fix_file(self, filename, args, standard_out):
        """Run filter_commented_out_code() on file.

        Depending on ``args.in_place`` either rewrite the file or write a
        unified diff to ``standard_out``.  Returns True if commented-out
        code was found (i.e. the filtered source differs), None otherwise.
        """
        encoding = self.detect_encoding(filename)
        with self.open_with_encoding(filename, encoding=encoding) as input_file:
            source = input_file.read()

        filtered_source = ''.join(self.filter_commented_out_code(source,
                                                                 args.aggressive))

        if source != filtered_source:
            if args.in_place:
                with self.open_with_encoding(filename, mode='w',
                                             encoding=encoding) as output_file:
                    output_file.write(filtered_source)
            else:
                diff = difflib.unified_diff(
                    source.splitlines(),
                    filtered_source.splitlines(),
                    'before/' + filename,
                    'after/' + filename,
                    lineterm='')
                standard_out.write('\n'.join(list(diff) + ['']))
            return True

    def open_with_encoding(self, filename, encoding, mode='r'):
        """Return opened file with a specific encoding."""
        return io.open(filename, mode=mode, encoding=encoding,
                       newline='')  # Preserve line endings

    def detect_encoding(self, filename):
        """Return the encoding of the file, falling back to 'latin-1'."""
        try:
            with open(filename, 'rb') as input_file:
                # tokenize.detect_encoding() replaces the former
                # lib2to3.pgen2.tokenize helper: lib2to3 is deprecated and
                # was removed in Python 3.13, while this stdlib API has the
                # identical (readline) -> (encoding, lines) signature.
                encoding = tokenize.detect_encoding(input_file.readline)[0]

            # Check for correctness of encoding.
            with self.open_with_encoding(filename, encoding) as input_file:
                input_file.read()

            return encoding
        except (SyntaxError, LookupError, UnicodeDecodeError):
            return 'latin-1'

    def update_whitelist(self, new_whitelist, extend_default=True):
        """Update the whitelist of protected comment markers.

        ``new_whitelist`` is a list of regex fragments; with
        ``extend_default`` they are added to DEFAULT_WHITELIST, otherwise
        they replace it entirely.
        """
        if extend_default:
            self.WHITELIST_REGEX = re.compile(
                r'|'.join(list(self.DEFAULT_WHITELIST) + new_whitelist),
                flags=re.IGNORECASE)
        else:
            self.WHITELIST_REGEX = re.compile(
                r'|'.join(new_whitelist),
                flags=re.IGNORECASE)
234
def main(argv, standard_out, standard_error):
    """Command line entry point.

    Parses ``argv``, optionally adjusts the comment whitelist, then runs
    the eradicator over every given file (recursing into directories when
    requested).  Returns 1 when changes/errors occurred and --error was
    given, None otherwise.
    """
    import argparse
    parser = argparse.ArgumentParser(description=__doc__, prog='eradicate')
    parser.add_argument('-i', '--in-place', action='store_true',
                        help='make changes to files instead of printing diffs')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='drill down directories recursively')
    parser.add_argument('-a', '--aggressive', action='store_true',
                        help='make more aggressive changes; '
                             'this may result in false positives')
    parser.add_argument('-e', '--error', action="store_true",
                        help="Exit code based on result of check")
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument('--whitelist', action="store",
                        help=(
                            'String of "#" separated comment beginnings to whitelist. '
                            'Single parts are interpreted as regex. '
                            'OVERWRITING the default whitelist: {}'
                        ).format(Eradicator.DEFAULT_WHITELIST))
    parser.add_argument('--whitelist-extend', action="store",
                        help=(
                            'String of "#" separated comment beginnings to whitelist '
                            'Single parts are interpreted as regex. '
                            'Overwrites --whitelist. '
                            'EXTENDING the default whitelist: {} '
                        ).format(Eradicator.DEFAULT_WHITELIST))
    parser.add_argument('files', nargs='+', help='files to format')

    args = parser.parse_args(argv[1:])

    worker = Eradicator()

    # --whitelist-extend wins over --whitelist when both are supplied.
    if args.whitelist_extend:
        worker.update_whitelist(args.whitelist_extend.split('#'), True)
    elif args.whitelist:
        worker.update_whitelist(args.whitelist.split('#'), False)

    pending = list(set(args.files))
    found_change_or_error = False
    while pending:
        current = pending.pop(0)
        if args.recursive and os.path.isdir(current):
            # Queue the Python files below this directory, pruning hidden
            # files and directories as we go.
            for root, directories, children in os.walk('{}'.format(current)):
                pending.extend(
                    os.path.join(root, child)
                    for child in children
                    if child.endswith('.py') and not child.startswith('.'))
                directories[:] = [entry for entry in directories
                                  if not entry.startswith('.')]
        else:
            try:
                changed = worker.fix_file(current, args=args,
                                          standard_out=standard_out)
                found_change_or_error = changed or found_change_or_error
            except IOError as exception:
                print('{}'.format(exception), file=standard_error)
                found_change_or_error = True
    if found_change_or_error and args.error:
        return 1

eric ide

mercurial