eric6/Plugins/CheckerPlugins/CodeStyleChecker/eradicate.py

changeset 7972
4fc11172df1b
parent 7836
2f0d208b8137
child 7978
1e391f977124
equal deleted inserted replaced
7971:ff2971513d6d 7972:4fc11172df1b
1 # Copyright (C) 2012-2015 Steven Myint 1 # Copyright (C) 2012-2018 Steven Myint
2 # 2 #
3 # Permission is hereby granted, free of charge, to any person obtaining 3 # Permission is hereby granted, free of charge, to any person obtaining
4 # a copy of this software and associated documentation files (the 4 # a copy of this software and associated documentation files (the
5 # "Software"), to deal in the Software without restriction, including 5 # "Software"), to deal in the Software without restriction, including
6 # without limitation the rights to use, copy, modify, merge, publish, 6 # without limitation the rights to use, copy, modify, merge, publish,
19 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 19 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 21
22 """Removes commented-out Python code.""" 22 """Removes commented-out Python code."""
23 23
24 from __future__ import print_function
25 from __future__ import unicode_literals
26
24 import difflib 27 import difflib
25 import io 28 import io
26 import os 29 import os
27 import re 30 import re
28 import tokenize 31 import tokenize
29 32
30 __version__ = '1.0' 33 __version__ = '2.0.0'
31 34
32 35
33 MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$') 36 class Eradicator(object):
34 PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$') 37 """Eradicate comments."""
35 38 BRACKET_REGEX = re.compile(r'^[()\[\]{}\s]+$')
36 39 CODING_COMMENT_REGEX = re.compile(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')
37 def comment_contains_code(line, aggressive=True): 40 DEF_STATEMENT_REGEX = re.compile(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$")
38 """Return True comment contains code.""" 41 FOR_STATEMENT_REGEX = re.compile(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$")
39 line = line.lstrip() 42 HASH_NUMBER = re.compile(r'#[0-9]')
40 if not line.startswith('#'): 43 MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$')
44 PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$')
45 PRINT_RETURN_REGEX = re.compile(r'^(print|return)\b\s*')
46 WITH_STATEMENT_REGEX = re.compile(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$")
47
48 CODE_INDICATORS = ['(', ')', '[', ']', '{', '}', ':', '=', '%',
49 'print', 'return', 'break', 'continue', 'import']
50 CODE_KEYWORDS = [r'elif\s+.*', 'else', 'try', 'finally', r'except\s+.*']
51 CODE_KEYWORDS_AGGR = CODE_KEYWORDS + [r'if\s+.*']
52 WHITESPACE_HASH = ' \t\v\n#'
53
54 DEFAULT_WHITELIST = (
55 r'pylint',
56 r'pyright',
57 r'noqa',
58 r'type:\s*ignore',
59 r'fmt:\s*(on|off)',
60 r'TODO',
61 r'FIXME',
62 r'XXX',
63 r'~ ',
64 r'- ',
65 )
66 WHITELIST_REGEX = re.compile(r'|'.join(DEFAULT_WHITELIST), flags=re.IGNORECASE)
67
def comment_contains_code(self, line, aggressive=True):
    """Return True if the comment line looks like commented-out code.

    Args:
        line: A single source line. Anything that does not start with
            '#' (after leading whitespace) is rejected immediately.
        aggressive: When true, the wider keyword list
            (CODE_KEYWORDS_AGGR) is used and the flag is forwarded to
            multiline_case().

    Returns:
        True when the comment text is judged to be code, else False.
    """
    line = line.lstrip()
    if not line.startswith('#'):
        return False

    # Drop the comment marker(s) and surrounding whitespace.
    line = line.lstrip(self.WHITESPACE_HASH).strip()

    # Ignore non-comment related hashes. For example, "# Issue #999".
    if self.HASH_NUMBER.search(line):
        return False

    # Ignore whitelisted comments
    if self.WHITELIST_REGEX.search(line):
        return False

    if self.CODING_COMMENT_REGEX.match(line):
        return False

    # Check that this is possibly code.
    if not any(symbol in line for symbol in self.CODE_INDICATORS):
        return False

    if self.multiline_case(line, aggressive=aggressive):
        return True

    keywords = self.CODE_KEYWORDS_AGGR if aggressive else self.CODE_KEYWORDS
    for symbol in keywords:
        if re.match(r'^\s*' + symbol + r'\s*:\s*$', line):
            return True

    # A bare "print x" / "return x" does not compile on its own, so the
    # keyword is removed before the remaining text is tested.
    line = self.PRINT_RETURN_REGEX.sub('', line)

    if self.PARTIAL_DICTIONARY_REGEX.match(line):
        return True

    try:
        compile(line, '<string>', 'exec')
    except (SyntaxError, TypeError, ValueError):
        # ValueError covers UnicodeDecodeError (its subclass) as well as
        # the "source code string cannot contain null bytes" error that
        # some Python versions raise instead of SyntaxError.
        return False
    return True
112
113
114 def multiline_case(self, line, aggressive=True):
115 """Return True if line is probably part of some multiline code."""
116 if aggressive:
117 for ending in ')]}':
118 if line.endswith(ending + ':'):
119 return True
120
121 if line.strip() == ending + ',':
122 return True
123
124 # Check whether a function/method definition with return value
125 # annotation
126 if self.DEF_STATEMENT_REGEX.search(line):
127 return True
128
129 # Check whether this is a with statement
130 if self.WITH_STATEMENT_REGEX.search(line):
131 return True
132
133 # Check whether this is a for statement
134 if self.FOR_STATEMENT_REGEX.search(line):
135 return True
136
137 if line.endswith('\\'):
138 return True
139
140 if self.MULTILINE_ASSIGNMENT_REGEX.match(line):
141 return True
142
143 if self.BRACKET_REGEX.match(line):
144 return True
145
41 return False 146 return False
42 147
43 line = line.lstrip(' \t\v\n#').strip() 148
44 149 def commented_out_code_line_numbers(self, source, aggressive=True):
45 # Ignore non-comment related hashes. For example, "# Issue #999". 150 """Yield line numbers of commented-out code."""
46 if re.search('#[0-9]', line): 151 sio = io.StringIO(source)
47 return False 152 try:
48 153 for token in tokenize.generate_tokens(sio.readline):
49 if line.startswith('pylint:'): 154 token_type = token[0]
50 return False 155 start_row = token[2][0]
51 156 line = token[4]
52 if re.match(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)', line): 157
53 return False 158 if (token_type == tokenize.COMMENT and
54 159 line.lstrip().startswith('#') and
55 # Check that this is possibly code. 160 self.comment_contains_code(line, aggressive)):
56 for symbol in list('()[]{}:=%') + ['print', 'return', 'break', 'continue', 161 yield start_row
57 'import']: 162 except (tokenize.TokenError, IndentationError):
58 if symbol in line: 163 pass
59 break 164
60 else: 165
61 return False 166 def filter_commented_out_code(self, source, aggressive=True):
62 167 """Yield code with commented out code removed."""
63 if multiline_case(line, aggressive=aggressive): 168 marked_lines = list(self.commented_out_code_line_numbers(source,
64 return True 169 aggressive))
65 170 sio = io.StringIO(source)
66 symbol_list = [r'elif\s+.*', 'else', 'try', 171 previous_line = ''
67 'finally', r'except\s+.*'] 172 for line_number, line in enumerate(sio.readlines(), start=1):
68 if aggressive: 173 if (line_number not in marked_lines or
69 symbol_list.append(r'if\s+.*') 174 previous_line.rstrip().endswith('\\')):
70 175 yield line
71 for symbol in symbol_list: 176 previous_line = line
72 if re.match(r'^\s*' + symbol + r'\s*:\s*$', line): 177
73 return True 178
74 179 def fix_file(self, filename, args, standard_out):
75 line = re.sub(r'^(print|return)\b\s*', '', line) 180 """Run filter_commented_out_code() on file."""
76 181 encoding = self.detect_encoding(filename)
77 if re.match(PARTIAL_DICTIONARY_REGEX, line): 182 with self.open_with_encoding(filename, encoding=encoding) as input_file:
78 return True 183 source = input_file.read()
79 184
80 try: 185 filtered_source = ''.join(self.filter_commented_out_code(source,
81 compile(line, '<string>', 'exec') 186 args.aggressive))
82 return True 187
83 except (SyntaxError, TypeError, UnicodeDecodeError): 188 if source != filtered_source:
84 return False 189 if args.in_place:
85 190 with self.open_with_encoding(filename, mode='w',
86 191 encoding=encoding) as output_file:
87 def multiline_case(line, aggressive=True): 192 output_file.write(filtered_source)
88 """Return True if line is probably part of some multiline code.""" 193 else:
89 if aggressive: 194 diff = difflib.unified_diff(
90 for ending in ')]}': 195 source.splitlines(),
91 if line.endswith(ending + ':'): 196 filtered_source.splitlines(),
92 return True 197 'before/' + filename,
93 198 'after/' + filename,
94 if line.strip() == ending + ',': 199 lineterm='')
95 return True 200 standard_out.write('\n'.join(list(diff) + ['']))
96 201 return True
97 # Check whether a function/method definition with return value 202
98 # annotation 203
99 if re.search(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$", line): 204 def open_with_encoding(self, filename, encoding, mode='r'):
100 return True 205 """Return opened file with a specific encoding."""
101 206 return io.open(filename, mode=mode, encoding=encoding,
102 # Check whether this is a with statement 207 newline='') # Preserve line endings
103 if re.search(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$", line): 208
104 return True 209
105 210 def detect_encoding(self, filename):
106 # Check whether this is a for statement 211 """Return file encoding."""
107 if re.search(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$", line): 212 try:
108 return True 213 with open(filename, 'rb') as input_file:
109 214 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
110 if line.endswith('\\'): 215 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
111 return True 216
112 217 # Check for correctness of encoding.
113 if re.match(MULTILINE_ASSIGNMENT_REGEX, line): 218 with self.open_with_encoding(filename, encoding) as input_file:
114 return True 219 input_file.read()
115 220
116 if re.match(r'^[()\[\]{}\s]+$', line): 221 return encoding
117 return True 222 except (SyntaxError, LookupError, UnicodeDecodeError):
118 223 return 'latin-1'
119 return False 224
120 225 def update_whitelist(self, new_whitelist, extend_default=True):
121 226 """Updates the whitelist."""
122 def commented_out_code_line_numbers(source, aggressive=True): 227 if extend_default:
123 """Yield line numbers of commented-out code.""" 228 self.WHITELIST_REGEX = re.compile(
124 sio = io.StringIO(source) 229 r'|'.join(list(self.DEFAULT_WHITELIST) + new_whitelist),
125 try: 230 flags=re.IGNORECASE)
126 for token in tokenize.generate_tokens(sio.readline): 231 else:
127 token_type = token[0] 232 self.WHITELIST_REGEX = re.compile(
128 start_row = token[2][0] 233 r'|'.join(new_whitelist),
129 line = token[4] 234 flags=re.IGNORECASE)
130
131 if (token_type == tokenize.COMMENT and
132 line.lstrip().startswith('#') and
133 not line.lstrip().startswith('##') and
134 # modified from original file (line added)
135 comment_contains_code(line, aggressive)):
136 yield start_row
137 except (tokenize.TokenError, IndentationError):
138 pass
139
140
def filter_commented_out_code(source, aggressive=True):
    """Yield the lines of source with commented-out code removed.

    Args:
        source: Complete source text as one string.
        aggressive: Forwarded to commented_out_code_line_numbers().

    Yields:
        Each original line (line ending included) that is kept.
    """
    # A frozenset keeps the per-line membership test O(1); the reported
    # line numbers were previously held in a list and scanned per line.
    marked_lines = frozenset(
        commented_out_code_line_numbers(source, aggressive))
    previous_line = ''
    for line_number, line in enumerate(io.StringIO(source), start=1):
        # A marked line is still kept when the previous line ends with a
        # backslash continuation.
        if (line_number not in marked_lines or
                previous_line.rstrip().endswith('\\')):
            yield line
        previous_line = line
152
153
def fix_file(filename, args, standard_out):
    """Strip commented-out code from one file.

    The file is read with its detected encoding and passed through
    filter_commented_out_code(). If nothing changed, nothing is written.
    Otherwise the file is rewritten when args.in_place is set, or a
    unified diff is written to standard_out.
    """
    encoding = detect_encoding(filename)
    with open_with_encoding(filename, encoding=encoding) as input_file:
        source = input_file.read()

    filtered_source = ''.join(
        filter_commented_out_code(source, args.aggressive))
    if source == filtered_source:
        return

    if args.in_place:
        with open_with_encoding(filename, mode='w',
                                encoding=encoding) as output_file:
            output_file.write(filtered_source)
        return

    diff_lines = list(difflib.unified_diff(
        source.splitlines(),
        filtered_source.splitlines(),
        'before/' + filename,
        'after/' + filename,
        lineterm=''))
    # The trailing empty entry makes the joined text end with a newline.
    diff_lines.append('')
    standard_out.write('\n'.join(diff_lines))
176
177
def open_with_encoding(filename, encoding, mode='r'):
    """Open filename in text mode using the given encoding and mode."""
    # newline='' switches off newline translation, so the file's own
    # line endings are preserved.
    open_options = {'mode': mode, 'encoding': encoding, 'newline': ''}
    return io.open(filename, **open_options)
182
183
def detect_encoding(filename):
    """Return the encoding of the source file.

    The encoding is detected from the file's first lines and then
    verified by decoding the whole file. 'latin-1' is returned when
    detection or verification fails.
    """
    # Prefer the standard library detector; lib2to3 is no longer shipped
    # with recent Python versions and is only needed where
    # tokenize.detect_encoding does not exist.
    detect = getattr(tokenize, 'detect_encoding', None)
    if detect is None:
        from lib2to3.pgen2 import tokenize as lib2to3_tokenize
        detect = lib2to3_tokenize.detect_encoding

    try:
        with open(filename, 'rb') as input_file:
            encoding = detect(input_file.readline)[0]

        # Check for correctness of encoding.
        with open_with_encoding(filename, encoding) as input_file:
            input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return 'latin-1'
198 235
199 236
200 def main(argv, standard_out, standard_error): 237 def main(argv, standard_out, standard_error):
201 """Main entry point.""" 238 """Main entry point."""
202 import argparse 239 import argparse
206 parser.add_argument('-r', '--recursive', action='store_true', 243 parser.add_argument('-r', '--recursive', action='store_true',
207 help='drill down directories recursively') 244 help='drill down directories recursively')
208 parser.add_argument('-a', '--aggressive', action='store_true', 245 parser.add_argument('-a', '--aggressive', action='store_true',
209 help='make more aggressive changes; ' 246 help='make more aggressive changes; '
210 'this may result in false positives') 247 'this may result in false positives')
248 parser.add_argument('-e', '--error', action="store_true",
249 help="Exit code based on result of check")
211 parser.add_argument('--version', action='version', 250 parser.add_argument('--version', action='version',
212 version='%(prog)s ' + __version__) 251 version='%(prog)s ' + __version__)
252 parser.add_argument('--whitelist', action="store",
253 help=(
254 'String of "#" separated comment beginnings to whitelist. '
255 'Single parts are interpreted as regex. '
256 'OVERWRITING the default whitelist: {}'
257 ).format(Eradicator.DEFAULT_WHITELIST))
258 parser.add_argument('--whitelist-extend', action="store",
259 help=(
260 'String of "#" separated comment beginnings to whitelist '
261 'Single parts are interpreted as regex. '
262 'Overwrites --whitelist. '
263 'EXTENDING the default whitelist: {} '
264 ).format(Eradicator.DEFAULT_WHITELIST))
213 parser.add_argument('files', nargs='+', help='files to format') 265 parser.add_argument('files', nargs='+', help='files to format')
214 266
215 args = parser.parse_args(argv[1:]) 267 args = parser.parse_args(argv[1:])
216 268
269 eradicator = Eradicator()
270
271 if args.whitelist_extend:
272 eradicator.update_whitelist(args.whitelist_extend.split('#'), True)
273 elif args.whitelist:
274 eradicator.update_whitelist(args.whitelist.split('#'), False)
275
217 filenames = list(set(args.files)) 276 filenames = list(set(args.files))
277 change_or_error = False
218 while filenames: 278 while filenames:
219 name = filenames.pop(0) 279 name = filenames.pop(0)
220 if args.recursive and os.path.isdir(name): 280 if args.recursive and os.path.isdir(name):
221 for root, directories, children in os.walk('{}'.format(name)): 281 for root, directories, children in os.walk('{}'.format(name)):
222 filenames += [os.path.join(root, f) for f in children 282 filenames += [os.path.join(root, f) for f in children
224 not f.startswith('.')] 284 not f.startswith('.')]
225 directories[:] = [d for d in directories 285 directories[:] = [d for d in directories
226 if not d.startswith('.')] 286 if not d.startswith('.')]
227 else: 287 else:
228 try: 288 try:
229 fix_file(name, args=args, standard_out=standard_out) 289 change_or_error = eradicator.fix_file(name, args=args, standard_out=standard_out) or change_or_error
230 except OSError as exception: 290 except IOError as exception:
231 print('{}'.format(exception), file=standard_error) 291 print('{}'.format(exception), file=standard_error)
292 change_or_error = True
293 if change_or_error and args.error:
294 return 1

eric ide

mercurial