eric6/Plugins/CheckerPlugins/CodeStyleChecker/eradicate.py

branch
maintenance
changeset 8043
0acf98cd089a
parent 7980
2c3f14a3c595
equal deleted inserted replaced
7991:866adc8c315b 8043:0acf98cd089a
1 # Copyright (C) 2012-2015 Steven Myint 1 # Copyright (C) 2012-2018 Steven Myint
2 # 2 #
3 # Permission is hereby granted, free of charge, to any person obtaining 3 # Permission is hereby granted, free of charge, to any person obtaining
4 # a copy of this software and associated documentation files (the 4 # a copy of this software and associated documentation files (the
5 # "Software"), to deal in the Software without restriction, including 5 # "Software"), to deal in the Software without restriction, including
6 # without limitation the rights to use, copy, modify, merge, publish, 6 # without limitation the rights to use, copy, modify, merge, publish,
19 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 19 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 21
22 """Removes commented-out Python code.""" 22 """Removes commented-out Python code."""
23 23
24 from __future__ import print_function
25 from __future__ import unicode_literals
26
24 import difflib 27 import difflib
25 import io 28 import io
26 import os 29 import os
27 import re 30 import re
28 import tokenize 31 import tokenize
29 32
30 __version__ = '1.0' 33 __version__ = '2.0.0'
31 34
32 35
33 MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$') 36 class Eradicator(object):
34 PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$') 37 """Eradicate comments."""
35 38 BRACKET_REGEX = re.compile(r'^[()\[\]{}\s]+$')
36 39 CODING_COMMENT_REGEX = re.compile(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')
37 def comment_contains_code(line, aggressive=True): 40 DEF_STATEMENT_REGEX = re.compile(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$")
38 """Return True if comment contains code.""" 41 FOR_STATEMENT_REGEX = re.compile(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$")
39 line = line.lstrip() 42 HASH_NUMBER = re.compile(r'#[0-9]')
40 if not line.startswith('#'): 43 MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$')
44 PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$')
45 PRINT_RETURN_REGEX = re.compile(r'^(print|return)\b\s*')
46 WITH_STATEMENT_REGEX = re.compile(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$")
47
48 CODE_INDICATORS = ['(', ')', '[', ']', '{', '}', ':', '=', '%',
49 'print', 'return', 'break', 'continue', 'import']
50 CODE_KEYWORDS = [r'elif\s+.*', 'else', 'try', 'finally', r'except\s+.*']
51 CODE_KEYWORDS_AGGR = CODE_KEYWORDS + [r'if\s+.*']
52 WHITESPACE_HASH = ' \t\v\n#'
53
54 DEFAULT_WHITELIST = (
55 r'pylint',
56 r'pyright',
57 r'noqa',
58 r'type:\s*ignore',
59 r'fmt:\s*(on|off)',
60 r'TODO',
61 r'FIXME',
62 r'XXX'
63 )
64 WHITELIST_REGEX = re.compile(r'|'.join(DEFAULT_WHITELIST), flags=re.IGNORECASE)
65
66 def comment_contains_code(self, line, aggressive=True):
67 """Return True if comment contains code."""
68 line = line.lstrip()
69 if not line.startswith('#'):
70 return False
71
72 line = line.lstrip(self.WHITESPACE_HASH).strip()
73
74 # Ignore non-comment related hashes. For example, "# Issue #999".
75 if self.HASH_NUMBER.search(line):
76 return False
77
78 # Ignore whitelisted comments
79 if self.WHITELIST_REGEX.search(line):
80 return False
81
82 if self.CODING_COMMENT_REGEX.match(line):
83 return False
84
85 # Check that this is possibly code.
86 for symbol in self.CODE_INDICATORS:
87 if symbol in line:
88 break
89 else:
90 return False
91
92 if self.multiline_case(line, aggressive=aggressive):
93 return True
94
95 for symbol in self.CODE_KEYWORDS_AGGR if aggressive else self.CODE_KEYWORDS:
96 if re.match(r'^\s*' + symbol + r'\s*:\s*$', line):
97 return True
98
99 line = self.PRINT_RETURN_REGEX.sub('', line)
100
101 if self.PARTIAL_DICTIONARY_REGEX.match(line):
102 return True
103
104 try:
105 compile(line, '<string>', 'exec')
106 except (SyntaxError, TypeError, UnicodeDecodeError):
107 return False
108 else:
109 return True
110
111
112 def multiline_case(self, line, aggressive=True):
113 """Return True if line is probably part of some multiline code."""
114 if aggressive:
115 for ending in ')]}':
116 if line.endswith(ending + ':'):
117 return True
118
119 if line.strip() == ending + ',':
120 return True
121
122 # Check whether a function/method definition with return value
123 # annotation
124 if self.DEF_STATEMENT_REGEX.search(line):
125 return True
126
127 # Check whether a with statement
128 if self.WITH_STATEMENT_REGEX.search(line):
129 return True
130
131 # Check whether a for statement
132 if self.FOR_STATEMENT_REGEX.search(line):
133 return True
134
135 if line.endswith('\\'):
136 return True
137
138 if self.MULTILINE_ASSIGNMENT_REGEX.match(line):
139 return True
140
141 if self.BRACKET_REGEX.match(line):
142 return True
143
41 return False 144 return False
42 145
43 line = line.lstrip(' \t\v\n#').strip() 146
44 147 def commented_out_code_line_numbers(self, source, aggressive=True):
45 # Ignore non-comment related hashes. For example, "# Issue #999". 148 """Yield line numbers of commented-out code."""
46 if re.search('#[0-9]', line): 149 sio = io.StringIO(source)
47 return False 150 try:
48 151 for token in tokenize.generate_tokens(sio.readline):
49 if line.startswith('pylint:'): 152 token_type = token[0]
50 return False 153 start_row = token[2][0]
51 154 line = token[4]
52 if re.match(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)', line): 155
53 return False 156 if (token_type == tokenize.COMMENT and
54 157 line.lstrip().startswith('#') and
55 # Check that this is possibly code. 158 self.comment_contains_code(line, aggressive)):
56 for symbol in list('()[]{}:=%') + ['print', 'return', 'break', 'continue', 159 yield start_row
57 'import']: 160 except (tokenize.TokenError, IndentationError):
58 if symbol in line: 161 pass
59 break 162
60 else: 163
61 return False 164 def filter_commented_out_code(self, source, aggressive=True):
62 165 """Yield code with commented out code removed."""
63 if multiline_case(line, aggressive=aggressive): 166 marked_lines = list(self.commented_out_code_line_numbers(source,
64 return True 167 aggressive))
65 168 sio = io.StringIO(source)
66 symbol_list = [r'elif\s+.*', 'else', 'try', 169 previous_line = ''
67 'finally', r'except\s+.*'] 170 for line_number, line in enumerate(sio.readlines(), start=1):
68 if aggressive: 171 if (line_number not in marked_lines or
69 symbol_list.append(r'if\s+.*') 172 previous_line.rstrip().endswith('\\')):
70 173 yield line
71 for symbol in symbol_list: 174 previous_line = line
72 if re.match(r'^\s*' + symbol + r'\s*:\s*$', line): 175
73 return True 176
74 177 def fix_file(self, filename, args, standard_out):
75 line = re.sub(r'^(print|return)\b\s*', '', line) 178 """Run filter_commented_out_code() on file."""
76 179 encoding = self.detect_encoding(filename)
77 if re.match(PARTIAL_DICTIONARY_REGEX, line): 180 with self.open_with_encoding(filename, encoding=encoding) as input_file:
78 return True 181 source = input_file.read()
79 182
80 try: 183 filtered_source = ''.join(self.filter_commented_out_code(source,
81 compile(line, '<string>', 'exec') 184 args.aggressive))
82 return True 185
83 except (SyntaxError, TypeError, UnicodeDecodeError): 186 if source != filtered_source:
84 return False 187 if args.in_place:
85 188 with self.open_with_encoding(filename, mode='w',
86 189 encoding=encoding) as output_file:
87 def multiline_case(line, aggressive=True): 190 output_file.write(filtered_source)
88 """Return True if line is probably part of some multiline code.""" 191 else:
89 if aggressive: 192 diff = difflib.unified_diff(
90 for ending in ')]}': 193 source.splitlines(),
91 if line.endswith(ending + ':'): 194 filtered_source.splitlines(),
92 return True 195 'before/' + filename,
93 196 'after/' + filename,
94 if line.strip() == ending + ',': 197 lineterm='')
95 return True 198 standard_out.write('\n'.join(list(diff) + ['']))
96 199 return True
97 # Check whether a function/method definition with return value 200
98 # annotation 201
99 if re.search(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$", line): 202 def open_with_encoding(self, filename, encoding, mode='r'):
100 return True 203 """Return opened file with a specific encoding."""
101 204 return io.open(filename, mode=mode, encoding=encoding,
102 # Check whether a with statement 205 newline='') # Preserve line endings
103 if re.search(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$", line): 206
104 return True 207
105 208 def detect_encoding(self, filename):
106 # Check whether a for statement 209 """Return file encoding."""
107 if re.search(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$", line): 210 try:
108 return True 211 with open(filename, 'rb') as input_file:
109 212 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
110 if line.endswith('\\'): 213 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
111 return True 214
112 215 # Check for correctness of encoding.
113 if re.match(MULTILINE_ASSIGNMENT_REGEX, line): 216 with self.open_with_encoding(filename, encoding) as input_file:
114 return True 217 input_file.read()
115 218
116 if re.match(r'^[()\[\]{}\s]+$', line): 219 return encoding
117 return True 220 except (SyntaxError, LookupError, UnicodeDecodeError):
118 221 return 'latin-1'
119 return False 222
120 223 def update_whitelist(self, new_whitelist, extend_default=True):
121 224 """Updates the whitelist."""
122 def commented_out_code_line_numbers(source, aggressive=True): 225 if extend_default:
123 """Yield line numbers of commented-out code.""" 226 self.WHITELIST_REGEX = re.compile(
124 sio = io.StringIO(source) 227 r'|'.join(list(self.DEFAULT_WHITELIST) + new_whitelist),
125 try: 228 flags=re.IGNORECASE)
126 for token in tokenize.generate_tokens(sio.readline): 229 else:
127 token_type = token[0] 230 self.WHITELIST_REGEX = re.compile(
128 start_row = token[2][0] 231 r'|'.join(new_whitelist),
129 line = token[4] 232 flags=re.IGNORECASE)
130
131 if (token_type == tokenize.COMMENT and
132 line.lstrip().startswith('#') and
133 not line.lstrip().startswith('##') and
134 # modified from original file (line added)
135 comment_contains_code(line, aggressive)):
136 yield start_row
137 except (tokenize.TokenError, IndentationError):
138 pass
139
140
141 def filter_commented_out_code(source, aggressive=True):
142 """Yield code with commented out code removed."""
143 marked_lines = list(commented_out_code_line_numbers(source,
144 aggressive))
145 sio = io.StringIO(source)
146 previous_line = ''
147 for line_number, line in enumerate(sio.readlines(), start=1):
148 if (line_number not in marked_lines or
149 previous_line.rstrip().endswith('\\')):
150 yield line
151 previous_line = line
152
153
154 def fix_file(filename, args, standard_out):
155 """Run filter_commented_out_code() on file."""
156 encoding = detect_encoding(filename)
157 with open_with_encoding(filename, encoding=encoding) as input_file:
158 source = input_file.read()
159
160 filtered_source = ''.join(filter_commented_out_code(source,
161 args.aggressive))
162
163 if source != filtered_source:
164 if args.in_place:
165 with open_with_encoding(filename, mode='w',
166 encoding=encoding) as output_file:
167 output_file.write(filtered_source)
168 else:
169 diff = difflib.unified_diff(
170 source.splitlines(),
171 filtered_source.splitlines(),
172 'before/' + filename,
173 'after/' + filename,
174 lineterm='')
175 standard_out.write('\n'.join(list(diff) + ['']))
176
177
178 def open_with_encoding(filename, encoding, mode='r'):
179 """Return opened file with a specific encoding."""
180 return io.open(filename, mode=mode, encoding=encoding,
181 newline='') # Preserve line endings
182
183
184 def detect_encoding(filename):
185 """Return file encoding."""
186 try:
187 with open(filename, 'rb') as input_file:
188 from lib2to3.pgen2 import tokenize as lib2to3_tokenize
189 encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
190
191 # Check for correctness of encoding.
192 with open_with_encoding(filename, encoding) as input_file:
193 input_file.read()
194
195 return encoding
196 except (SyntaxError, LookupError, UnicodeDecodeError):
197 return 'latin-1'
198 233
199 234
200 def main(argv, standard_out, standard_error): 235 def main(argv, standard_out, standard_error):
201 """Main entry point.""" 236 """Main entry point."""
202 import argparse 237 import argparse
206 parser.add_argument('-r', '--recursive', action='store_true', 241 parser.add_argument('-r', '--recursive', action='store_true',
207 help='drill down directories recursively') 242 help='drill down directories recursively')
208 parser.add_argument('-a', '--aggressive', action='store_true', 243 parser.add_argument('-a', '--aggressive', action='store_true',
209 help='make more aggressive changes; ' 244 help='make more aggressive changes; '
210 'this may result in false positives') 245 'this may result in false positives')
246 parser.add_argument('-e', '--error', action="store_true",
247 help="Exit code based on result of check")
211 parser.add_argument('--version', action='version', 248 parser.add_argument('--version', action='version',
212 version='%(prog)s ' + __version__) 249 version='%(prog)s ' + __version__)
250 parser.add_argument('--whitelist', action="store",
251 help=(
252 'String of "#" separated comment beginnings to whitelist. '
253 'Single parts are interpreted as regex. '
254 'OVERWRITING the default whitelist: {}'
255 ).format(Eradicator.DEFAULT_WHITELIST))
256 parser.add_argument('--whitelist-extend', action="store",
257 help=(
258 'String of "#" separated comment beginnings to whitelist '
259 'Single parts are interpreted as regex. '
260 'Overwrites --whitelist. '
261 'EXTENDING the default whitelist: {} '
262 ).format(Eradicator.DEFAULT_WHITELIST))
213 parser.add_argument('files', nargs='+', help='files to format') 263 parser.add_argument('files', nargs='+', help='files to format')
214 264
215 args = parser.parse_args(argv[1:]) 265 args = parser.parse_args(argv[1:])
216 266
267 eradicator = Eradicator()
268
269 if args.whitelist_extend:
270 eradicator.update_whitelist(args.whitelist_extend.split('#'), True)
271 elif args.whitelist:
272 eradicator.update_whitelist(args.whitelist.split('#'), False)
273
217 filenames = list(set(args.files)) 274 filenames = list(set(args.files))
275 change_or_error = False
218 while filenames: 276 while filenames:
219 name = filenames.pop(0) 277 name = filenames.pop(0)
220 if args.recursive and os.path.isdir(name): 278 if args.recursive and os.path.isdir(name):
221 for root, directories, children in os.walk('{}'.format(name)): 279 for root, directories, children in os.walk('{}'.format(name)):
222 filenames += [os.path.join(root, f) for f in children 280 filenames += [os.path.join(root, f) for f in children
224 not f.startswith('.')] 282 not f.startswith('.')]
225 directories[:] = [d for d in directories 283 directories[:] = [d for d in directories
226 if not d.startswith('.')] 284 if not d.startswith('.')]
227 else: 285 else:
228 try: 286 try:
229 fix_file(name, args=args, standard_out=standard_out) 287 change_or_error = eradicator.fix_file(name, args=args, standard_out=standard_out) or change_or_error
230 except OSError as exception: 288 except IOError as exception:
231 print('{}'.format(exception), file=standard_error) 289 print('{}'.format(exception), file=standard_error)
290 change_or_error = True
291 if change_or_error and args.error:
292 return 1

eric ide

mercurial