eric6/Plugins/CheckerPlugins/CodeStyleChecker/eradicate.py

changeset 7040:f89952e5fc11 (parent 7039:73cb6384a71f, child 7639:422fd05e9c91)
# Copyright (C) 2012-2015 Steven Myint
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""Removes commented-out Python code."""

from __future__ import print_function
from __future__ import unicode_literals

import difflib
import io
import os
import re
import tokenize

__version__ = '1.0'


MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$')
PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$')

def comment_contains_code(line, aggressive=True):
    """Return True if the comment contains code."""
    line = line.lstrip()
    if not line.startswith('#'):
        return False

    line = line.lstrip(' \t\v\n#').strip()

    # Ignore non-comment related hashes. For example, "# Issue #999".
    if re.search('#[0-9]', line):
        return False

    if line.startswith('pylint:'):
        return False

    if re.match(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)', line):
        return False

    # Check that this is possibly code.
    for symbol in list('()[]{}:=%') + ['print', 'return', 'break', 'continue',
                                       'import']:
        if symbol in line:
            break
    else:
        return False

    if multiline_case(line, aggressive=aggressive):
        return True

    symbol_list = [r'elif\s+.*', 'else', 'try',
                   'finally', r'except\s+.*']
    if aggressive:
        symbol_list.append(r'if\s+.*')

    for symbol in symbol_list:
        if re.match(r'^\s*' + symbol + r'\s*:\s*$', line):
            return True

    line = re.sub(r'^(print|return)\b\s*', '', line)

    if re.match(PARTIAL_DICTIONARY_REGEX, line):
        return True

    try:
        compile(line, '<string>', 'exec')
        return True
    except (SyntaxError, TypeError, UnicodeDecodeError):
        return False
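
# Illustrative examples for comment_contains_code() (not part of the original
# module); with the default aggressive=True:
#     comment_contains_code('# x = 1')               -> True
#     comment_contains_code('# This is a comment.')  -> False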


def multiline_case(line, aggressive=True):
    """Return True if line is probably part of some multiline code."""
    if aggressive:
        for ending in ')]}':
            if line.endswith(ending + ':'):
                return True

            if line.strip() == ending + ',':
                return True

        # Check whether this is a function/method definition with a
        # return value annotation.
        if re.search(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$", line):
            return True

        # Check whether this is a with statement.
        if re.search(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$", line):
            return True

        # Check whether this is a for statement.
        if re.search(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$", line):
            return True

    if line.endswith('\\'):
        return True

    if re.match(MULTILINE_ASSIGNMENT_REGEX, line):
        return True

    if re.match(r'^[()\[\]{}\s]+$', line):
        return True

    return False
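
# Illustrative examples for multiline_case() (not part of the original
# module); with the default aggressive=True:
#     multiline_case('x = [')  -> True   (opens a multiline assignment)
#     multiline_case('x = 1')  -> False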


def commented_out_code_line_numbers(source, aggressive=True):
    """Yield line numbers of commented-out code."""
    sio = io.StringIO(source)
    try:
        for token in tokenize.generate_tokens(sio.readline):
            token_type = token[0]
            start_row = token[2][0]
            line = token[4]

            if (token_type == tokenize.COMMENT and
                    line.lstrip().startswith('#') and
                    not line.lstrip().startswith('##') and
                    # modified from original file (line added)
                    comment_contains_code(line, aggressive)):
                yield start_row
    except (tokenize.TokenError, IndentationError):
        pass
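
# Illustrative example for commented_out_code_line_numbers() (not part of the
# original module):
#     list(commented_out_code_line_numbers('x = 1\n# y = 2\n'))  ->  [2]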


def filter_commented_out_code(source, aggressive=True):
    """Yield code with commented-out code removed."""
    marked_lines = list(commented_out_code_line_numbers(source,
                                                        aggressive))
    sio = io.StringIO(source)
    previous_line = ''
    for line_number, line in enumerate(sio.readlines(), start=1):
        if (line_number not in marked_lines or
                previous_line.rstrip().endswith('\\')):
            yield line
        previous_line = line
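
# Illustrative example for filter_commented_out_code() (not part of the
# original module):
#     ''.join(filter_commented_out_code('# x = 1\nprint(1)\n'))
# returns 'print(1)\n'; the commented-out assignment is dropped.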


def fix_file(filename, args, standard_out):
    """Run filter_commented_out_code() on file."""
    encoding = detect_encoding(filename)
    with open_with_encoding(filename, encoding=encoding) as input_file:
        source = input_file.read()

    filtered_source = ''.join(filter_commented_out_code(source,
                                                        args.aggressive))

    if source != filtered_source:
        if args.in_place:
            with open_with_encoding(filename, mode='w',
                                    encoding=encoding) as output_file:
                output_file.write(filtered_source)
        else:
            diff = difflib.unified_diff(
                source.splitlines(),
                filtered_source.splitlines(),
                'before/' + filename,
                'after/' + filename,
                lineterm='')
            standard_out.write('\n'.join(list(diff) + ['']))

def open_with_encoding(filename, encoding, mode='r'):
    """Return opened file with a specific encoding."""
    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def detect_encoding(filename):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

        # Check for correctness of encoding.
        with open_with_encoding(filename, encoding) as input_file:
            input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return 'latin-1'

def main(argv, standard_out, standard_error):
    """Main entry point."""
    import argparse
    parser = argparse.ArgumentParser(description=__doc__, prog='eradicate')
    parser.add_argument('-i', '--in-place', action='store_true',
                        help='make changes to files instead of printing diffs')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='drill down directories recursively')
    parser.add_argument('-a', '--aggressive', action='store_true',
                        help='make more aggressive changes; '
                             'this may result in false positives')
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument('files', nargs='+', help='files to format')

    args = parser.parse_args(argv[1:])

    filenames = list(set(args.files))
    while filenames:
        name = filenames.pop(0)
        if args.recursive and os.path.isdir(name):
            for root, directories, children in os.walk('{}'.format(name)):
                filenames += [os.path.join(root, f) for f in children
                              if f.endswith('.py') and
                              not f.startswith('.')]
                directories[:] = [d for d in directories
                                  if not d.startswith('.')]
        else:
            try:
                fix_file(name, args=args, standard_out=standard_out)
            except IOError as exception:
                print('{}'.format(exception), file=standard_error)
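
# Illustrative invocation (not part of the original module); within eric6 the
# code style checker calls the helper functions above directly, but the module
# can also be driven through main(), e.g. with a hypothetical 'example.py':
#     import sys
#     main(['eradicate', '--aggressive', 'example.py'], sys.stdout, sys.stderr)
# which prints a unified diff of the proposed removals to sys.stdout.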
