|
1 # Copyright (C) 2012-2015 Steven Myint |
|
2 # |
|
3 # Permission is hereby granted, free of charge, to any person obtaining |
|
4 # a copy of this software and associated documentation files (the |
|
5 # "Software"), to deal in the Software without restriction, including |
|
6 # without limitation the rights to use, copy, modify, merge, publish, |
|
7 # distribute, sublicense, and/or sell copies of the Software, and to |
|
8 # permit persons to whom the Software is furnished to do so, subject to |
|
9 # the following conditions: |
|
10 # |
|
11 # The above copyright notice and this permission notice shall be included |
|
12 # in all copies or substantial portions of the Software. |
|
13 # |
|
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
15 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
16 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
17 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
18 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
|
19 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
|
20 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
21 |
|
22 """Removes commented-out Python code.""" |
|
23 |
|
24 from __future__ import print_function |
|
25 from __future__ import unicode_literals |
|
26 |
|
27 import difflib |
|
28 import io |
|
29 import os |
|
30 import re |
|
31 import tokenize |
|
32 |
|
__version__ = '1.0'


# Matches a simple assignment whose line ends with an opening bracket, i.e.
# the first line of a literal or call continued on later lines ("x = [").
MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$')

# Matches one quoted-key dictionary entry ending in a comma or opening brace,
# such as "'key': value," or '"key": {'.
PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$')
|
38 |
|
39 |
|
def comment_contains_code(line, aggressive=True):
    """Return True if the comment on ``line`` looks like commented-out code.

    ``line`` is one line of source text.  Anything that is not a whole-line
    comment (after leading whitespace is removed) yields False.

    ``aggressive`` is forwarded to ``multiline_case()`` and additionally
    makes a lone ``if ...:`` header count as code.

    """
    line = line.lstrip()
    if not line.startswith('#'):
        return False

    # Drop the comment marker(s) and surrounding whitespace so that only the
    # comment payload is inspected from here on.
    line = line.lstrip(' \t\v\n#').strip()

    # Ignore non-comment related hashes. For example, "# Issue #999".
    if re.search('#[0-9]', line):
        return False

    # Pylint control comments are directives, not dead code.
    if line.startswith('pylint:'):
        return False

    # Source encoding declarations such as "-*- coding: utf-8 -*-".
    if re.match(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)', line):
        return False

    # Check that this is possibly code: at least one of these substrings
    # must be present before any of the more expensive checks are tried.
    code_hints = list('()[]{}:=%') + ['print', 'return', 'break', 'continue',
                                      'import']
    if not any(hint in line for hint in code_hints):
        return False

    if multiline_case(line, aggressive=aggressive):
        return True

    # Block headers that are not valid statements on their own and would
    # therefore be rejected by compile() below.
    symbol_list = [r'elif\s+.*', 'else', 'try',
                   'finally', r'except\s+.*']
    if aggressive:
        symbol_list.append(r'if\s+.*')

    for symbol in symbol_list:
        if re.match(r'^\s*' + symbol + r'\s*:\s*$', line):
            return True

    # Strip a leading "print"/"return" keyword so that only the remainder of
    # the statement is examined.
    line = re.sub(r'^(print|return)\b\s*', '', line)

    if re.match(PARTIAL_DICTIONARY_REGEX, line):
        return True

    try:
        compile(line, '<string>', 'exec')
    except (SyntaxError, TypeError, ValueError):
        # ValueError covers UnicodeDecodeError (its subclass) and also the
        # "source code string cannot contain null bytes" error that some
        # Python 3 versions raise instead of SyntaxError.
        return False
    return True
|
88 |
|
89 |
|
def multiline_case(line, aggressive=True):
    """Return True if line is probably a fragment of multi-line code.

    With ``aggressive`` enabled, block headers ending in a closing bracket
    plus colon, lone "<closing bracket>," lines, annotated ``def`` headers,
    ``with ... as name:`` headers and ``for name in ...:`` headers also count.

    """
    if aggressive:
        stripped = line.strip()
        for closer in ')]}':
            if line.endswith(closer + ':') or stripped == closer + ',':
                return True

        aggressive_patterns = (
            # Function/method definition with a return value annotation.
            r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$",
            # A with statement binding a name.
            r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$",
            # A for statement over a single loop variable.
            r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$",
        )
        if any(re.search(pattern, line) for pattern in aggressive_patterns):
            return True

    # Explicit backslash continuation.
    if line.endswith('\\'):
        return True

    # Assignment that opens a bracket continued on the following lines.
    if re.match(MULTILINE_ASSIGNMENT_REGEX, line):
        return True

    # A line made up solely of brackets and whitespace.
    return bool(re.match(r'^[()\[\]{}\s]+$', line))
|
123 |
|
124 |
|
def commented_out_code_line_numbers(source, aggressive=True):
    """Yield the row number of each comment that holds commented-out code.

    ``source`` is tokenized as Python; rows are the start rows reported by
    the tokenizer.  Tokenizing errors silently end the iteration.

    """
    readline = io.StringIO(source).readline
    try:
        for token in tokenize.generate_tokens(readline):
            token_type, _, (row, _), _, physical_line = token
            if token_type != tokenize.COMMENT:
                continue

            stripped = physical_line.lstrip()
            # Only whole-line comments are candidates; comments trailing a
            # statement are left alone.
            if not stripped.startswith('#'):
                continue
            # Comments opening with "##" are never reported (this exemption
            # was added on top of the original file).
            if stripped.startswith('##'):
                continue

            if comment_contains_code(physical_line, aggressive):
                yield row
    except (tokenize.TokenError, IndentationError):
        # Best effort: source that cannot be tokenized to the end simply
        # yields no further line numbers.
        pass
|
142 |
|
143 |
|
def filter_commented_out_code(source, aggressive=True):
    """Yield the lines of ``source`` with commented-out code removed.

    Lines reported by ``commented_out_code_line_numbers()`` are dropped,
    except when the preceding line ends with a backslash: removing such a
    line would change what the continuation joins to, so it is kept.

    """
    # A frozenset keeps the per-line membership test O(1); the line numbers
    # are only ever used for lookups.
    marked_lines = frozenset(
        commented_out_code_line_numbers(source, aggressive))

    previous_line = ''
    for line_number, line in enumerate(io.StringIO(source), start=1):
        if (line_number not in marked_lines or
                previous_line.rstrip().endswith('\\')):
            yield line
        previous_line = line
|
155 |
|
156 |
|
def fix_file(filename, args, standard_out):
    """Apply filter_commented_out_code() to the file called ``filename``.

    ``args`` must provide ``aggressive`` and ``in_place``.  When the filter
    changes nothing, nothing is written.  Otherwise the file is rewritten
    (``in_place``) or a unified diff is written to ``standard_out``.

    """
    encoding = detect_encoding(filename)
    with open_with_encoding(filename, encoding=encoding) as input_file:
        source = input_file.read()

    kept_lines = filter_commented_out_code(source, args.aggressive)
    filtered_source = ''.join(kept_lines)

    if source == filtered_source:
        return

    if args.in_place:
        with open_with_encoding(filename, mode='w',
                                encoding=encoding) as output_file:
            output_file.write(filtered_source)
        return

    diff_lines = list(difflib.unified_diff(
        source.splitlines(),
        filtered_source.splitlines(),
        'before/' + filename,
        'after/' + filename,
        lineterm=''))
    # The extra empty element makes the joined text end with a newline.
    diff_lines.append('')
    standard_out.write('\n'.join(diff_lines))
|
179 |
|
180 |
|
def open_with_encoding(filename, encoding, mode='r'):
    """Open ``filename`` in text mode with ``encoding`` and return the file.

    Newline translation is switched off so that the file's line endings are
    preserved when reading and writing.

    """
    untranslated_newlines = ''
    return io.open(
        filename,
        mode=mode,
        encoding=encoding,
        newline=untranslated_newlines,
    )
|
185 |
|
186 |
|
def detect_encoding(filename):
    """Return the encoding of the Python source file ``filename``.

    The encoding is taken from the BOM / coding declaration and then
    verified by decoding the whole file.  If the declaration is invalid,
    names an unknown codec, or the file does not decode, 'latin-1' is
    returned instead.

    """
    # Prefer the standard tokenize module: lib2to3 has been removed from
    # recent Python releases.  Fall back to lib2to3 only where tokenize does
    # not offer detect_encoding().
    detect = getattr(tokenize, 'detect_encoding', None)
    if detect is None:
        from lib2to3.pgen2 import tokenize as lib2to3_tokenize
        detect = lib2to3_tokenize.detect_encoding

    try:
        with open(filename, 'rb') as input_file:
            encoding = detect(input_file.readline)[0]

        # Check for correctness of encoding.
        with open_with_encoding(filename, encoding) as input_file:
            input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return 'latin-1'
|
201 |
|
202 |
|
def main(argv, standard_out, standard_error):
    """Main entry point.

    ``argv`` is the full argument vector (``argv[0]`` is skipped).  Diffs are
    written to ``standard_out``; I/O errors for individual files are printed
    to ``standard_error`` and processing continues with the next file.

    Files are processed in the order given on the command line, duplicates
    removed; with ``--recursive`` directories are traversed in sorted order,
    so the output is reproducible between runs.

    """
    import argparse
    import collections

    parser = argparse.ArgumentParser(description=__doc__, prog='eradicate')
    parser.add_argument('-i', '--in-place', action='store_true',
                        help='make changes to files instead of printing diffs')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='drill down directories recursively')
    parser.add_argument('-a', '--aggressive', action='store_true',
                        help='make more aggressive changes; '
                             'this may result in false positives')
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + __version__)
    parser.add_argument('files', nargs='+', help='files to format')

    args = parser.parse_args(argv[1:])

    # De-duplicate while keeping command-line order; a plain set() would make
    # the processing order vary from run to run.
    seen = set()
    pending = collections.deque()
    for name in args.files:
        if name not in seen:
            seen.add(name)
            pending.append(name)

    while pending:
        name = pending.popleft()
        if args.recursive and os.path.isdir(name):
            for root, directories, children in os.walk('{}'.format(name)):
                # Queue visible Python files; they are handled by the else
                # branch once they reach the front of the queue.
                pending.extend(os.path.join(root, f)
                               for f in sorted(children)
                               if f.endswith('.py') and
                               not f.startswith('.'))
                # Prune hidden directories in place so os.walk() skips them,
                # and sort the rest for a stable traversal order.
                directories[:] = sorted(d for d in directories
                                        if not d.startswith('.'))
        else:
            try:
                fix_file(name, args=args, standard_out=standard_out)
            except IOError as exception:
                print('{}'.format(exception), file=standard_error)