diff -r ff2971513d6d -r 4fc11172df1b eric6/Plugins/CheckerPlugins/CodeStyleChecker/eradicate.py
--- a/eric6/Plugins/CheckerPlugins/CodeStyleChecker/eradicate.py Wed Jan 13 17:46:13 2021 +0100
+++ b/eric6/Plugins/CheckerPlugins/CodeStyleChecker/eradicate.py Wed Jan 13 19:02:30 2021 +0100
@@ -1,4 +1,4 @@
-# Copyright (C) 2012-2015 Steven Myint
+# Copyright (C) 2012-2018 Steven Myint
 #
 # Permission is hereby granted, free of charge, to any person obtaining
 # a copy of this software and associated documentation files (the
@@ -21,180 +21,217 @@
 
 """Removes commented-out Python code."""
 
+from __future__ import print_function
+from __future__ import unicode_literals
+
 import difflib
 import io
 import os
 import re
 import tokenize
 
-__version__ = '1.0'
-
-
-MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$')
-PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$')
+__version__ = '2.0.0'
 
 
-def comment_contains_code(line, aggressive=True):
-    """Return True comment contains code."""
-    line = line.lstrip()
-    if not line.startswith('#'):
-        return False
-
-    line = line.lstrip(' \t\v\n#').strip()
+class Eradicator(object):
+    """Eradicate comments."""
+    BRACKET_REGEX = re.compile(r'^[()\[\]{}\s]+$')
+    CODING_COMMENT_REGEX = re.compile(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')
+    DEF_STATEMENT_REGEX = re.compile(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$")
+    FOR_STATEMENT_REGEX = re.compile(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$")
+    HASH_NUMBER = re.compile(r'#[0-9]')
+    MULTILINE_ASSIGNMENT_REGEX = re.compile(r'^\s*\w+\s*=.*[(\[{]$')
+    PARTIAL_DICTIONARY_REGEX = re.compile(r'^\s*[\'"]\w+[\'"]\s*:.+[,{]\s*$')
+    PRINT_RETURN_REGEX = re.compile(r'^(print|return)\b\s*')
+    WITH_STATEMENT_REGEX = re.compile(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$")
 
-    # Ignore non-comment related hashes. For example, "# Issue #999".
-    if re.search('#[0-9]', line):
-        return False
-
-    if line.startswith('pylint:'):
-        return False
-
-    if re.match(r'.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)', line):
-        return False
+    CODE_INDICATORS = ['(', ')', '[', ']', '{', '}', ':', '=', '%',
+                       'print', 'return', 'break', 'continue', 'import']
+    CODE_KEYWORDS = [r'elif\s+.*', 'else', 'try', 'finally', r'except\s+.*']
+    CODE_KEYWORDS_AGGR = CODE_KEYWORDS + [r'if\s+.*']
+    WHITESPACE_HASH = ' \t\v\n#'
 
-    # Check that this is possibly code.
-    for symbol in list('()[]{}:=%') + ['print', 'return', 'break', 'continue',
-                                       'import']:
-        if symbol in line:
-            break
-    else:
-        return False
+    DEFAULT_WHITELIST = (
+        r'pylint',
+        r'pyright',
+        r'noqa',
+        r'type:\s*ignore',
+        r'fmt:\s*(on|off)',
+        r'TODO',
+        r'FIXME',
+        r'XXX',
+        r'~ ',
+        r'- ',
+    )
+    WHITELIST_REGEX = re.compile(r'|'.join(DEFAULT_WHITELIST), flags=re.IGNORECASE)
+
+    def comment_contains_code(self, line, aggressive=True):
+        """Return True if comment contains code."""
+        line = line.lstrip()
+        if not line.startswith('#'):
+            return False
 
-    if multiline_case(line, aggressive=aggressive):
-        return True
+        line = line.lstrip(self.WHITESPACE_HASH).strip()
+
+        # Ignore non-comment related hashes. For example, "# Issue #999".
+        if self.HASH_NUMBER.search(line):
+            return False
+
+        # Ignore whitelisted comments.
+        if self.WHITELIST_REGEX.search(line):
+            return False
 
-    symbol_list = [r'elif\s+.*', 'else', 'try',
-                   'finally', r'except\s+.*']
-    if aggressive:
-        symbol_list.append(r'if\s+.*')
+        if self.CODING_COMMENT_REGEX.match(line):
+            return False
 
-    for symbol in symbol_list:
-        if re.match(r'^\s*' + symbol + r'\s*:\s*$', line):
+        # Check that this is possibly code.
+        for symbol in self.CODE_INDICATORS:
+            if symbol in line:
+                break
+        else:
+            return False
+
+        if self.multiline_case(line, aggressive=aggressive):
             return True
 
-    line = re.sub(r'^(print|return)\b\s*', '', line)
+        for symbol in self.CODE_KEYWORDS_AGGR if aggressive else self.CODE_KEYWORDS:
+            if re.match(r'^\s*' + symbol + r'\s*:\s*$', line):
+                return True
+
+        line = self.PRINT_RETURN_REGEX.sub('', line)
+
+        if self.PARTIAL_DICTIONARY_REGEX.match(line):
+            return True
 
-    if re.match(PARTIAL_DICTIONARY_REGEX, line):
-        return True
+        try:
+            compile(line, '<string>', 'exec')
+        except (SyntaxError, TypeError, UnicodeDecodeError):
+            return False
+        else:
+            return True
+
+
+    def multiline_case(self, line, aggressive=True):
+        """Return True if line is probably part of some multiline code."""
+        if aggressive:
+            for ending in ')]}':
+                if line.endswith(ending + ':'):
+                    return True
 
-    try:
-        compile(line, '<string>', 'exec')
-        return True
-    except (SyntaxError, TypeError, UnicodeDecodeError):
+                if line.strip() == ending + ',':
+                    return True
+
+            # Check whether this is a function/method definition with a
+            # return value annotation
+            if self.DEF_STATEMENT_REGEX.search(line):
+                return True
+
+            # Check whether this is a with statement
+            if self.WITH_STATEMENT_REGEX.search(line):
+                return True
+
+            # Check whether this is a for statement
+            if self.FOR_STATEMENT_REGEX.search(line):
+                return True
+
+        if line.endswith('\\'):
+            return True
+
+        if self.MULTILINE_ASSIGNMENT_REGEX.match(line):
+            return True
+
+        if self.BRACKET_REGEX.match(line):
+            return True
+
         return False
 
 
-def multiline_case(line, aggressive=True):
-    """Return True if line is probably part of some multiline code."""
-    if aggressive:
-        for ending in ')]}':
-            if line.endswith(ending + ':'):
-                return True
-
-            if line.strip() == ending + ',':
-                return True
-
-        # Check whether a function/method definition with return value
-        # annotation
-        if re.search(r"def .+\)[\s]+->[\s]+[a-zA-Z_][a-zA-Z0-9_]*:$", line):
-            return True
+    def commented_out_code_line_numbers(self, source, aggressive=True):
+        """Yield line numbers of commented-out code."""
+        sio = io.StringIO(source)
+        try:
+            for token in tokenize.generate_tokens(sio.readline):
+                token_type = token[0]
+                start_row = token[2][0]
+                line = token[4]
 
-        # Check weather a with statement
-        if re.search(r"with .+ as [a-zA-Z_][a-zA-Z0-9_]*:$", line):
-            return True
-
-        # Check weather a for statement
-        if re.search(r"for [a-zA-Z_][a-zA-Z0-9_]* in .+:$", line):
-            return True
-
-    if line.endswith('\\'):
-        return True
-
-    if re.match(MULTILINE_ASSIGNMENT_REGEX, line):
-        return True
-
-    if re.match(r'^[()\[\]{}\s]+$', line):
-        return True
-
-    return False
+                if (token_type == tokenize.COMMENT and
+                        line.lstrip().startswith('#') and
+                        self.comment_contains_code(line, aggressive)):
+                    yield start_row
+        except (tokenize.TokenError, IndentationError):
+            pass
 
 
-def commented_out_code_line_numbers(source, aggressive=True):
-    """Yield line numbers of commented-out code."""
-    sio = io.StringIO(source)
-    try:
-        for token in tokenize.generate_tokens(sio.readline):
-            token_type = token[0]
-            start_row = token[2][0]
-            line = token[4]
-
-            if (token_type == tokenize.COMMENT and
-                    line.lstrip().startswith('#') and
-                    not line.lstrip().startswith('##') and
-                    # modified from original file (line added)
-                    comment_contains_code(line, aggressive)):
-                yield start_row
-    except (tokenize.TokenError, IndentationError):
-        pass
+    def filter_commented_out_code(self, source, aggressive=True):
+        """Yield code with commented out code removed."""
+        marked_lines = list(self.commented_out_code_line_numbers(source,
+                                                                 aggressive))
+        sio = io.StringIO(source)
+        previous_line = ''
+        for line_number, line in enumerate(sio.readlines(), start=1):
+            if (line_number not in marked_lines or
+                    previous_line.rstrip().endswith('\\')):
+                yield line
+            previous_line = line
 
 
-def filter_commented_out_code(source, aggressive=True):
-    """Yield code with commented out code removed."""
-    marked_lines = list(commented_out_code_line_numbers(source,
-                                                        aggressive))
-    sio = io.StringIO(source)
-    previous_line = ''
-    for line_number, line in enumerate(sio.readlines(), start=1):
-        if (line_number not in marked_lines or
-                previous_line.rstrip().endswith('\\')):
-            yield line
-        previous_line = line
+    def fix_file(self, filename, args, standard_out):
+        """Run filter_commented_out_code() on file."""
+        encoding = self.detect_encoding(filename)
+        with self.open_with_encoding(filename, encoding=encoding) as input_file:
+            source = input_file.read()
+
+        filtered_source = ''.join(self.filter_commented_out_code(source,
+                                                                 args.aggressive))
+
+        if source != filtered_source:
+            if args.in_place:
+                with self.open_with_encoding(filename, mode='w',
+                                             encoding=encoding) as output_file:
+                    output_file.write(filtered_source)
+            else:
+                diff = difflib.unified_diff(
+                    source.splitlines(),
+                    filtered_source.splitlines(),
+                    'before/' + filename,
+                    'after/' + filename,
+                    lineterm='')
+                standard_out.write('\n'.join(list(diff) + ['']))
+            return True
 
 
-def fix_file(filename, args, standard_out):
-    """Run filter_commented_out_code() on file."""
-    encoding = detect_encoding(filename)
-    with open_with_encoding(filename, encoding=encoding) as input_file:
-        source = input_file.read()
-
-    filtered_source = ''.join(filter_commented_out_code(source,
-                                                        args.aggressive))
-
-    if source != filtered_source:
-        if args.in_place:
-            with open_with_encoding(filename, mode='w',
-                                    encoding=encoding) as output_file:
-                output_file.write(filtered_source)
-        else:
-            diff = difflib.unified_diff(
-                source.splitlines(),
-                filtered_source.splitlines(),
-                'before/' + filename,
-                'after/' + filename,
-                lineterm='')
-            standard_out.write('\n'.join(list(diff) + ['']))
+    def open_with_encoding(self, filename, encoding, mode='r'):
+        """Return opened file with a specific encoding."""
+        return io.open(filename, mode=mode, encoding=encoding,
+                       newline='')  # Preserve line endings
 
 
-def open_with_encoding(filename, encoding, mode='r'):
-    """Return opened file with a specific encoding."""
-    return io.open(filename, mode=mode, encoding=encoding,
-                   newline='')  # Preserve line endings
+    def detect_encoding(self, filename):
+        """Return file encoding."""
+        try:
+            with open(filename, 'rb') as input_file:
+                from lib2to3.pgen2 import tokenize as lib2to3_tokenize
+                encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
+
+            # Check for correctness of encoding.
+            with self.open_with_encoding(filename, encoding) as input_file:
+                input_file.read()
 
 
-def detect_encoding(filename):
-    """Return file encoding."""
-    try:
-        with open(filename, 'rb') as input_file:
-            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
-            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
+            return encoding
+        except (SyntaxError, LookupError, UnicodeDecodeError):
+            return 'latin-1'
 
-        # Check for correctness of encoding.
-        with open_with_encoding(filename, encoding) as input_file:
-            input_file.read()
-
-        return encoding
-    except (SyntaxError, LookupError, UnicodeDecodeError):
-        return 'latin-1'
+    def update_whitelist(self, new_whitelist, extend_default=True):
+        """Update the whitelist."""
+        if extend_default:
+            self.WHITELIST_REGEX = re.compile(
+                r'|'.join(list(self.DEFAULT_WHITELIST) + new_whitelist),
+                flags=re.IGNORECASE)
+        else:
+            self.WHITELIST_REGEX = re.compile(
+                r'|'.join(new_whitelist),
+                flags=re.IGNORECASE)
 
 
 def main(argv, standard_out, standard_error):
@@ -208,13 +245,36 @@
     parser.add_argument('-a', '--aggressive', action='store_true',
                         help='make more aggressive changes; '
                              'this may result in false positives')
+    parser.add_argument('-e', '--error', action="store_true",
+                        help="Exit code based on result of check")
     parser.add_argument('--version', action='version',
                         version='%(prog)s ' + __version__)
+    parser.add_argument('--whitelist', action="store",
+                        help=(
+                            'String of "#" separated comment beginnings to whitelist. '
+                            'Single parts are interpreted as regex. '
+                            'OVERWRITING the default whitelist: {}'
+                        ).format(Eradicator.DEFAULT_WHITELIST))
+    parser.add_argument('--whitelist-extend', action="store",
+                        help=(
+                            'String of "#" separated comment beginnings to whitelist. '
+                            'Single parts are interpreted as regex. '
+                            'Overwrites --whitelist. '
+                            'EXTENDING the default whitelist: {}'
+                        ).format(Eradicator.DEFAULT_WHITELIST))
     parser.add_argument('files', nargs='+', help='files to format')
 
     args = parser.parse_args(argv[1:])
 
+    eradicator = Eradicator()
+
+    if args.whitelist_extend:
+        eradicator.update_whitelist(args.whitelist_extend.split('#'), True)
+    elif args.whitelist:
+        eradicator.update_whitelist(args.whitelist.split('#'), False)
+
     filenames = list(set(args.files))
+    change_or_error = False
     while filenames:
         name = filenames.pop(0)
         if args.recursive and os.path.isdir(name):
@@ -226,6 +286,9 @@
                 if not d.startswith('.')]
         else:
            try:
-                fix_file(name, args=args, standard_out=standard_out)
-            except OSError as exception:
+                change_or_error = eradicator.fix_file(name, args=args, standard_out=standard_out) or change_or_error
+            except IOError as exception:
                 print('{}'.format(exception), file=standard_error)
+                change_or_error = True
+    if change_or_error and args.error:
+        return 1
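
Note (not part of the patch): the update above replaces the old module-level functions with an Eradicator class. The following is a minimal usage sketch of the new class-based API; the sample source string is invented for illustration, and the import assumes the module is importable as eradicate (in eric6 it lives under eric6/Plugins/CheckerPlugins/CodeStyleChecker/).

    from eradicate import Eradicator

    eradicator = Eradicator()

    # Invented sample input: one commented-out statement, one real
    # statement, and one whitelisted comment.
    source = (
        "# print('debug')\n"
        "x = 1\n"
        "# TODO: keep this note\n"
    )

    # Line numbers of comments that look like commented-out code.
    print(list(eradicator.commented_out_code_line_numbers(source)))  # [1]

    # Filtered source; the TODO comment survives because 'TODO' is on
    # DEFAULT_WHITELIST.
    print(''.join(eradicator.filter_commented_out_code(source)))

    # Whitelist entries are regex fragments; extend_default=True keeps the
    # defaults and appends the new patterns, mirroring --whitelist-extend.
    eradicator.update_whitelist([r'print\('], extend_default=True)
    print(list(eradicator.commented_out_code_line_numbers(source)))  # []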