ThirdParty/CharDet/chardet/chardetect.py

changeset 5310
f2b774d78b4a
parent 3537
7662053c3906
diff -r 79b6a38edfc7 -r f2b774d78b4a ThirdParty/CharDet/chardet/chardetect.py
--- a/ThirdParty/CharDet/chardet/chardetect.py	Thu Nov 10 18:54:02 2016 +0100
+++ b/ThirdParty/CharDet/chardet/chardetect.py	Thu Nov 10 18:57:50 2016 +0100
@@ -12,34 +12,68 @@
 If no paths are provided, it takes its input from stdin.
 
 """
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import argparse
+import sys
 from io import open
-from sys import argv, stdin
 
+from chardet import __version__
 from chardet.universaldetector import UniversalDetector
 
 
-def description_of(file, name='stdin'):
-    """Return a string describing the probable encoding of a file."""
+def description_of(lines, name='stdin'):
+    """
+    Return a string describing the probable encoding of a binary file or
+    other iterable of byte strings.
+
+    :param lines: The chunks of bytes to feed to the detector.
+    :type lines: Iterable of bytes
+    :param name: Label (e.g. file name) that prefixes the returned string
+    :type name: str
+    """
     u = UniversalDetector()
-    for line in file:
+    for line in lines:
         u.feed(line)
     u.close()
     result = u.result
     if result['encoding']:
-        return '%s: %s with confidence %s' % (name,
-                                              result['encoding'],
-                                              result['confidence'])
+        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
+                                                     result['confidence'])
     else:
-        return '%s: no result' % name
+        return '{0}: no result'.format(name)
 
 
-def main():
-    if len(argv) <= 1:
-        print(description_of(stdin))
-    else:
-        for path in argv[1:]:
-            with open(path, 'rb') as f:
-                print(description_of(f, path))
+def main(argv=None):
+    """
+    Handles command line arguments and gets things started.
+
+    :param argv: List of arguments, as if specified on the command-line.
+                 If None, ``sys.argv[1:]`` is used instead.
+    :type argv: list of str
+    """
+    # The detector needs bytes, so prefer stdin's binary buffer (Python 3).
+    parser = argparse.ArgumentParser(
+        description="Takes one or more file paths and reports their detected \
+                     encodings",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        conflict_handler='resolve')
+    parser.add_argument('input',
+                        help='File whose encoding we would like to determine.',
+                        type=argparse.FileType('rb'), nargs='*',
+                        default=[getattr(sys.stdin, 'buffer', sys.stdin)])
+    parser.add_argument('--version', action='version',
+                        version='%(prog)s {0}'.format(__version__))
+    args = parser.parse_args(argv)
+
+    for f in args.input:
+        if f.isatty():
+            print("You are running chardetect interactively. Press " +
+                  "CTRL-D twice at the start of a blank line to signal the " +
+                  "end of your input. If you want help, run chardetect " +
+                  "--help\n", file=sys.stderr)
+        print(description_of(f, f.name))
 
 
 if __name__ == '__main__':

eric ide

mercurial