|
1 #!/usr/bin/env python |
|
2 """ |
|
3 Script which takes one or more file paths and reports on their detected |
|
4 encodings |
|
5 |
|
6 Example:: |
|
7 |
|
8 % chardetect somefile someotherfile |
|
9 somefile: windows-1252 with confidence 0.5 |
|
10 someotherfile: ascii with confidence 1.0 |
|
11 |
|
12 If no paths are provided, it takes its input from stdin. |
|
13 |
|
14 """ |
|
15 |
|
16 from __future__ import absolute_import, print_function, unicode_literals |
|
17 |
|
18 import argparse |
|
19 import sys |
|
20 from io import open |
|
21 |
|
22 from chardet import __version__ |
|
23 from chardet.universaldetector import UniversalDetector |
|
24 |
|
25 |
|
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    Every item of ``lines`` is fed to a fresh ``UniversalDetector``.
    Feeding stops as soon as the detector flags itself as done, so the
    remainder of a large file or stream is not read needlessly.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when an
              encoding was detected, otherwise ``'<name>: no result'``.
    :rtype: str
    """
    detector = UniversalDetector()
    for line in lines:
        detector.feed(line)
        # chardet's documented usage pattern: once ``done`` is set the
        # detector has reached its verdict and ignores further data, so
        # there is no point in consuming the rest of the input.
        if detector.done:
            break
    # close() finalises the detection and populates ``result``.
    detector.close()
    result = detector.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)
|
46 |
|
47 |
|
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    For every input stream, prints one line with the detected encoding (as
    produced by ``description_of``) to stdout. When a stream is an
    interactive terminal, a short usage hint is printed to stderr first.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        # Implicit concatenation instead of a backslash continuation inside
        # the literal, so source indentation can never leak into the text.
        description="Takes one or more file paths and reports their detected "
                    "encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    # The detector needs bytes. On Python 3 ``sys.stdin`` is a text stream
    # yielding ``str``; its underlying binary stream is ``sys.stdin.buffer``.
    # On Python 2 (no ``buffer`` attribute) ``sys.stdin`` already yields
    # byte strings, so fall back to it.
    stdin_bytes = getattr(sys.stdin, 'buffer', sys.stdin)
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin_bytes])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        try:
            if f.isatty():
                print("You are running chardetect interactively. Press " +
                      "CTRL-D twice at the start of a blank line to signal the " +
                      "end of your input. If you want help, run chardetect " +
                      "--help\n", file=sys.stderr)
            print(description_of(f, f.name))
        finally:
            # argparse.FileType opened these handles for us and never closes
            # them; release each one once it has been reported on. Standard
            # input is left open because this function does not own it.
            if f is not sys.stdin and f is not stdin_bytes:
                f.close()
|
77 |
|
78 |
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()