eric7/E5Network/E5RFC6266.py

branch
eric7
changeset 8354
12ebd3934fef
parent 8353
799196d0b05d
child 8355
8a7677a63c8d
--- a/eric7/E5Network/E5RFC6266.py	Sat May 22 12:54:57 2021 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,358 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright (c) 2015 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
-#
-
-"""
-Module implementing a Content-Disposition parser iaw. RFC 6266.
-"""
-
-#
-# This code is adapted from the rfc6266.py module of qutebrowser.
-# Original copyright 2014-2015 Florian Bruhin (The Compiler)
-# <mail@qutebrowser.org>
-#
-
-import urllib.parse as parse
-import collections
-import string
-import re
-
-try:
-    import pypeg2 as peg
-
-    class UniqueNamespace(peg.Namespace):
-        """
-        A pyPEG2 namespace which prevents setting a value twice.
-        """
-        def __setitem__(self, key, value):
-            """
-            Special method to set an item.
-            
-            @param key key for the item
-            @param value value of the item
-            """
-            if key in self:
-                raise DuplicateParamError(key)
-            super().__setitem__(key, value)
-
-    # RFC 2616
-    separator_chars = "()<>@,;:\\\"/[]?={} \t"      # __IGNORE_WARNING_M613__
-    ctl_chars = ''.join(chr(i) for i in range(32)) + chr(127)
-    nontoken_chars = separator_chars + ctl_chars
-
-    # RFC 5987
-    attr_chars_nonalnum = '!#$&+-.^_`|~'
-    attr_chars = string.ascii_letters + string.digits + attr_chars_nonalnum
-
-    # RFC 5987 gives this alternative construction of the token character class
-    token_chars = attr_chars + "*'%"        # __IGNORE_WARNING_M601__
-
-    # Definitions from https://tools.ietf.org/html/rfc2616#section-2.2
-    # token was redefined from attr_chars to avoid using AnyBut,
-    # which might include non-ascii octets.
-    token_re = '[{0}]+'.format(re.escape(token_chars))
-
-    class Token(str):
-        """
-        A token (RFC 2616, Section 2.2).
-        """
-        grammar = re.compile(token_re)
-
-    # RFC 2616 says some linear whitespace (LWS) is in fact allowed in text
-    # and qdtext; however it also mentions folding that whitespace into
-    # a single SP (which isn't in CTL) before interpretation.
-    # Assume the caller already that folding when parsing headers.
-
-    # Note: qdtext also allows non-ascii, which we choose to parse
-    # as ISO-8859-1; rejecting it entirely would also be permitted.
-    # Some broken browsers attempt encoding-sniffing, which is broken
-    # because the spec only allows iso, and because encoding-sniffing
-    # can mangle valid values.
-    # Everything else in this grammar (including RFC 5987 ext values)
-    # is in an ascii-safe encoding.
-
-    qdtext_re = r'[^"{0}]'.format(re.escape(ctl_chars))
-    quoted_pair_re = r'\\[{0}]'.format(re.escape(
-        ''.join(chr(i) for i in range(128))))
-
-    class QuotedString(str):
-        """
-        A quoted string (RFC 2616, Section 2.2).
-        """
-        grammar = re.compile(r'"({0}|{1})+"'.format(quoted_pair_re, qdtext_re))
-
-        def __str__(self):
-            s = super().__str__()
-            s = s[1:-1]  # remove quotes
-            s = re.sub(r'\\(.)', r'\1', s)  # drop backslashes
-            return s
-
-    class Value(str):
-        """
-        A value. (RFC 2616, Section 3.6).
-        """
-        grammar = [re.compile(token_re), QuotedString]
-
-    class Charset(str):
-        """
-        A charset (RFC5987, Section 3.2.1).
-        """
-        # Other charsets are forbidden, the spec reserves them
-        # for future evolutions.
-        grammar = re.compile('UTF-8|ISO-8859-1', re.I)
-
-    class Language(str):
-        """
-        A language-tag (RFC 5646, Section 2.1).
-
-        Fixme: This grammar is not 100% correct yet.
-        https://github.com/The-Compiler/qutebrowser/issues/105
-        """
-        grammar = re.compile('[A-Za-z0-9-]+')
-
-    attr_char_re = '[{0}]'.format(re.escape(attr_chars))
-    hex_digit_re = '%[' + string.hexdigits + ']{2}'
-
-    class ValueChars(str):
-        """
-        A value of an attribute.
-
-        Fixme: Can we merge this with Value?
-        https://github.com/The-Compiler/qutebrowser/issues/105
-        """
-        grammar = re.compile('({0}|{1})*'.format(attr_char_re, hex_digit_re))
-
-    class ExtValue(peg.List):
-        """
-        An ext-value of an attribute (RFC 5987, Section 3.2).
-        """
-        grammar = peg.contiguous(Charset, "'", peg.optional(Language), "'",
-                                 ValueChars)
-
-    class ExtToken(peg.Symbol):
-        """
-        A token introducing an extended value (RFC 6266, Section 4.1).
-        """
-        regex = re.compile(token_re + r'\*')
-
-        def __str__(self):
-            return super().__str__().lower()
-
-    class NoExtToken(peg.Symbol):
-        """
-        A token introducing a normal value (RFC 6266, Section 4.1).
-        """
-        regex = re.compile(token_re + r'(?<!\*)')
-
-        def __str__(self):
-            return super().__str__().lower()
-
-    class DispositionParm(str):
-        """
-        A parameter for the Disposition-Type header (RFC6266, Section 4.1).
-        """
-        grammar = peg.attr('name', NoExtToken), '=', Value
-
-    class ExtDispositionParm:
-        """
-        An extended parameter (RFC6266, Section 4.1).
-        """
-        grammar = peg.attr('name', ExtToken), '=', ExtValue
-
-        def __init__(self, value, name=None):
-            self.name = name
-            self.value = value
-
-    class DispositionType(peg.List):
-        """
-        The disposition type (RFC6266, Section 4.1).
-        """
-        grammar = [re.compile('(inline|attachment)', re.I), Token]
-
-    class DispositionParmList(UniqueNamespace):
-        """
-        A list of disposition parameters (RFC6266, Section 4.1).
-        """
-        grammar = peg.maybe_some(';', [ExtDispositionParm, DispositionParm])
-
-    class ContentDispositionValue:
-        """
-        A complete Content-Disposition value (RFC 6266, Section 4.1).
-        """
-        # Allows nonconformant final semicolon
-        # I've seen it in the wild, and browsers accept it
-        # http://greenbytes.de/tech/tc2231/#attwithasciifilenamenqs
-        grammar = (peg.attr('dtype', DispositionType),
-                   peg.attr('params', DispositionParmList),
-                   peg.optional(';'))
-
-    LangTagged = collections.namedtuple('LangTagged', ['string', 'langtag'])
-
-    class DuplicateParamError(Exception):
-        """
-        Exception raised when a parameter has been given twice.
-        """
-
-    class InvalidISO8859Error(Exception):
-        """
-        Exception raised when a byte is invalid in ISO-8859-1.
-        """
-
-    class ContentDisposition:
-        """
-        Records various indications and hints about content disposition.
-
-        These can be used to know if a file should be downloaded or
-        displayed directly, and to hint what filename it should have
-        in the download case.
-        """
-        def __init__(self, disposition='inline', assocs=None):
-            """
-            Used internally after parsing the header.
-
-            Instances should generally be created from a factory
-            function, such as parse_headers and its variants.
-            """
-            if len(disposition) != 1:
-                self.disposition = 'inline'
-            else:
-                self.disposition = disposition[0]
-            if assocs is None:
-                self.assocs = {}
-            else:
-                self.assocs = dict(assocs)  # So we can change values
-                if 'filename*' in self.assocs:
-                    param = self.assocs['filename*']
-                    if isinstance(param, ExtDispositionParm):
-                        self.assocs['filename*'] = (
-                            parse_ext_value(param.value).string
-                        )
-
-        def filename(self):
-            """
-            The filename from the Content-Disposition header or None.
-
-            On safety:
-            This property records the intent of the sender.
-
-            You shouldn't use this sender-controlled value as a filesystem
-            path, it can be insecure. Serving files with this filename can be
-            dangerous as well, due to a certain browser using the part after
-            the dot for mime-sniffing.  Saving it to a database is fine by
-            itself though.
-            """
-            if 'filename*' in self.assocs:
-                return self.assocs['filename*']
-            elif 'filename' in self.assocs:
-                # XXX Reject non-ascii (parsed via qdtext) here?
-                return self.assocs['filename']
-            else:
-                return None
-
-        def is_inline(self):
-            """
-            Return if the file should be handled inline.
-
-            If not, and unless your application supports other dispositions
-            than the standard inline and attachment, it should be handled
-            as an attachment.
-            """
-            return self.disposition.lower() == 'inline'
-
-    def normalize_ws(text):
-        """
-        Do LWS (linear whitespace) folding.
-        """
-        return ' '.join(text.split())
-
-    def parse_headers(content_disposition):
-        """
-        Build a ContentDisposition from header values.
-        
-        @param content_disposition contents of the disposition header
-        @type bytes
-        """
-        # We allow non-ascii here (it will only be parsed inside of qdtext, and
-        # rejected by the grammar if it appears in other places), although
-        # parsing it can be ambiguous.  Parsing it ensures that a non-ambiguous
-        # filename* value won't get dismissed because of an unrelated ambiguity
-        # in the filename parameter. But it does mean we occasionally give
-        # less-than-certain values for some legacy senders.
-        content_disposition = content_disposition.decode('iso-8859-1')
-        
-        # Our parsing is relaxed in these regards:
-        # - The grammar allows a final ';' in the header;
-        # - We do LWS-folding, and possibly normalise other broken
-        #   whitespace, instead of rejecting non-lws-safe text.
-        # XXX Would prefer to accept only the quoted whitespace
-        # case, rather than normalising everything.
-        content_disposition = normalize_ws(content_disposition)
-        try:
-            parsed = peg.parse(content_disposition, ContentDispositionValue)
-        except (SyntaxError, DuplicateParamError, InvalidISO8859Error):
-            return ContentDisposition()
-        else:
-            return ContentDisposition(disposition=parsed.dtype,
-                                      assocs=parsed.params)
-
-    def parse_ext_value(val):
-        """
-        Parse the value of an extended attribute.
-        """
-        if len(val) == 3:
-            charset, langtag, coded = val
-        else:
-            charset, coded = val
-            langtag = None
-        decoded = parse.unquote(coded, charset, errors='strict')
-        if charset == 'iso-8859-1':
-            # Fail if the filename contains an invalid ISO-8859-1 char
-            for c in decoded:
-                if 0x7F <= ord(c) <= 0x9F:
-                    raise InvalidISO8859Error(c)
-        return LangTagged(decoded, langtag)
-
-except ImportError:
-    class ContentDisposition:
-        """
-        Records various indications and hints about content disposition.
-
-        These can be used to know if a file should be downloaded or
-        displayed directly, and to hint what filename it should have
-        in the download case.
-        """
-        def __init__(self, filename):
-            """
-            Constructor
-            
-            @param filename file name to be stored in this surrogate class
-            @type str
-            """
-            self.__filename = filename
-        
-        def filename(self):
-            """
-            Public method to get the stored file name
-            
-            @return file name
-            @rtype str
-            """
-            return self.__filename
-    
-    def parse_headers(content_disposition):
-        """
-        Build a ContentDisposition from header values.
-        
-        @param content_disposition contents of the disposition header
-        @type bytes
-        """
-        header = content_disposition.decode()
-        if header:
-            pos = header.find("filename=")
-            if pos != -1:
-                path = header[pos + 9:]
-                if path.startswith('"') and path.endswith('"'):
-                    path = path[1:-1]
-                return ContentDisposition(path)
-        return ContentDisposition("")

eric ide

mercurial