Utilities/__init__.py

branch
5_1_x
changeset 1107
cad60e83a962
parent 882
34b86be88bf0
child 1125
4ecd7e08246b
equal deleted inserted replaced
1105:8a1ec3fa9d2b 1107:cad60e83a962
319 index += 4 319 index += 4
320 else: 320 else:
321 buf += bytes(text[index], encoding="ascii") 321 buf += bytes(text[index], encoding="ascii")
322 index += 1 322 index += 1
323 buf = buf.replace(b"\x00", b"") 323 buf = buf.replace(b"\x00", b"")
324 return decodeBytes(buf)
325
def decodeBytes(buffer):
    """
    Function to decode some byte text into a string.

    The decoding is attempted in this order: Unicode encodings announced
    by a byte order mark (UTF-8, UTF-32, UTF-16), plain UTF-8, the codec
    guessed by the bundled chardet package (if it can be imported) and
    finally UTF-8 with undecodable bytes dropped.

    @param buffer byte buffer to decode (bytes)
    @return decoded text (string)
    """
    import codecs

    # try UTF with BOM
    # The UTF-32 marks have to be tested before the UTF-16 ones because
    # the little endian UTF-32 BOM (FF FE 00 00) starts with the little
    # endian UTF-16 BOM (FF FE). Both byte orders are tested explicitly
    # because BOM_UTF16 and BOM_UTF32 only cover the byte order of the
    # machine running the code.
    # The BOM is left in the buffer on purpose: the 'utf-8-sig', 'utf-16'
    # and 'utf-32' codecs consume it and select the byte order from it.
    bomCodecs = (
        (codecs.BOM_UTF8, 'utf-8-sig'),
        (codecs.BOM_UTF32_LE, 'utf-32'),
        (codecs.BOM_UTF32_BE, 'utf-32'),
        (codecs.BOM_UTF16_LE, 'utf-16'),
        (codecs.BOM_UTF16_BE, 'utf-16'),
    )
    for bom, bomCodec in bomCodecs:
        if buffer.startswith(bom):
            try:
                return str(buffer, encoding=bomCodec)
            except (UnicodeError, LookupError):
                # the BOM was misleading; continue with the other methods
                break

    # try UTF-8
    try:
        return str(buffer, encoding="utf-8")
    except UnicodeError:
        pass

    # try codec detection
    try:
        import ThirdParty.CharDet.chardet
        guess = ThirdParty.CharDet.chardet.detect(buffer)
        if guess and guess['encoding'] is not None:
            codec = guess['encoding'].lower()
            return str(buffer, encoding=codec)
    except (UnicodeError, LookupError):
        # the guessed codec is unknown or does not fit the data
        pass
    except ImportError:
        # chardet is not available; fall through to the last resort
        pass

    # last resort: decode as UTF-8 and silently drop undecodable bytes
    return str(buffer, encoding="utf-8", errors="ignore")
344 366
345 _escape = re.compile("[&<>\"\u0080-\uffff]") 367 _escape = re.compile("[&<>\"\u0080-\uffff]")
346 368
347 _escape_map = { 369 _escape_map = {
348 "&": "&amp;", 370 "&": "&amp;",

eric ide

mercurial