319 index += 4 |
319 index += 4 |
320 else: |
320 else: |
321 buf += bytes(text[index], encoding="ascii") |
321 buf += bytes(text[index], encoding="ascii") |
322 index += 1 |
322 index += 1 |
323 buf = buf.replace(b"\x00", b"") |
323 buf = buf.replace(b"\x00", b"") |
|
324 return decodeBytes(buf) |
|
325 |
|
def decodeBytes(buffer):
    """
    Function to decode some byte text into a string.

    The decoding strategies are tried in this order: a leading byte order
    mark (UTF-8, UTF-32, UTF-16), plain UTF-8, codec detection via the
    bundled chardet package (if importable) and finally UTF-8 with
    undecodable bytes dropped.

    @param buffer byte buffer to decode (bytes)
    @return decoded text (string)
    """
    # Imported locally so the function does not depend on which BOM
    # constants the surrounding module happens to import.
    import codecs

    # try UTF with BOM
    # The UTF-32 marks must be tested before the UTF-16 ones because the
    # little endian UTF-32 BOM (FF FE 00 00) starts with the little endian
    # UTF-16 BOM (FF FE). Both byte orders are handled explicitly instead
    # of relying on the byte order of the host.
    bomCodecs = (
        (codecs.BOM_UTF8, "utf-8"),
        (codecs.BOM_UTF32_LE, "utf-32-le"),
        (codecs.BOM_UTF32_BE, "utf-32-be"),
        (codecs.BOM_UTF16_LE, "utf-16-le"),
        (codecs.BOM_UTF16_BE, "utf-16-be"),
    )
    for bom, codec in bomCodecs:
        if buffer.startswith(bom):
            try:
                return str(buffer[len(bom):], encoding=codec)
            except UnicodeError:
                # The mark was misleading (e.g. UTF-16 text starting with
                # a NUL character looks like UTF-32); try the next one.
                pass

    # try UTF-8
    try:
        return str(buffer, encoding="utf-8")
    except UnicodeError:
        pass

    # try codec detection
    try:
        import ThirdParty.CharDet.chardet
        guess = ThirdParty.CharDet.chardet.detect(buffer)
        if guess and guess['encoding'] is not None:
            codec = guess['encoding'].lower()
            return str(buffer, encoding=codec)
    except (UnicodeError, LookupError):
        # detected codec is unknown to Python or does not fit the data
        pass
    except ImportError:
        # chardet is not available; fall through to the lossy decoding
        pass

    # last resort: decode as UTF-8 and silently drop undecodable bytes
    return str(buffer, encoding="utf-8", errors="ignore")
344 |
366 |
345 _escape = re.compile("[&<>\"\u0080-\uffff]") |
367 _escape = re.compile("[&<>\"\u0080-\uffff]") |
346 |
368 |
347 _escape_map = { |
369 _escape_map = { |
348 "&": "&", |
370 "&": "&", |