33 qVersion, |
31 qVersion, |
34 ) |
32 ) |
35 |
33 |
36 from eric7 import Preferences |
34 from eric7 import Preferences |
37 from eric7.__version__ import Version |
35 from eric7.__version__ import Version |
|
36 from eric7.EricUtilities import ( # noqa |
|
37 decodeBytes, |
|
38 decodeString, |
|
39 html_encode, |
|
40 html_udecode, |
|
41 html_uencode, |
|
42 readStringFromStream, |
|
43 ) |
38 from eric7.EricWidgets.EricApplication import ericApp |
44 from eric7.EricWidgets.EricApplication import ericApp |
39 from eric7.SystemUtilities import DesktopUtilities, FileSystemUtilities, OSUtilities |
45 from eric7.SystemUtilities import DesktopUtilities, FileSystemUtilities, OSUtilities |
40 from eric7.UI.Info import Program |
46 from eric7.UI.Info import Program |
41 |
47 |
42 |
48 |
288 @type bytes |
294 @type bytes |
289 @return tuple of decoded text and encoding |
295 @return tuple of decoded text and encoding |
290 @rtype tuple of (str, str) |
296 @rtype tuple of (str, str) |
291 """ |
297 """ |
292 with contextlib.suppress(UnicodeError, LookupError): |
298 with contextlib.suppress(UnicodeError, LookupError): |
293 if text.startswith(BOM_UTF8): |
299 if text.startswith(codecs.BOM_UTF8): |
294 # UTF-8 with BOM |
300 # UTF-8 with BOM |
295 return str(text[len(BOM_UTF8) :], "utf-8"), "utf-8-bom" |
301 return str(text[len(codecs.BOM_UTF8) :], "utf-8"), "utf-8-bom" |
296 elif text.startswith(BOM_UTF16): |
302 elif text.startswith(codecs.BOM_UTF16): |
297 # UTF-16 with BOM |
303 # UTF-16 with BOM |
298 return str(text[len(BOM_UTF16) :], "utf-16"), "utf-16" |
304 return str(text[len(codecs.BOM_UTF16) :], "utf-16"), "utf-16" |
299 elif text.startswith(BOM_UTF32): |
305 elif text.startswith(codecs.BOM_UTF32): |
300 # UTF-32 with BOM |
306 # UTF-32 with BOM |
301 return str(text[len(BOM_UTF32) :], "utf-32"), "utf-32" |
307 return str(text[len(codecs.BOM_UTF32) :], "utf-32"), "utf-32" |
302 coding = get_codingBytes(text) |
308 coding = get_codingBytes(text) |
303 if coding: |
309 if coding: |
304 return str(text, coding), coding |
310 return str(text, coding), coding |
305 |
311 |
306 # Assume UTF-8 |
312 # Assume UTF-8 |
420 @rtype tuple of (bytes, str) |
426 @rtype tuple of (bytes, str) |
421 @exception CodingError raised to indicate an invalid encoding |
427 @exception CodingError raised to indicate an invalid encoding |
422 """ |
428 """ |
423 encoding = None |
429 encoding = None |
424 if origEncoding == "utf-8-bom": |
430 if origEncoding == "utf-8-bom": |
425 etext, encoding = BOM_UTF8 + text.encode("utf-8"), "utf-8-bom" |
431 etext, encoding = codecs.BOM_UTF8 + text.encode("utf-8"), "utf-8-bom" |
426 else: |
432 else: |
427 # Try declared coding spec |
433 # Try declared coding spec |
428 coding = get_coding(text) |
434 coding = get_coding(text) |
429 if coding: |
435 if coding: |
430 try: |
436 try: |
468 etext, encoding = text.encode("utf-8"), "utf-8" |
474 etext, encoding = text.encode("utf-8"), "utf-8" |
469 |
475 |
470 return etext, encoding |
476 return etext, encoding |
471 |
477 |
472 |
478 |
def decodeString(text):
    """
    Function to decode a string containing Unicode encoded characters.

    Every backslash starts a four character escape sequence that is
    converted with QByteArray.fromHex(); all other characters are encoded
    as UTF-8. NUL bytes are removed from the collected bytes before they
    are passed on to decodeBytes().

    @param text text containing encoded chars
    @type str
    @return decoded text
    @rtype str
    """
    chunks = []
    pos = 0
    length = len(text)
    while pos < length:
        if text[pos] == "\\":
            # escape sequence: the backslash and the three characters after it
            sequence = text[pos : pos + 4].encode()
            chunks.append(bytes(QByteArray.fromHex(sequence)))
            pos += 4
        else:
            chunks.append(codecs.encode(text[pos], "utf-8"))
            pos += 1

    collected = b"".join(chunks).replace(b"\x00", b"")
    return decodeBytes(collected)
|
494 |
|
495 |
|
def decodeBytes(buffer):
    """
    Function to decode some byte text into a string.

    The following strategies are tried in order: detection of a Unicode
    BOM, plain UTF-8, codec detection via chardet and, as a last resort,
    UTF-8 with undecodable bytes ignored.

    @param buffer byte buffer to decode
    @type bytes
    @return decoded text
    @rtype str
    """
    # try UTF with BOM
    # UTF-32 has to be checked before UTF-16 because the little endian
    # UTF-32 BOM (FF FE 00 00) starts with the little endian UTF-16 BOM
    # (FF FE); checking UTF-16 first would shadow the UTF-32 case.
    for bom, codec in (
        (BOM_UTF8, "utf-8"),
        (BOM_UTF32, "utf-32"),
        (BOM_UTF16, "utf-16"),
    ):
        if buffer.startswith(bom):
            # a candidate that fails to decode falls through to the next one
            with contextlib.suppress(UnicodeError, LookupError):
                return str(buffer[len(bom) :], encoding=codec)

    # try UTF-8
    with contextlib.suppress(UnicodeError):
        return str(buffer, encoding="utf-8")

    # try codec detection
    # (ImportError is retained from the original separate handler)
    with contextlib.suppress(LookupError, UnicodeError, ImportError):
        guess = chardet.detect(buffer)
        if guess and guess["encoding"] is not None:
            return str(buffer, encoding=guess["encoding"].lower())

    # last resort: drop everything that is not valid UTF-8
    return str(buffer, encoding="utf-8", errors="ignore")
|
533 |
|
534 |
|
def readStringFromStream(stream):
    """
    Module function to read a string from the given stream.

    The bytes returned by the stream's readString() method are decoded as
    UTF-8; a result of None is treated like an empty byte string.

    @param stream data stream opened for reading
    @type QDataStream
    @return string read from the stream
    @rtype str
    """
    raw = stream.readString()
    return (b"" if raw is None else raw).decode("utf-8")
|
548 |
|
549 |
|
550 def normalizeCode(codestring): |
479 def normalizeCode(codestring): |
551 """ |
480 """ |
552 Function to normalize the given code. |
481 Function to normalize the given code. |
553 |
482 |
554 @param codestring code to be normalized |
483 @param codestring code to be normalized |
560 |
489 |
561 if codestring and codestring[-1] != "\n": |
490 if codestring and codestring[-1] != "\n": |
562 codestring += "\n" |
491 codestring += "\n" |
563 |
492 |
564 return codestring |
493 return codestring |
565 |
|
566 |
|
# Characters that need escaping: the HTML special characters and every
# non-ASCII code point (including those above the Basic Multilingual Plane).
_escape = re.compile("[&<>\"'\u0080-\U0010ffff]")

# Named / fixed replacements for the HTML special characters; everything
# else matched by _escape is converted to a decimal character reference.
_escape_map = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#x27;",
}


def escape_entities(m, escmap=_escape_map):
    """
    Function to encode html entities.

    @param m the match object
    @type re.Match
    @param escmap the map of entities to encode
    @type dict
    @return the converted text (mapped entity or decimal character reference)
    @rtype str
    """
    char = m.group()
    text = escmap.get(char)
    if text is None:
        # no dedicated entity: fall back to a decimal character reference
        text = "&#{0:d};".format(ord(char))
    return text


def html_encode(text, pattern=_escape):
    """
    Function to correctly encode a text for html.

    @param text text to be encoded
    @type str
    @param pattern search pattern for text to be encoded
    @type re.Pattern
    @return the encoded text (empty string for empty or None input)
    @rtype str
    """
    if not text:
        return ""
    return pattern.sub(escape_entities, text)
|
611 |
|
612 |
|
# Every non-ASCII code point, including those above the Basic Multilingual
# Plane, is converted to a decimal character reference.
_uescape = re.compile("[\u0080-\U0010ffff]")


def escape_uentities(m):
    """
    Function to encode html entities.

    @param m the match object
    @type re.Match
    @return the converted text (decimal character reference)
    @rtype str
    """
    return "&#{0:d};".format(ord(m.group()))


def html_uencode(text, pattern=_uescape):
    """
    Function to correctly encode a unicode text for html.

    Only non-ASCII characters are replaced; HTML special characters like
    '<' or '&' are left untouched.

    @param text text to be encoded
    @type str
    @param pattern search pattern for text to be encoded
    @type re.Pattern
    @return the encoded text (empty string for empty or None input)
    @rtype str
    """
    if not text:
        return ""
    return pattern.sub(escape_uentities, text)
|
645 |
|
646 |
|
# Decimal character references like "&#228;".
_uunescape = re.compile(r"&#\d+;")


def unescape_uentities(m):
    """
    Function to decode html entities.

    @param m the match object
    @type re.Match
    @return the converted text; the reference itself, if its number is not
        a valid Unicode code point
    @rtype str
    """
    entity = m.group()
    try:
        # strip the leading "&#" and the trailing ";"
        return chr(int(entity[2:-1]))
    except (ValueError, OverflowError):
        # number outside the Unicode range: keep the reference unchanged
        # instead of aborting the whole decoding
        return entity


def html_udecode(text, pattern=_uunescape):
    """
    Function to correctly decode a html text to a unicode text.

    @param text text to be decoded
    @type str
    @param pattern search pattern for text to be decoded
    @type re.Pattern
    @return the decoded text (empty string for empty or None input)
    @rtype str
    """
    if not text:
        return ""
    return pattern.sub(unescape_uentities, text)
|
679 |
494 |
680 |
495 |
681 def convertLineEnds(text, eol): |
496 def convertLineEnds(text, eol): |
682 """ |
497 """ |
683 Function to convert the end of line characters. |
498 Function to convert the end of line characters. |