src/eric7/Utilities/__init__.py

branch
eric7-maintenance
changeset 10941
07cad049002c
parent 10814
ba20efe10336
parent 10928
46651e194fbe
child 11019
27cd57e98461
equal deleted inserted replaced
10893:ea32acb9764c 10941:07cad049002c
16 import re 16 import re
17 import shlex 17 import shlex
18 import sys 18 import sys
19 import warnings 19 import warnings
20 20
21 from codecs import BOM_UTF8, BOM_UTF16, BOM_UTF32
22
23 import chardet 21 import chardet
24 22
25 from PyQt6 import sip 23 from PyQt6 import sip
26 from PyQt6.Qsci import QSCINTILLA_VERSION_STR, QsciScintilla 24 from PyQt6.Qsci import QSCINTILLA_VERSION_STR, QsciScintilla
27 from PyQt6.QtCore import ( 25 from PyQt6.QtCore import (
33 qVersion, 31 qVersion,
34 ) 32 )
35 33
36 from eric7 import Preferences 34 from eric7 import Preferences
37 from eric7.__version__ import Version 35 from eric7.__version__ import Version
36 from eric7.EricUtilities import ( # noqa
37 decodeBytes,
38 decodeString,
39 html_encode,
40 html_udecode,
41 html_uencode,
42 readStringFromStream,
43 )
38 from eric7.EricWidgets.EricApplication import ericApp 44 from eric7.EricWidgets.EricApplication import ericApp
39 from eric7.SystemUtilities import DesktopUtilities, FileSystemUtilities, OSUtilities 45 from eric7.SystemUtilities import DesktopUtilities, FileSystemUtilities, OSUtilities
40 from eric7.UI.Info import Program 46 from eric7.UI.Info import Program
41 47
42 48
288 @type bytes 294 @type bytes
289 @return tuple of decoded text and encoding 295 @return tuple of decoded text and encoding
290 @rtype tuple of (str, str) 296 @rtype tuple of (str, str)
291 """ 297 """
292 with contextlib.suppress(UnicodeError, LookupError): 298 with contextlib.suppress(UnicodeError, LookupError):
293 if text.startswith(BOM_UTF8): 299 if text.startswith(codecs.BOM_UTF8):
294 # UTF-8 with BOM 300 # UTF-8 with BOM
295 return str(text[len(BOM_UTF8) :], "utf-8"), "utf-8-bom" 301 return str(text[len(codecs.BOM_UTF8) :], "utf-8"), "utf-8-bom"
296 elif text.startswith(BOM_UTF16): 302 elif text.startswith(codecs.BOM_UTF16):
297 # UTF-16 with BOM 303 # UTF-16 with BOM
298 return str(text[len(BOM_UTF16) :], "utf-16"), "utf-16" 304 return str(text[len(codecs.BOM_UTF16) :], "utf-16"), "utf-16"
299 elif text.startswith(BOM_UTF32): 305 elif text.startswith(codecs.BOM_UTF32):
300 # UTF-32 with BOM 306 # UTF-32 with BOM
301 return str(text[len(BOM_UTF32) :], "utf-32"), "utf-32" 307 return str(text[len(codecs.BOM_UTF32) :], "utf-32"), "utf-32"
302 coding = get_codingBytes(text) 308 coding = get_codingBytes(text)
303 if coding: 309 if coding:
304 return str(text, coding), coding 310 return str(text, coding), coding
305 311
306 # Assume UTF-8 312 # Assume UTF-8
420 @rtype tuple of (bytes, str) 426 @rtype tuple of (bytes, str)
421 @exception CodingError raised to indicate an invalid encoding 427 @exception CodingError raised to indicate an invalid encoding
422 """ 428 """
423 encoding = None 429 encoding = None
424 if origEncoding == "utf-8-bom": 430 if origEncoding == "utf-8-bom":
425 etext, encoding = BOM_UTF8 + text.encode("utf-8"), "utf-8-bom" 431 etext, encoding = codecs.BOM_UTF8 + text.encode("utf-8"), "utf-8-bom"
426 else: 432 else:
427 # Try declared coding spec 433 # Try declared coding spec
428 coding = get_coding(text) 434 coding = get_coding(text)
429 if coding: 435 if coding:
430 try: 436 try:
468 etext, encoding = text.encode("utf-8"), "utf-8" 474 etext, encoding = text.encode("utf-8"), "utf-8"
469 475
470 return etext, encoding 476 return etext, encoding
471 477
472 478
def decodeString(text):
    """
    Function to decode a string containing Unicode encoded characters.

    A backslash starts a four character escape sequence that is handed to
    QByteArray.fromHex() to obtain the raw bytes. All other characters are
    encoded as UTF-8. NUL bytes are removed before the collected bytes are
    passed to decodeBytes().

    @param text text containing encoded chars
    @type str
    @return decoded text
    @rtype str
    """
    chunks = []
    pos = 0
    length = len(text)
    while pos < length:
        if text[pos] == "\\":
            # escape sequence: consume the backslash plus the next three chars
            escaped = text[pos : pos + 4].encode()
            chunks.append(bytes(QByteArray.fromHex(escaped)))
            pos += 4
        else:
            chunks.append(text[pos].encode("utf-8"))
            pos += 1

    return decodeBytes(b"".join(chunks).replace(b"\x00", b""))
494
495
def decodeBytes(buffer):
    """
    Function to decode some byte text into a string.

    The decoding is attempted in this order: a codec selected by a leading
    byte order mark (BOM), plain UTF-8, a codec guessed by chardet and finally
    UTF-8 ignoring all undecodable bytes.

    @param buffer byte buffer to decode
    @type bytes
    @return decoded text
    @rtype str
    """
    # try UTF with BOM
    # UTF-32 has to be probed before UTF-16 because the little endian UTF-32
    # BOM (ff fe 00 00) starts with the little endian UTF-16 BOM (ff fe).
    # The listed codecs consume the BOM themselves and derive the byte order
    # from it, so both endiannesses are handled.
    bomCodecs = (
        ((codecs.BOM_UTF8,), "utf-8-sig"),
        ((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE), "utf-32"),
        ((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE), "utf-16"),
    )
    for boms, codec in bomCodecs:
        if buffer.startswith(boms):
            # a failing candidate just falls through to the next attempt
            with contextlib.suppress(UnicodeError, LookupError):
                return str(buffer, encoding=codec)

    # try UTF-8
    with contextlib.suppress(UnicodeError):
        return str(buffer, encoding="utf-8")

    # try codec detection
    with contextlib.suppress(LookupError, UnicodeError, ImportError):
        guess = chardet.detect(buffer)
        if guess and guess["encoding"] is not None:
            return str(buffer, encoding=guess["encoding"].lower())

    # last resort: drop everything that is not valid UTF-8
    return str(buffer, encoding="utf-8", errors="ignore")
533
534
def readStringFromStream(stream):
    """
    Module function to read a string from the given stream.

    The value returned by the stream's readString() method is decoded as
    UTF-8. A result of None is treated like an empty byte string.

    @param stream data stream opened for reading
    @type QDataStream
    @return string read from the stream
    @rtype str
    """
    raw = stream.readString()
    return (b"" if raw is None else raw).decode("utf-8")
548
549
550 def normalizeCode(codestring): 479 def normalizeCode(codestring):
551 """ 480 """
552 Function to normalize the given code. 481 Function to normalize the given code.
553 482
554 @param codestring code to be normalized 483 @param codestring code to be normalized
560 489
561 if codestring and codestring[-1] != "\n": 490 if codestring and codestring[-1] != "\n":
562 codestring += "\n" 491 codestring += "\n"
563 492
564 return codestring 493 return codestring
565
566
# Characters to be replaced by html_encode(): the HTML special characters and
# every non-ASCII character including those outside the Basic Multilingual
# Plane (e.g. emoji).
_escape = re.compile("[&<>\"'\u0080-\U0010ffff]")

# Named/hexadecimal entities for the HTML special characters. Everything else
# matched by _escape is converted to a decimal character reference.
_escape_map = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#x27;",
}


def escape_entities(m, escmap=_escape_map):
    """
    Function to encode html entities.

    The matched character is looked up in the given map. Characters not
    contained in the map are converted to a decimal character reference.

    @param m the match object
    @type re.Match
    @param escmap the map of entities to encode
    @type dict
    @return the converted text
    @rtype str
    """
    char = m.group()
    text = escmap.get(char)
    if text is None:
        text = "&#{0:d};".format(ord(char))
    return text


def html_encode(text, pattern=_escape):
    """
    Function to correctly encode a text for html.

    @param text text to be encoded
    @type str
    @param pattern compiled search pattern for text to be encoded
    @type re.Pattern
    @return the encoded text (empty string for an empty or None input)
    @rtype str
    """
    if not text:
        return ""
    return pattern.sub(escape_entities, text)
611
612
# Every non-ASCII character, including those outside the Basic Multilingual
# Plane (e.g. emoji), is replaced by html_uencode().
_uescape = re.compile("[\u0080-\U0010ffff]")


def escape_uentities(m):
    """
    Function to encode html entities.

    The matched character is converted to a decimal character reference.

    @param m the match object
    @type re.Match
    @return the converted text
    @rtype str
    """
    return "&#{0:d};".format(ord(m.group()))


def html_uencode(text, pattern=_uescape):
    """
    Function to correctly encode a unicode text for html.

    @param text text to be encoded
    @type str
    @param pattern compiled search pattern for text to be encoded
    @type re.Pattern
    @return the encoded text (empty string for an empty or None input)
    @rtype str
    """
    if not text:
        return ""
    return pattern.sub(escape_uentities, text)
645
646
# Decimal character references as generated by the html encoding functions.
_uunescape = re.compile(r"&#\d+;")


def unescape_uentities(m):
    """
    Function to decode html entities.

    A decimal character reference is converted to the character it denotes.
    References that do not denote a valid Unicode code point are returned
    unchanged instead of raising an exception.

    @param m the match object
    @type re.Match
    @return the converted text
    @rtype str
    """
    entity = m.group()
    try:
        # strip the leading '&#' and the trailing ';'
        return chr(int(entity[2:-1]))
    except (ValueError, OverflowError):
        # ordinal beyond U+10FFFF (or an absurdly long digit sequence)
        return entity


def html_udecode(text, pattern=_uunescape):
    """
    Function to correctly decode a html text to a unicode text.

    @param text text to be decoded
    @type str
    @param pattern compiled search pattern for text to be decoded
    @type re.Pattern
    @return the decoded text (empty string for an empty or None input)
    @rtype str
    """
    if not text:
        return ""
    return pattern.sub(unescape_uentities, text)
679 494
680 495
681 def convertLineEnds(text, eol): 496 def convertLineEnds(text, eol):
682 """ 497 """
683 Function to convert the end of line characters. 498 Function to convert the end of line characters.

eric ide

mercurial