Utilities/__init__.py

changeset 1732
b140a24e147a
parent 1659
d5215347c209
child 1814
2da3b3749cac
equal deleted inserted replaced
1731:56cf9c150dbf 1732:b140a24e147a
435 @return the encoded text (string) 435 @return the encoded text (string)
436 """ 436 """
437 if not text: 437 if not text:
438 return "" 438 return ""
439 text = pattern.sub(escape_uentities, text) 439 text = pattern.sub(escape_uentities, text)
440 return text
441
# Pattern matching decimal numeric character references like '&#8364;'.
# A raw string is used so that '\d' is passed to the regex engine verbatim
# instead of being treated as an (invalid) string escape sequence.
_uunescape = re.compile(r'&#\d+;')
443
444
def unescape_uentities(m):
    """
    Function to decode html entities.

    A numeric reference whose value is not a valid Unicode code point is
    returned unchanged instead of raising an exception.

    @param m the match object
    @return the converted text (string)
    """
    entity = m.group()
    try:
        # strip the leading '&#' and the trailing ';' to get the number
        return chr(int(entity[2:-1]))
    except (ValueError, OverflowError):
        # chr() rejects values outside range(0x110000); keep the text as is
        return entity
455
456
def html_udecode(text, pattern=_uunescape):
    """
    Function to convert the numeric entities of a html text back to
    unicode characters.

    @param text text to be decoded (string)
    @param pattern search pattern for text to be decoded (string)
    @return the decoded text (string); an empty string for empty input
    """
    # every match of the pattern is replaced by unescape_uentities()
    return pattern.sub(unescape_uentities, text) if text else ""
441 469
442 470
443 def convertLineEnds(text, eol): 471 def convertLineEnds(text, eol):
444 """ 472 """

eric ide

mercurial