435 @return the encoded text (string) |
435 @return the encoded text (string) |
436 """ |
436 """ |
437 if not text: |
437 if not text: |
438 return "" |
438 return "" |
439 text = pattern.sub(escape_uentities, text) |
439 text = pattern.sub(escape_uentities, text) |
|
440 return text |
|
441 |
|
442 _uunescape = re.compile('&#\d+;') |
|
443 |
|
444 |
|
def unescape_uentities(m):
    """
    Function to decode a numeric html entity.

    The matched text is expected to look like "&#<digits>;"; the digits
    are taken as a decimal code point. If that code point cannot be turned
    into a character, the matched text is returned unchanged.

    @param m the match object
    @return the converted text (string)
    """
    entity = m.group()
    try:
        # strip the leading "&#" and the trailing ";" to get the code point
        return chr(int(entity[2:-1]))
    except (ValueError, OverflowError):
        # code point is outside the range chr() accepts; keep the entity
        # as it is rather than aborting the whole substitution
        return entity
|
455 |
|
456 |
|
def html_udecode(text, pattern=_uunescape):
    """
    Function to turn the numeric html entities of a text back into
    characters.

    @param text text to be decoded (string)
    @param pattern search pattern for text to be decoded (object whose
        sub() method is called with unescape_uentities and the text)
    @return the decoded text (string); an empty string for empty input
    """
    # falsy input (empty string, None) short-circuits to an empty string
    return pattern.sub(unescape_uentities, text) if text else ""
441 |
469 |
442 |
470 |
443 def convertLineEnds(text, eol): |
471 def convertLineEnds(text, eol): |
444 """ |
472 """ |