Thu, 10 Jan 2019 14:23:49 +0100
Created global tag <release-19.01>.
5843
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
1 | # |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
2 | # Jasy - Web Tooling Framework |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
3 | # Copyright 2010-2012 Zynga Inc. |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
4 | # |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
5 | |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
6 | from __future__ import unicode_literals |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
7 | |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
8 | import re |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
9 | import jasy.core.Console as Console |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
10 | |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
11 | __all__ = ["extractSummary"] |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
12 | |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
13 | # Used to filter first paragraph from HTML |
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
# Group 1 is the (lazily matched) first sentence *without* its terminator,
# group 2 is the terminator plus the following space ("", ". ", "? ", "! ").
paragraphExtract = re.compile(r"^(.*?)(\. |\? |\! |$)")
newlineMatcher = re.compile(r"\n")

# Used to remove markup sequences after doc processing of comment text
stripMarkup = re.compile(r"<.*?>")


def extractSummary(text):
    """
    Return the first sentence of the given (HTML) text as a plain summary.

    Newlines are replaced by spaces and markup sequences are removed before
    the first sentence is looked up. The sentence ends at the first ". ",
    "? " or "! ", or at the end of the text. The returned summary is stripped
    of surrounding whitespace and always ends with a sentence terminator:
    the original one when present, otherwise a period is appended.

    :param text: Text (optionally containing markup) to summarize.
    :return: The summary string, or ``None`` when `text` is not a string
        (a warning is logged in that case) or when nothing but whitespace
        remains after removing the markup.
    """

    try:
        text = stripMarkup.sub("", newlineMatcher.sub(" ", text))
        matched = paragraphExtract.match(text)
    except TypeError:
        # Non-string input (e.g. None) cannot be processed by the patterns.
        matched = None

    if matched is None:
        Console.warn("Unable to extract summary for: %s", text)
        return None

    summary = matched.group(1).strip()
    if not summary:
        # Empty or markup-only text: there is nothing to summarize, so do
        # not fabricate a lone "." as the summary.
        return None

    if not summary.endswith((".", "!", "?")):
        # The pattern keeps the terminator in group 2; re-attach it so that
        # questions and exclamations are not silently turned into
        # statements. Fall back to a period for unterminated text.
        summary += matched.group(2).strip() or "."

    return summary
76eee727ccd9
Updated jasy to 1.5-beta5 (latest release available).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
38 |