|
1 # |
|
2 # Jasy - Web Tooling Framework |
|
3 # Copyright 2010-2012 Zynga Inc. |
|
4 # |
|
5 |
|
6 from __future__ import unicode_literals |
|
7 |
|
8 import re |
|
9 import jasy.core.Console as Console |
|
10 |
|
11 __all__ = ["extractSummary"] |
|
12 |
|
# Used to filter first paragraph from HTML.
# Group 1 is the sentence body, group 2 the terminator that ended it
# (punctuation followed by a space) or the empty string at end of input.
paragraphExtract = re.compile(r"^(.*?)(\. |\? |\! |$)")
newlineMatcher = re.compile(r"\n")

# Used to remove markup sequences after doc processing of comment text
stripMarkup = re.compile(r"<.*?>")


def extractSummary(text):
    """
    Return the first sentence of `text` as a plain-text summary.

    Newlines are replaced by spaces and markup sequences (`<...>`) are
    removed before the first sentence is looked up. A sentence ends at the
    first ".", "?" or "!" that is followed by a space, or at the end of the
    text. The terminating punctuation is kept as written; a "." is appended
    only when the sentence has no terminator of its own.

    Returns the summary string, or None when `text` contains no summary
    content (empty or markup-only input) or cannot be processed at all
    (e.g. it is not a string), in which case a warning is logged.
    """

    # Only the substitution can raise: the patterns reject non-string input.
    try:
        plain = stripMarkup.sub("", newlineMatcher.sub(" ", text))
    except TypeError:
        Console.warn("Unable to extract summary for: %s", text)
        return None

    matched = paragraphExtract.match(plain)
    if not matched:
        Console.warn("Unable to extract summary for: %s", plain)
        return None

    # The whole match is sentence body plus terminator. Using it (rather
    # than group 1 alone) preserves "?" and "!" instead of replacing them
    # with a period.
    summary = matched.group(0).strip()
    if not summary:
        # Nothing left after removing markup and whitespace: a lone "."
        # would not be a meaningful summary.
        return None

    # Strip before checking so trailing whitespace cannot hide an existing
    # terminator and cause a doubled one.
    if not summary.endswith((".", "!", "?")):
        summary += "."

    return summary
|
38 |