|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2004 - 2009 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing a base class for all of eric4s XML handlers. |
|
8 """ |
|
9 |
|
10 import sys |
|
11 from types import UnicodeType |
|
12 try: |
|
13 import cPickle as pickle |
|
14 except ImportError: |
|
15 import pickle |
|
16 |
|
17 from xml.sax.handler import ContentHandler |
|
18 |
|
class XMLHandlerBase(ContentHandler):
    """
    Class implementing the base class for all of eric4's XML handlers.

    The class keeps a dispatch table (self.elements) that maps a tag name
    to a (start handler, end handler) pair. The handlers defined here
    rebuild basic Python values (none, int, long, float, complex, bool,
    string, unicode, tuple, list, dict, pickle) on a value stack
    (self.stack). Tags that are not in the table are ignored.
    """
    def __init__(self):
        """
        Constructor
        """
        # Called explicitly rather than via super() so that it also works
        # if the base class is an old-style class.
        ContentHandler.__init__(self)

        # Optional callable invoked by startDocument(); None disables it.
        self.startDocumentSpecific = None

        # tag name -> (start handler taking attrs, end handler)
        self.elements = {
            'none'    : (self.defaultStartElement, self.endNone),
            'int'     : (self.defaultStartElement, self.endInt),
            'long'    : (self.defaultStartElement, self.endLong),
            'float'   : (self.defaultStartElement, self.endFloat),
            'complex' : (self.defaultStartElement, self.endComplex),
            'bool'    : (self.defaultStartElement, self.endBool),
            'string'  : (self.defaultStartElement, self.endString),
            'unicode' : (self.defaultStartElement, self.endUnicode),
            'tuple'   : (self.startTuple, self.endTuple),
            'list'    : (self.startList, self.endList),
            'dict'    : (self.startDictionary, self.endDictionary),
            'pickle'  : (self.startPickle, self.endPickle),
        }

        # Character data collected since the last buffer reset.
        self.buffer = ""
        # Value stack used to assemble (possibly nested) Python values.
        self.stack = []
        # Sentinel pushed on the stack when a container tag is opened; it
        # is recognized by identity, not by equality.
        self._marker = '__MARKER__'

        # Unicode paragraph and line separators, see decodedNewLines().
        self.NEWPARA = unichr(0x2029)
        self.NEWLINE = unichr(0x2028)

    def utf8_to_code(self, text):
        """
        Public method to convert a UTF-8 encoded string to unicode.

        @param text the text to convert (string or unicode)
        @return the text as a unicode object; input that is already
            unicode is returned unchanged
        """
        if isinstance(text, unicode):
            return text
        return unicode(text, "utf-8")

    def unescape(self, text, attribute=False):
        """
        Public method used to unescape certain characters.

        @param text the text to unescape (string)
        @param attribute flag indicating unescaping is done for an
            attribute; if set, "&quot;" is decoded as well (boolean)
        @return the unescaped text (string)
        """
        if attribute:
            text = text.replace("&quot;", '"')
        # "&amp;" is decoded last so that the "&" it produces cannot be
        # taken as the start of another entity and decoded a second time.
        return text.replace("&gt;", ">").replace("&lt;", "<")\
                   .replace("&amp;", "&")

    def decodedNewLines(self, text):
        """
        Public method to decode newlines and paragraph breaks.

        @param text text to decode (string or QString)
        @return text with each paragraph separator replaced by two
            newlines and each line separator replaced by one newline
        """
        return text.replace(self.NEWPARA, "\n\n").replace(self.NEWLINE, "\n")

    def startDocument(self):
        """
        Handler called, when the document parsing is started.
        """
        self.buffer = ""
        hook = self.startDocumentSpecific
        if hook is not None:
            hook()

    def startElement(self, name, attrs):
        """
        Handler called, when a starting tag is found.

        Tags without an entry in self.elements are ignored. Exceptions
        raised by the start handler itself (including KeyError) are passed
        on to the caller.

        @param name name of the tag (string)
        @param attrs list of tag attributes
        """
        # Only the table lookup tolerates unknown tags; the handler is
        # called outside of any KeyError handling so its errors are not
        # hidden.
        handlers = self.elements.get(name)
        if handlers is not None:
            handlers[0](attrs)

    def endElement(self, name):
        """
        Handler called, when an ending tag is found.

        Tags without an entry in self.elements are ignored. Exceptions
        raised by the end handler itself (including KeyError) are passed
        on to the caller.

        @param name name of the tag (string)
        """
        handlers = self.elements.get(name)
        if handlers is not None:
            handlers[1]()

    def characters(self, chars):
        """
        Handler called for ordinary text.

        @param chars the scanned text (string)
        """
        self.buffer += chars

    def defaultStartElement(self, attrs):
        """
        Handler method for common start tags.

        It discards the character data collected so far.

        @param attrs list of tag attributes
        """
        self.buffer = ""

    def defaultEndElement(self):
        """
        Handler method for the common end tags.
        """
        pass

    def _prepareBasics(self):
        """
        Protected method to prepare the parsing of XML for basic python types.
        """
        self.stack = []

    def __findMarker(self):
        """
        Private method to locate the innermost open container on the stack.

        @return index of the topmost marker entry in self.stack (integer)
        @exception ValueError raised if the stack holds no marker
        """
        for index in range(len(self.stack) - 1, -1, -1):
            if self.stack[index] is self._marker:
                return index
        raise ValueError("no container start marker found on the value stack")

    ############################################################################
    ## The various handler methods for basic types
    ############################################################################

    def endNone(self):
        """
        Handler method for the "none" end tag.
        """
        self.stack.append(None)

    def endInt(self):
        """
        Handler method for the "int" end tag.

        @exception ValueError raised if the buffered text is not an integer
        """
        self.stack.append(int(self.buffer.strip()))

    def endLong(self):
        """
        Handler method for the "long" end tag.

        @exception ValueError raised if the buffered text is not an integer
        """
        self.stack.append(long(self.buffer.strip()))

    def endBool(self):
        """
        Handler method for the "bool" end tag.

        Only the exact text "True" (ignoring surrounding whitespace) yields
        True; any other text yields False.
        """
        self.stack.append(self.buffer.strip() == "True")

    def endFloat(self):
        """
        Handler method for the "float" end tag.

        @exception ValueError raised if the buffered text is not a float
        """
        self.stack.append(float(self.buffer.strip()))

    def endComplex(self):
        """
        Handler method for the "complex" end tag.

        The buffered text must consist of the real and the imaginary part
        separated by whitespace.

        @exception ValueError raised if the text does not consist of
            exactly two float values
        """
        real, imag = self.buffer.strip().split()
        # complex(re, im) keeps both parts exactly; "re + im*1j" turns the
        # real part into NaN when the imaginary part is infinite.
        self.stack.append(complex(float(real), float(imag)))

    def endString(self):
        """
        Handler method for the "string" end tag.
        """
        # NOTE(review): str() is applied to a unicode object here; check
        # that non-ASCII content is handled as intended.
        s = str(self.utf8_to_code(self.unescape(self.buffer)))
        self.stack.append(s)

    def endUnicode(self):
        """
        Handler method for the "unicode" end tag.
        """
        u = unicode(self.utf8_to_code(self.unescape(self.buffer)))
        self.stack.append(u)

    def startList(self, attrs):
        """
        Handler method for the "list" start tag.

        It pushes the marker followed by the (still empty) list object.

        @param attrs list of tag attributes
        """
        self.stack.append(self._marker)
        self.stack.append([])

    def endList(self):
        """
        Handler method for the "list" end tag.

        Everything pushed after the list object becomes its contents; the
        marker and these entries are replaced by the list itself.

        @exception ValueError raised if no start marker is on the stack
        """
        i = self.__findMarker()
        l = self.stack[i + 1]
        l[:] = self.stack[i + 2:]
        self.stack[i:] = [l]

    def startTuple(self, attrs):
        """
        Handler method for the "tuple" start tag.

        @param attrs list of tag attributes
        """
        self.stack.append(self._marker)

    def endTuple(self):
        """
        Handler method for the "tuple" end tag.

        The marker and everything pushed after it are replaced by a tuple
        of these entries.

        @exception ValueError raised if no start marker is on the stack
        """
        i = self.__findMarker()
        t = tuple(self.stack[i + 1:])
        self.stack[i:] = [t]

    def startDictionary(self, attrs):
        """
        Handler method for the "dictionary" start tag.

        It pushes the marker followed by the (still empty) dictionary.

        @param attrs list of tag attributes
        """
        self.stack.append(self._marker)
        self.stack.append({})

    def endDictionary(self):
        """
        Handler method for the "dictionary" end tag.

        The entries pushed after the dictionary object are taken as
        alternating keys and values; the marker and these entries are
        replaced by the filled dictionary.

        @exception ValueError raised if no start marker is on the stack
        @exception IndexError raised if a key has no matching value
        """
        i = self.__findMarker()
        d = self.stack[i + 1]
        for j in range(i + 2, len(self.stack), 2):
            d[self.stack[j]] = self.stack[j + 1]
        self.stack[i:] = [d]

    def startPickle(self, attrs):
        """
        Handler method for the "pickle" start tag.

        @param attrs list of tag attributes; "encoding" names the codec of
            the pickled data and defaults to "base64"
        """
        # Reset the buffer like the other value tags do, so that character
        # data seen before this tag does not end up in the pickle data.
        self.buffer = ""
        self.pickleEnc = attrs.get("encoding", "base64")

    def endPickle(self):
        """
        Handler method for the "pickle" end tag.
        """
        pic = self.utf8_to_code(self.buffer).decode(self.pickleEnc)
        # SECURITY NOTE(review): pickle.loads() can execute arbitrary code;
        # this must only be fed documents from a trusted source.
        self.stack.append(pickle.loads(pic))