E4XML/XMLHandlerBase.py

changeset 0
de9c2efb9d02
child 12
1d8dd9706f46
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2004 - 2009 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing a base class for all of eric4's XML handlers.
8 """
9
10 import sys
11 from types import UnicodeType
12 try:
13 import cPickle as pickle
14 except ImportError:
15 import pickle
16
17 from xml.sax.handler import ContentHandler
18
class XMLHandlerBase(ContentHandler):
    """
    Class implementing the base class for all of eric4's XML handlers.
    
    The class dispatches SAX start/end tag events through the
    <code>elements</code> dictionary, which maps a tag name to a tuple of
    (start handler, end handler). Values of the basic Python types are
    rebuilt on <code>self.stack</code>; containers push a marker in their
    start handler and collapse everything above that marker into a single
    object in their end handler.
    """
    def __init__(self):
        """
        Constructor
        """
        # Initialise the SAX base class as well (it sets up the locator).
        ContentHandler.__init__(self)
        
        # Optional callable invoked by startDocument().
        self.startDocumentSpecific = None
        
        # tag name -> (start handler, end handler)
        self.elements = {
            'none' : (self.defaultStartElement, self.endNone),
            'int' : (self.defaultStartElement, self.endInt),
            'long' : (self.defaultStartElement, self.endLong),
            'float' : (self.defaultStartElement, self.endFloat),
            'complex' : (self.defaultStartElement, self.endComplex),
            'bool' : (self.defaultStartElement, self.endBool),
            'string' : (self.defaultStartElement, self.endString),
            'unicode' : (self.defaultStartElement, self.endUnicode),
            'tuple' : (self.startTuple, self.endTuple),
            'list' : (self.startList, self.endList),
            'dict' : (self.startDictionary, self.endDictionary),
            'pickle' : (self.startPickle, self.endPickle),
        }
        
        # Character data collected by characters() for the current element.
        self.buffer = ""
        # Stack of values decoded so far.
        self.stack = []
        # Sentinel pushed by container start handlers; it is compared by
        # identity (not equality) when a container is closed.
        self._marker = '__MARKER__'
        
        # Unicode paragraph separator / line separator characters
        # (same values as unichr(0x2029) and unichr(0x2028)).
        self.NEWPARA = u"\u2029"
        self.NEWLINE = u"\u2028"
        
    def utf8_to_code(self, text):
        """
        Public method to convert a string to unicode.
        
        Text that is already unicode (including instances of unicode
        subclasses) is returned unchanged; anything else is decoded as UTF-8.
        
        @param text the text to convert (string)
        @return the text as a unicode object
        """
        if not isinstance(text, unicode):
            text = unicode(text, "utf-8")
        return text
        
    def unescape(self, text, attribute = False):
        """
        Public method used to unescape certain characters.
        
        @param text the text to unescape (string)
        @param attribute flag indicating unescaping is done for an attribute;
            if set, &quot; is replaced as well
        @return the unescaped text (string)
        """
        if attribute:
            text = text.replace("&quot;", '"')
        # "&amp;" has to be replaced last, otherwise e.g. "&amp;lt;" would be
        # unescaped twice.
        return text.replace("&gt;", ">").replace("&lt;", "<")\
            .replace("&amp;", "&")
        
    def decodedNewLines(self, text):
        """
        Public method to decode newlines and paragraph breaks.
        
        @param text text to decode (string or QString)
        @return text with each paragraph separator replaced by two newlines
            and each line separator replaced by one newline
        """
        return text.replace(self.NEWPARA, "\n\n").replace(self.NEWLINE, "\n")
        
    def startDocument(self):
        """
        Handler called, when the document parsing is started.
        """
        self.buffer = ""
        if self.startDocumentSpecific is not None:
            self.startDocumentSpecific()
        
    def startElement(self, name, attrs):
        """
        Handler called, when a starting tag is found.
        
        Tags without an entry in the elements dictionary are ignored.
        
        @param name name of the tag (string)
        @param attrs list of tag attributes
        """
        # NOTE: a KeyError raised inside the start handler itself is
        # swallowed here as well; kept that way for backward compatibility.
        try:
            self.elements[name][0](attrs)
        except KeyError:
            pass
        
    def endElement(self, name):
        """
        Handler called, when an ending tag is found.
        
        Tags without an entry in the elements dictionary are ignored.
        
        @param name name of the tag (string)
        """
        # NOTE: a KeyError raised inside the end handler itself is
        # swallowed here as well; kept that way for backward compatibility.
        try:
            self.elements[name][1]()
        except KeyError:
            pass
        
    def characters(self, chars):
        """
        Handler called for ordinary text.
        
        The text is appended to the buffer, because one element's text may
        be delivered in several pieces.
        
        @param chars the scanned text (string)
        """
        self.buffer += chars
        
    def defaultStartElement(self, attrs):
        """
        Handler method for common start tags.
        
        It clears the character buffer.
        
        @param attrs list of tag attributes
        """
        self.buffer = ""
        
    def defaultEndElement(self):
        """
        Handler method for the common end tags.
        """
        pass
        
    def _prepareBasics(self):
        """
        Protected method to prepare the parsing of XML for basic python types.
        
        It empties the value stack.
        """
        self.stack = []
        
    def _findMarker(self):
        """
        Protected method to locate the innermost container marker.
        
        @return index of the topmost marker entry of the stack (integer)
        @exception ValueError raised, if the stack contains no marker
        """
        # Search from the top of the stack so nested containers resolve to
        # their own (innermost) marker.
        for index in range(len(self.stack) - 1, -1, -1):
            if self.stack[index] is self._marker:
                return index
        raise ValueError(
            "no container start marker found on the value stack")
        
    ############################################################################
    ## The various handler methods for basic types
    ############################################################################
    
    def endNone(self):
        """
        Handler method for the "none" end tag.
        """
        self.stack.append(None)
        
    def endInt(self):
        """
        Handler method for the "int" end tag.
        """
        self.stack.append(int(self.buffer.strip()))
        
    def endLong(self):
        """
        Handler method for the "long" end tag.
        """
        self.stack.append(long(self.buffer.strip()))
        
    def endBool(self):
        """
        Handler method for the "bool" end tag.
        
        Only the exact text "True" yields True; anything else yields False.
        """
        self.stack.append(self.buffer.strip() == "True")
        
    def endFloat(self):
        """
        Handler method for the "float" end tag.
        """
        self.stack.append(float(self.buffer.strip()))
        
    def endComplex(self):
        """
        Handler method for the "complex" end tag.
        
        The buffer must contain the real and the imaginary part separated
        by whitespace.
        """
        real, imag = self.buffer.strip().split()
        self.stack.append(float(real) + float(imag)*1j)
        
    def endString(self):
        """
        Handler method for the "string" end tag.
        """
        s = str(self.utf8_to_code(self.unescape(self.buffer)))
        self.stack.append(s)
        
    def endUnicode(self):
        """
        Handler method for the "unicode" end tag.
        """
        u = unicode(self.utf8_to_code(self.unescape(self.buffer)))
        self.stack.append(u)
        
    def startList(self, attrs):
        """
        Handler method for the "list" start tag.
        
        It pushes the marker followed by the (still empty) list object.
        
        @param attrs list of tag attributes
        """
        self.stack.append(self._marker)
        self.stack.append([])
        
    def endList(self):
        """
        Handler method for the "list" end tag.
        
        All values pushed after the list object become its items; marker,
        list object and items are replaced by the filled list.
        
        @exception ValueError raised, if no matching start marker exists
        """
        i = self._findMarker()
        l = self.stack[i + 1]
        l[:] = self.stack[i + 2:]
        self.stack[i:] = [l]
        
    def startTuple(self, attrs):
        """
        Handler method for the "tuple" start tag.
        
        @param attrs list of tag attributes
        """
        self.stack.append(self._marker)
        
    def endTuple(self):
        """
        Handler method for the "tuple" end tag.
        
        All values pushed after the marker are collected into a tuple, which
        replaces the marker and those values on the stack.
        
        @exception ValueError raised, if no matching start marker exists
        """
        i = self._findMarker()
        t = tuple(self.stack[i + 1:])
        self.stack[i:] = [t]
        
    def startDictionary(self, attrs):
        """
        Handler method for the "dictionary" start tag.
        
        It pushes the marker followed by the (still empty) dictionary object.
        
        @param attrs list of tag attributes
        """
        self.stack.append(self._marker)
        self.stack.append({})
        
    def endDictionary(self):
        """
        Handler method for the "dictionary" end tag.
        
        The values pushed after the dictionary object are consumed pairwise
        as key followed by value.
        
        @exception ValueError raised, if no matching start marker exists
        @exception IndexError raised, if a key has no following value
        """
        i = self._findMarker()
        d = self.stack[i + 1]
        for j in range(i + 2, len(self.stack), 2):
            d[self.stack[j]] = self.stack[j + 1]
        self.stack[i:] = [d]
        
    def startPickle(self, attrs):
        """
        Handler method for the "pickle" start tag.
        
        @param attrs list of tag attributes
        """
        # Drop text collected before this tag; otherwise it would be
        # prepended to the encoded pickle data read by endPickle().
        self.buffer = ""
        self.pickleEnc = attrs.get("encoding", "base64")
        
    def endPickle(self):
        """
        Handler method for the "pickle" end tag.
        
        The buffer is decoded with the codec given by the start tag and then
        unpickled.
        """
        pic = self.utf8_to_code(self.buffer).decode(self.pickleEnc)
        # SECURITY: pickle.loads() can execute arbitrary code. Only parse
        # documents from trusted sources with this handler.
        self.stack.append(pickle.loads(pic))

eric ide

mercurial