root/trunk/proj/w3c/xmlBuilder.py

Revision 399, 18.0 kB (checked in by sholloway, 5 years ago)

Some bugs revealed by removing 'init' from namespace set to a module's name

Line 
1 #!/usr/bin/env python
2 ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 ##~ Copyright (C) 2002-2004  TechGame Networks, LLC.
4 ##~
5 ##~ This library is free software; you can redistribute it and/or
6 ##~ modify it under the terms of the BSD style License as found in the
7 ##~ LICENSE file included with this distribution.
8 ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9
10 """Abstract base classes for building Python object trees from an XML stream."""
11
12 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13 #~ Imports                                           
14 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
15
16 import weakref
17 from timeit import default_timer as systemTimer
18
19 from xmlNamespaceMap import XMLNamespaceMap
20 from xmlParserContext import ExpatParserCmd
21
22 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23 #~ Definitions
24 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
25
class ElementFactoryError(Exception):
    """Error raised on behalf of an element factory, tagged with a source reference.

    Attributes:
        message -- the text supplied by the raiser
        srcref  -- whatever the builder reports as its current source
                   filename/line number, or '<unknown>' when unavailable
    """

    def __init__(self, message, builder=None):
        """Record *message* and capture the builder's current source position.

        message -- description of the failure
        builder -- optional builder object; when given it is asked for its
                   source filename and line number
        """
        # Initialise the base class with the message only, so that ``args``
        # carries the text and does not keep the builder referenced.
        Exception.__init__(self, message)
        self.message = message
        self.srcref = self._lookupSourceRef(builder)

    def _lookupSourceRef(self, builder):
        """Return the builder's source reference, or '<unknown>' if it has none."""
        if builder is None:
            return '<unknown>'
        # XMLBuilder only defines the underscore-prefixed accessor, so accept
        # either spelling rather than failing with an AttributeError while an
        # error is already being reported.
        for accessorName in ('getSourceFilenameAndLineNumber',
                             '_getSourceFilenameAndLineNumber'):
            accessor = getattr(builder, accessorName, None)
            if accessor is not None:
                return accessor()
        return '<unknown>'

    def __str__(self):
        # NOTE(review): this renders as "<srcref> in <message>"; the operands
        # look swapped, but the output is kept as-is for existing consumers.
        return '%s in %s' % (self.srcref, self.message)
36
37 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
38 #~ Element Stack definition
39 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
40
class ElementStackAbstract(object):
    """Abstract stack of the elements currently being built.

    Holds the list of open elements plus a weak reference to the owning
    builder.  The container behaviour (length, top access, indexing, state
    snapshot) is implemented here; everything that drives the element
    callbacks is left to subclasses.
    """

    # Index used to address the "top" of the stack list.
    topOffset = -1

    def __init__(self, builder, stack=None):
        """Create a stack for *builder*.

        builder -- object providing asWeakRef(); only a weak reference is kept
        stack   -- optional list to use as the backing store (used as-is, not
                   copied); a new empty list is created when omitted
        """
        self.setBuilder(builder)
        if stack is None:
            stack = []
        self.stack = stack

    def __len__(self):
        """Return the number of elements on the stack."""
        return len(self.stack)

    def saveState(self):
        """Return a new stack of the same class sharing the builder, with a
        shallow copy of the current element list."""
        return self.__class__(self.getBuilder(), self.stack[:])

    def topElementOrNone(self):
        """Return the raw top element, or None when the stack is empty."""
        if self.stack:
            return self.stack[self.topOffset]
        return None

    def topElement(self):
        """Return the raw top element; raises IndexError when the stack is empty."""
        return self.stack[self.topOffset]

    def getElement(self, idx=-1, wrapped=True):
        """Return the element at *idx*.

        idx     -- list index into the stack (default: last entry)
        wrapped -- when true, return the element's xmlGetElement() result
                   instead of the raw stack entry
        """
        element = self.stack[idx]
        if wrapped:
            # xmlGetElement takes the builder, like every other element
            # callback (see ElementStack.pop), not the stack itself.
            return element.xmlGetElement(self.getBuilder())
        return element
    __getitem__ = getElement

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def getBuilder(self):
        """Dereference and return the builder (None once it has been collected)."""
        return self.xmlBuilder()

    def setBuilder(self, xmlBuilder):
        """Store a weak reference to *xmlBuilder*, obtained via its asWeakRef()."""
        self.xmlBuilder = xmlBuilder.asWeakRef()

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Subclass responsibilities
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def buildElement(self, node, attributes, namespacemap):
        """Create (but do not push) the element for *node*."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def push(self, element, node, nodeAttributes, nodeNSChain, srcref):
        """Announce *element* to the current top and push it."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def pushRaw(self, element):
        """Push *element* without notifying anything."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def topInitialize(self):
        """Notify the top element that it has been pushed."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def topPreAddElement(self, node, attributes, srcref):
        """Notify the top element that a child node is about to be built."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def topAddData(self, data, srcref):
        """Hand character data to the top element."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def topAddComment(self, comment):
        """Hand comment text to the top element."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def topFinalize(self):
        """Notify the top element that its content is complete."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def pop(self, node=None):
        """Pop the top element, notifying it and its parent.

        *node* is optional: the builder and ElementStack call pop() without it.
        """
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))

    def popRaw(self):
        """Pop the top element without notifying anything."""
        raise NotImplementedError('Subclass Responsibility: %r' % (self,))
97
98 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
99 #~Reference Element Stack Implementation
100 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101
class ElementStack(ElementStackAbstract):
    """Reference element stack; tied closely to the ElementBase callbacks.

    Every notification hands the element the owning builder (from
    getBuilder()) as its first argument.
    """

    def buildElement(self, node, attributes, namespacemap):
        """Create the element for *node* and return it (not yet pushed).

        The builder's getElementFactory() is asked for a factory, the factory
        is called with the same arguments, and the new element then receives
        xmlBuildCreate().
        """
        builder = self.getBuilder()
        # The same argument tuple selects the factory and is then passed to it.
        factoryArgs = (builder, self.topElementOrNone(), node, attributes, namespacemap)
        factory = builder.getElementFactory(*factoryArgs)
        element = factory(*factoryArgs)
        element.xmlBuildCreate(builder)
        return element

    def push(self, element, node, nodeAttributes, nodeNSChain, srcref):
        """Announce *element* to the current top (if any) and push it.

        The parent receives xmlAddElement() with the child's xmlGetElement()
        result.  nodeAttributes and nodeNSChain are accepted but not used here.
        """
        if self.stack:
            builder = self.getBuilder()
            # Pass the builder to xmlGetElement, as pop() does; ElementBase
            # declares that parameter as elemBuilder.
            self.topElement().xmlAddElement(
                builder, node, element.xmlGetElement(builder), srcref)
        self.pushRaw(element)

    def pushRaw(self, element):
        """Append *element* to the stack without any notification."""
        self.stack.append(element)

    def topInitialize(self):
        """Send xmlInitStarted() to the top element."""
        self.topElement().xmlInitStarted(self.getBuilder())

    def topPreAddElement(self, node, attributes, srcref):
        """Send xmlPreAddElement() to the top element; no-op on an empty stack."""
        if self.stack:
            self.topElement().xmlPreAddElement(self.getBuilder(), node, attributes, srcref)

    def topAddData(self, data, srcref):
        """Send xmlAddData() to the top element."""
        self.topElement().xmlAddData(self.getBuilder(), data, srcref)

    def topAddComment(self, comment):
        """Send xmlAddComment() to the top element."""
        self.topElement().xmlAddComment(self.getBuilder(), comment)

    def topFinalize(self):
        """Send xmlInitFinalized() to the top element."""
        self.topElement().xmlInitFinalized(self.getBuilder())

    def pop(self):
        """Pop the top element and return its xmlGetElement() result.

        The popped element receives xmlBuildComplete(); if a parent remains
        on the stack it receives xmlPostAddElement() with the result.
        """
        element = self.popRaw()
        element.xmlBuildComplete(self.getBuilder())
        result = element.xmlGetElement(self.getBuilder())
        if self.stack:
            self.topElement().xmlPostAddElement(self.getBuilder(), result)
        return result

    def popRaw(self):
        """Remove and return the top element without any notification."""
        return self.stack.pop()
141
142 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
143
class ElementBase(object):
    """Base class for the objects ElementStack creates and drives.

    Every hook below is a no-op; subclasses override the ones they need.
    """

    def __init__(self, elemBuilder, parentElem, node, attributes, namespaceMap):
        """Runs only when the element class itself is used as the factory.

        ElementStack.buildElement calls the factory with the builder, the
        current top element (or None), the node, its attributes and the
        namespace map.
        """
        pass

    def xmlBuildCreate(self, elemBuilder):
        """Hook: the element was just created and has not been pushed yet."""
        pass

    def xmlInitStarted(self, elemBuilder):
        """Hook: the element is now on the stack; no subnodes visited yet."""
        pass

    def xmlPreAddElement(self, elemBuilder, node, attributes, srcref):
        """Hook: a subelement was encountered; it has not been built yet."""
        pass

    def xmlAddElement(self, elemBuilder, node, obj, srcref):
        """Hook: subelement *obj* was built and is about to be pushed."""
        pass

    def xmlPostAddElement(self, elemBuilder, obj):
        """Hook: subelement *obj* has finished building."""
        pass

    def xmlAddData(self, elemBuilder, data, srcref):
        """Hook: character data was encountered for this element."""
        pass

    def xmlAddComment(self, elemBuilder, comment):
        """Hook: comment data was encountered for this element."""
        pass

    def xmlInitFinalized(self, elemBuilder):
        """Hook: all subnodes are done; the element is still on the stack."""
        pass

    def xmlBuildComplete(self, elemBuilder):
        """Hook: the element was popped off the stack."""
        pass

    def xmlGetElement(self, elemBuilder):
        """Return the "resultant" object for this element.

        Defaults to the element itself; override to delegate to another object.
        """
        return self

    #~ WeakRef Callbacks ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def asWeakRef(self):
        """Return a weakref.ref to this element."""
        return weakref.ref(self)

    def asWeakProxy(self):
        """Return a weakref.proxy to this element."""
        return weakref.proxy(self)
176
177 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
178 #~ XMLBuilder
179 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
180
class BuilderStatistics(object):
    """Attribute bag collecting figures about one parse run.

    The builder stores values as plain instance attributes; names starting
    with an underscore are treated as hidden by getResults().
    """

    # Clock used for timestamps (timeit's default timer).
    markTime = staticmethod(systemTimer)

    def getResults(self, incHidden=False):
        """Return the recorded statistics as a name-sorted list of (name, value).

        incHidden -- when true, also include attributes whose name starts
                     with an underscore
        """
        results = vars(self).items()
        if not incHidden:
            results = [(name, value) for name, value in results if not name.startswith('_')]
        # sorted() accepts any iterable, so this also works where items()
        # returns a view that has no sort() method.
        return sorted(results)

    def onBegin(self, skinner):
        """Reset for a new run by dropping every recorded attribute."""
        self.__dict__.clear()

    def onEnd(self, skinner):
        """Hook called when a run ends; does nothing by default."""
        pass
196
197 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
198
class Attributes(dict):
    """Attribute mapping that can also carry extra info as instance attributes."""

    def __init__(self, *args, **kw):
        """Accepts exactly what dict() accepts."""
        dict.__init__(self, *args, **kw)

    def setMetaInfo(self, **kw):
        """Attach each keyword as an instance attribute; the mapping's items
        are not touched."""
        for name in kw:
            setattr(self, name, kw[name])
206
207 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
208
class BuilderState(object):
    """Snapshot of a builder's parse state, for a later restore().

    The element stack is captured through its saveState(); the namespace
    chain, last completed element and parser command are held by reference.
    """

    def __init__(self, xmlbuilder):
        """Capture the current state of *xmlbuilder*."""
        self.elementStack = xmlbuilder.elementStack.saveState()
        for fieldName in ('namespaceChain', '_lastCompleteElement', '_parserCmd'):
            setattr(self, fieldName, getattr(xmlbuilder, fieldName))

    def restore(self, xmlbuilder):
        """Write the captured state back onto *xmlbuilder*; always returns True.

        The saved stack object itself is installed, not a copy of it.
        """
        for fieldName in ('elementStack', 'namespaceChain',
                          '_lastCompleteElement', '_parserCmd'):
            setattr(xmlbuilder, fieldName, getattr(self, fieldName))
        return True
222
223 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
224
class XMLBuilder(object):
    """Guides the building of python objects from XML.

    Depends upon the interface defined by ElementBase.

    See xmlNode or xmlClassBuilder for more concrete builders.
    """

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Constants / Variables / Etc.
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Separates the namespace part from the local name in the qualified
    # names handed to _splitQualifiedName (attribute name spelling is kept).
    seperator = ' '
    # Statistics object; created lazily by _preParse from StatisticsFactory.
    statistics = None

    # Maps a namespace URI to the URI that should be used in its place.
    xmlnsSynonyms = {}

    # Factories; override in subclasses to customise the builder.
    ElementFactory = ElementBase
    AttributeFactory = Attributes
    ElementStackFactory = ElementStack
    BuilderStateFactory = BuilderState
    StatisticsFactory = BuilderStatistics
    XMLParserFactory = ExpatParserCmd

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Protected Methods
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _xmlnsGetSynonym(self, uri):
        """Return the synonym registered for *uri*, else *uri* itself.

        Falsy input and falsy results are both normalised to None.
        """
        # Don't forget to substitute our namespace synonyms!
        return self.xmlnsSynonyms.get(uri or None, uri) or None

    def _startNamespaceDeclHandler(self, prefix, uri):
        """Part of the tree-style template method, called before the beginning
        of an XML node parse to manage namespaces."""
        # Add the prefix/uri to our current namespace mapping
        self.namespaceChain.setxmlns(prefix, self._xmlnsGetSynonym(uri))

    def _endNamespaceDeclHandler(self, prefix):
        """Part of the tree-style template method, called after the end of an
        XML node parse to manage namespaces.  Nothing to do here."""
        pass

    def _getAttributes(self, attributes):
        """Wrap the parser-supplied *attributes* using AttributeFactory."""
        return self.AttributeFactory(attributes)

    def getElementFactory(self, elemBuilder, element, node, attributes, namespacemap):
        """Return the factory used to create the element for *node*.

        Always ElementFactory here; the arguments are there for subclasses
        that choose a factory per node.
        """
        return self.ElementFactory

    def getElementStack(self):
        """Return the element stack of the parse in progress."""
        return self.elementStack

    def _startElement(self, name, attributes):
        """Start-of-element handler: look up the source position and delegate
        to _startElementEx."""
        srcref = self._getSourceFilenameAndLineNumber()
        return self._startElementEx(name, attributes, srcref)

    def _startElementEx(self, name, attributes, srcref):
        """Part of the tree-style template method, called at the beginning of an XML node parse.
        Instantiates the element returned by getElementFactory.

        Returns the new element, which is left on top of the element stack.
        """
        node = self._splitQualifiedName(name)
        # The element is built with the chain in effect before this node
        # gets its own chain link below.
        nodeNSChain = self.namespaceChain

        nodeAttributes = self._getAttributes(attributes)
        nodeAttributes.setMetaInfo(node=node, srcref=srcref)

        # NOTE(review): the parent is given the raw ``attributes``, not the
        # wrapped ``nodeAttributes`` -- confirm this is intended.
        self.elementStack.topPreAddElement(node, attributes, srcref)

        self.namespaceChain = self.namespaceChain.newChain()

        element = self.elementStack.buildElement(node, nodeAttributes, nodeNSChain)
        self.elementStack.push(element, node, nodeAttributes, nodeNSChain, srcref)
        self.elementStack.topInitialize()
        return element

    def _charData(self, data):
        """Part of the tree-style template method, called when CData is found."""
        srcref = self._getSourceFilenameAndLineNumber()
        self._charDataEx(data, srcref)

    def _charDataEx(self, data, srcref):
        """Forward character *data* and its source position to the top element."""
        return self.elementStack.topAddData(data, srcref)

    def _commentData(self, data):
        """Part of the tree-style template method, called when Comment data is found."""
        return self.elementStack.topAddComment(data)

    def _endElement(self, name):
        """Part of the tree-style template method, called at the closing of an XML node parse.
        Simply notifies the element that it is complete.

        Returns the completed element (None when the stack was already empty).
        """
        if self.elementStack:
            self.elementStack.topFinalize()
            self._lastCompleteElement = self.elementStack.pop()
            self.namespaceChain = self.namespaceChain.next().copy()
        else:
            self.namespaceChain = None
            self._lastCompleteElement = None

        # Counted in both branches above.
        self.statistics.elements += 1
        return self._lastCompleteElement

    def _splitQualifiedName(self, combined):
        """Split *combined* at the last separator into (namespace, name).

        The namespace part is passed through _xmlnsGetSynonym; it is None
        when *combined* contains no separator.
        """
        # rsplit
        idx = combined.rfind(self.seperator)
        if idx < 0:
            result = [None, combined]
        else:
            result = [combined[:idx], combined[idx+len(self.seperator):]]
        # end rsplit

        return self._xmlnsGetSynonym(result[0]), result[1]

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ state save and restore - for advanced usage
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def saveState(self, *args, **kw):
        """Returns a state token that can be given to restoreState"""
        return self.BuilderStateFactory(self, *args, **kw)

    def restoreState(self, state, *args, **kw):
        """Uses the stack state token returned by saveState to restore the
        parsing stack to a prior level."""
        return state.restore(self, *args, **kw)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ parse template method
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def parse(self, xmlString, parseContext=None, **kw):
        """Starts the building of python objects using the XML parser.  Assumes
        first argument is string-like object."""
        return self.parseEx(xmlString, 'data', parseContext, **kw)

    def parseFile(self, xmlFile, parseContext=None, **kw):
        """Starts the building of python objects using the XML parser.  Assumes
        first argument is a file-like object."""
        return self.parseEx(xmlFile, 'file', parseContext, **kw)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def getBuildResult(self):
        """Return the most recently completed element."""
        return self._lastCompleteElement

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Weakref Utils
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def asWeakRef(self):
        """Return a weakref.ref to this builder."""
        return weakref.ref(self)

    def asWeakProxy(self):
        """Return a weakref.proxy to this builder."""
        return weakref.proxy(self)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Current Settings
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    _uri = None

    def getURI(self):
        """Return the current parser command's URI, or None when none is set."""
        parserCmd = self.getParserCmd()
        if parserCmd is not None:
            return parserCmd.getURI()
        else:
            return None
    uri = property(getURI)

    def _getSourceFilenameAndLineNumber(self):
        """Return the current parser command's source filename/line number,
        or None when no parser command is set."""
        parserCmd = self.getParserCmd()
        if parserCmd is not None:
            return parserCmd.getSourceFilenameAndLineNumber()
        else:
            return None

    _parserCmd = None

    def getParserCmd(self):
        """Return the parser command currently installed (None if none)."""
        return self._parserCmd

    def setParserCmd(self, parserCmd):
        """Install *parserCmd* and return the one it replaces."""
        previous = self._parserCmd
        self._parserCmd = parserCmd
        return previous

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    #~ Protected support
    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _createParserCmd(self, xmlSource, parserInterface, parseContext):
        """Creates a callable object with interface specified by parserInterface using self.XMLParserFactory

        parserInterface:
            'data': returns a callable that takes a string of the XML document
            'file': returns a callable that takes a file-like interface to the XML document
            'raw': returns the raw parser object
        """
        return self.XMLParserFactory(self, xmlSource, parserInterface, parseContext)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def parseEx(self, xmlSource, parserInterface, parseContext, **kw):
        """Create a parser command for *xmlSource* and run it through parseCmd."""
        parserCmd = self._createParserCmd(xmlSource, parserInterface, parseContext)
        return self.parseCmd(parserCmd, **kw)

    def parseExRaw(self, xmlSource, parserInterface, parseContext, **kw):
        """Create a parser command for *xmlSource* and run it through
        parseCmdRaw, i.e. without the _preParse/_postParse bracket."""
        parserCmd = self._createParserCmd(xmlSource, parserInterface, parseContext)
        return self.parseCmdRaw(parserCmd, **kw)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def parseCmd(self, parserCmd, **kw):
        """Run *parserCmd* as a complete parse and return the build result.

        Brackets parseCmdRaw with _preParse and _postParse; on any exception
        _abortParse is called and the exception is re-raised.  Keyword
        arguments are forwarded to parseCmdRaw.
        """
        self._preParse()
        try:
            # _preParse runs before the parser command is installed, so the
            # URI it records through getURI() is None for a top-level parse.
            # Fill it in from the command that is about to run.
            if self.statistics.uri is None:
                getURI = getattr(parserCmd, 'getURI', None)
                if getURI is not None:
                    self.statistics.uri = getURI()

            result = self.parseCmdRaw(parserCmd, **kw)
        except:
            # Deliberately catches everything: shut down the parse state,
            # then re-raise the exception untouched.
            self._abortParse()
            raise
        else:
            self._postParse()

        return result

    def parseCmdRaw(self, parserCmd, onBeforeParse=None, onAfterParse=None):
        """Install *parserCmd*, call it, and return getBuildResult().

        onBeforeParse / onAfterParse -- optional callables invoked as
        ``callback(builder, parserCmd)`` around the parse.  The previously
        installed parser command is always put back afterwards.
        """
        previous = self.setParserCmd(parserCmd)
        try:
            if onBeforeParse:
                onBeforeParse(self, parserCmd)

            parserCmd()

            if onAfterParse:
                onAfterParse(self, parserCmd)
        finally:
            self.setParserCmd(previous)

        return self.getBuildResult()

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def _preParse(self):
        """Set up per-parse state and statistics.

        Raises RuntimeError if this instance is already parsing.
        """
        # _lastCompleteElement exists as an instance attribute only while a
        # parse is running, so it doubles as the "in progress" marker.
        if hasattr(self, '_lastCompleteElement'):
            raise RuntimeError("An xml parse is already in progress from this instance!")

        self.elementStack = self.ElementStackFactory(self)
        self.namespaceChain = XMLNamespaceMap()
        self._lastCompleteElement = None

        if self.statistics is None:
            self.statistics = self.StatisticsFactory()
        self.statistics.onBegin(self)
        self.statistics.uri = self.getURI()
        self.statistics.elements = 0
        self.statistics._startTime = self.statistics.markTime()

    def _postParse(self):
        """Drop per-parse state and finalise the statistics after a successful parse."""
        del self.elementStack
        del self.namespaceChain
        del self._lastCompleteElement

        ## Statistics related
        self.statistics._endTime = self.statistics.markTime()
        self.statistics.deltaTime = self.statistics._endTime - self.statistics._startTime
        # max() guards both ratios against division by zero.
        self.statistics.elemPerSec = self.statistics.elements / max(1e-10, self.statistics.deltaTime)
        self.statistics.secPerElem = self.statistics.deltaTime / max(1, self.statistics.elements)
        self.statistics.onEnd(self)

    def _abortParse(self):
        """Clear the "in progress" marker after a failed parse.

        NOTE(review): elementStack and namespaceChain are left in place and
        the statistics are not finalised -- confirm whether that is intended
        (e.g. for post-mortem inspection).
        """
        if hasattr(self, '_lastCompleteElement'):
            del self._lastCompleteElement
489
Note: See TracBrowser for help on using the browser.