Package CedarBackup2 :: Module xmlutil
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.xmlutil

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2006,2010 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # Portions Copyright (c) 2000 Fourthought Inc, USA. 
 15  # All Rights Reserved. 
 16  # 
 17  # This program is free software; you can redistribute it and/or 
 18  # modify it under the terms of the GNU General Public License, 
 19  # Version 2, as published by the Free Software Foundation. 
 20  # 
 21  # This program is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 24  # 
 25  # Copies of the GNU General Public License are available from 
 26  # the Free Software Foundation website, http://www.gnu.org/. 
 27  # 
 28  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 29  # 
 30  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 31  # Language : Python (>= 2.5) 
 32  # Project  : Cedar Backup, release 2 
 33  # Revision : $Id: xmlutil.py 1096 2015-01-05 20:24:25Z pronovic $ 
 34  # Purpose  : Provides general XML-related functionality. 
 35  # 
 36  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 37   
 38  ######################################################################## 
 39  # Module documentation 
 40  ######################################################################## 
 41   
 42  """ 
 43  Provides general XML-related functionality. 
 44   
 45  What I'm trying to do here is abstract much of the functionality that directly 
 46  accesses the DOM tree.  This is not so much to "protect" the other code from 
 47  the DOM, but to standardize the way it's used.  It will also help extension 
 48  authors write code that easily looks more like the rest of Cedar Backup. 
 49   
 50  @sort: createInputDom, createOutputDom, serializeDom, isElement, readChildren,  
 51         readFirstChild, readStringList, readString, readInteger, readBoolean, 
 52         addContainerNode, addStringNode, addIntegerNode, addBooleanNode, 
 53         TRUE_BOOLEAN_VALUES, FALSE_BOOLEAN_VALUES, VALID_BOOLEAN_VALUES 
 54   
 55  @var TRUE_BOOLEAN_VALUES: List of boolean values in XML representing C{True}. 
 56  @var FALSE_BOOLEAN_VALUES: List of boolean values in XML representing C{False}. 
 57  @var VALID_BOOLEAN_VALUES: List of valid boolean values in XML. 
 58   
 59  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 60  """ 
 61  # pylint: disable=C0111,C0103,W0511,W0104,W0106 
 62   
 63  ######################################################################## 
 64  # Imported modules 
 65  ######################################################################## 
 66   
 67  # System modules 
 68  import sys 
 69  import re 
 70  import logging 
 71  import codecs 
 72  from types import UnicodeType 
 73  from StringIO import StringIO 
 74   
 75  # XML-related modules 
 76  from xml.parsers.expat import ExpatError 
 77  from xml.dom.minidom import Node 
 78  from xml.dom.minidom import getDOMImplementation 
 79  from xml.dom.minidom import parseString 
 80   
 81   
 82  ######################################################################## 
 83  # Module-wide constants and variables 
 84  ######################################################################## 
 85   
 86  logger = logging.getLogger("CedarBackup2.log.xml")  # module-wide logger for the XML utilities
 87   
 88  TRUE_BOOLEAN_VALUES   = [ "Y", "y", ]  # XML text values that readBoolean() maps to True
 89  FALSE_BOOLEAN_VALUES  = [ "N", "n", ]  # XML text values that readBoolean() maps to False
 90  VALID_BOOLEAN_VALUES  = TRUE_BOOLEAN_VALUES + FALSE_BOOLEAN_VALUES  # all accepted spellings; quoted in readBoolean()'s error message
 91   
 92   
 93  ######################################################################## 
 94  # Functions for creating and parsing DOM trees 
 95  ######################################################################## 
 96   
def createInputDom(xmlData, name="cb_config"):
   """
   Parses an XML string into a DOM tree and locates its root node.

   The root node is looked up by name among the immediate children of the
   parsed document, using L{readFirstChild}.

   @param xmlData: XML document to parse, as a string.
   @param name: Assumed base name of the document (root node name).

   @return: Tuple C{(xmlDom, parentNode)} for the parsed document.
            C{parentNode} is C{None} when the document has no top-level
            node called C{name}.
   @raise ValueError: If the document can't be parsed.
   """
   try:
      document = parseString(xmlData)
      root = readFirstChild(document, name)
      return (document, root)
   except (IOError, ExpatError):
      # Fetch the active exception explicitly rather than binding it in the except clause
      cause = sys.exc_info()[1]
      raise ValueError("Unable to parse XML document: %s" % cause)
110
def createOutputDom(name="cb_config"):
   """
   Builds an empty DOM document to be filled in and written out as XML.

   @param name: Base name of the document (root node name).

   @return: Tuple C{(xmlDom, parentNode)} for the new document, where
            C{parentNode} is the document's root element.
   """
   document = getDOMImplementation().createDocument(None, name, None)
   return (document, document.documentElement)
120 121 122 ######################################################################## 123 # Functions for reading values out of XML documents 124 ######################################################################## 125
def isElement(node):
   """
   Tells whether an XML node is an element node.

   @param node: DOM node to check; only its C{nodeType} is examined.

   @return: C{True} if the node's type is C{Node.ELEMENT_NODE}, C{False} otherwise.
   """
   kind = node.nodeType
   return kind == Node.ELEMENT_NODE
131
def readChildren(parent, name):
   """
   Returns a list of nodes with a given name immediately beneath the parent.

   By "immediately beneath" the parent, we mean from among nodes that are
   direct children of the passed-in parent node.

   Only the parent's own C{childNodes} are inspected.  An earlier version
   called C{getElementsByTagName}, which collects matching nodes at every
   level below the parent, and then pruned that list back to the direct
   children.  Looking at the direct children alone yields the same nodes in
   the same (document) order without walking the whole subtree.

   Matching follows the C{getElementsByTagName} rule: a child matches when it
   is an element node whose tag name equals C{name}, and the special name
   C{"*"} matches every element.

   @param parent: Parent node to search beneath (C{None} is allowed).
   @param name: Name of nodes to search for.

   @return: List of child nodes with correct parent, or an empty list if
            no matching nodes are found or C{parent} is C{None}.
   """
   if parent is None:
      return []
   return [ child for child in parent.childNodes
            if child.nodeType == Node.ELEMENT_NODE and (name == "*" or child.tagName == name) ]
159
def readFirstChild(parent, name):
   """
   Returns the first child with a given name immediately beneath the parent.

   The candidates come from L{readChildren}, so only direct children of the
   passed-in parent node are considered.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: First properly-named child of parent, or C{None} if no matching nodes are found.
   """
   matches = readChildren(parent, name)
   if matches is not None and matches != []:
      return matches[0]
   return None
176
def readStringList(parent, name):
   """
   Returns a list of the string contents associated with nodes with a given
   name immediately beneath the parent.

   The nodes are found with L{readChildren}, so only direct children of the
   passed-in parent node are considered.  For each of them, the "string
   contents" is the value of its first C{TEXT_NODE} child.  Nodes without any
   C{TEXT_NODE} child contribute nothing to the returned list.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: List of strings as described above, or C{None} if no string
            contents were found at all.
   """
   values = []
   for element in readChildren(parent, name):
      if not element.hasChildNodes():
         continue
      for candidate in element.childNodes:
         if candidate.nodeType == Node.TEXT_NODE:
            values.append(candidate.nodeValue)
            break   # only the first text node of each element counts
   if values == []:
      return None
   return values
208
def readString(parent, name):
   """
   Returns the string contents of the first child with a given name
   immediately beneath the parent.

   This is the first entry of the list produced by L{readStringList}, so only
   direct children of the passed-in parent node are considered, and the
   string contents of a node is the value of its first C{TEXT_NODE} child.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: String contents of node or C{None} if L{readStringList} finds nothing.
   """
   values = readStringList(parent, name)
   return None if values is None else values[0]
228
def readInteger(parent, name):
   """
   Returns the integer contents of the first child with a given name
   immediately beneath the parent.

   The text is obtained via L{readString} and converted with C{int()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   text = readString(parent, name)
   if text is not None:
      return int(text)
   return None
248
def readLong(parent, name):
   """
   Returns the long integer contents of the first child with a given name
   immediately beneath the parent.

   The text is obtained via L{readString} and converted with C{long()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Long integer contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to an integer.
   """
   text = readString(parent, name)
   if text is not None:
      return long(text)
   return None
268
def readFloat(parent, name):
   """
   Returns the float contents of the first child with a given name
   immediately beneath the parent.

   The text is obtained via L{readString} and converted with C{float()}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Float contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a
                      float value.
   """
   text = readString(parent, name)
   if text is not None:
      return float(text)
   return None
289
def readBoolean(parent, name):
   """
   Returns the boolean contents of the first child with a given name
   immediately beneath the parent.

   The text is obtained via L{readString}.  It must be one of the values in
   L{VALID_BOOLEAN_VALUES}: entries of L{TRUE_BOOLEAN_VALUES} yield C{True}
   and entries of L{FALSE_BOOLEAN_VALUES} yield C{False}.

   @param parent: Parent node to search beneath.
   @param name: Name of node to search for.

   @return: Boolean contents of node or C{None} if no matching nodes are found.
   @raise ValueError: If the string at the location can't be converted to a boolean.
   """
   text = readString(parent, name)
   if text is None:
      return None
   if text in TRUE_BOOLEAN_VALUES:
      return True
   if text in FALSE_BOOLEAN_VALUES:
      return False
   raise ValueError("Boolean values must be one of %s." % VALID_BOOLEAN_VALUES)
316 317 318 ######################################################################## 319 # Functions for writing values into XML documents 320 ######################################################################## 321
def addContainerNode(xmlDom, parentNode, nodeName):
   """
   Creates a new, empty element and appends it as the last child of a parent node.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.

   @return: Reference to the newly-created node.
   """
   child = xmlDom.createElement(nodeName)
   parentNode.appendChild(child)
   return child
335
def addStringNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a new node as the next child of a parent, to contain a string.

   The node itself is created with L{addContainerNode}.  Unless C{nodeValue}
   is C{None}, a text node holding the value is appended beneath it; with
   C{None} the new node is left empty (i.e. it gets no text node child).

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   element = addContainerNode(xmlDom, parentNode, nodeName)
   if nodeValue is None:
      return element
   element.appendChild(xmlDom.createTextNode(nodeValue))
   return element
355
def addIntegerNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a new node as the next child of a parent, to contain an integer.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Otherwise the integer is rendered as text using "%d" and the result is
   added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   # %d works for both int and long
   text = None if nodeValue is None else ("%d" % nodeValue)
   return addStringNode(xmlDom, parentNode, nodeName, text)
377
def addLongNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a new node as the next child of a parent, to contain a long integer.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Otherwise the integer is rendered as text using "%d" and the result is
   added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   # %d works for both int and long
   text = None if nodeValue is None else ("%d" % nodeValue)
   return addStringNode(xmlDom, parentNode, nodeName, text)
399
def addBooleanNode(xmlDom, parentNode, nodeName, nodeValue):
   """
   Adds a new node as the next child of a parent, to contain a boolean.

   If the C{nodeValue} is None, then the node will be created, but will be
   empty (i.e. will contain no text node child).

   Boolean C{True}, or anything else interpreted as C{True} by Python, is
   written as the string "Y".  Anything else is written as the string "N".
   The result is added to the document via L{addStringNode}.

   @param xmlDom: DOM tree as from C{impl.createDocument()}.
   @param parentNode: Parent node to create child for.
   @param nodeName: Name of the new container node.
   @param nodeValue: The value to put into the node.

   @return: Reference to the newly-created node.
   """
   if nodeValue is None:
      text = None
   elif nodeValue:
      text = "Y"
   else:
      text = "N"
   return addStringNode(xmlDom, parentNode, nodeName, text)
425 426 427 ######################################################################## 428 # Functions for serializing DOM trees 429 ######################################################################## 430
def serializeDom(xmlDom, indent=3):
   """
   Serializes a DOM tree and returns the result in a string.

   The tree is written by a L{Serializer} into an in-memory buffer using
   the "UTF-8" encoding, and the buffer's contents are returned.

   @param xmlDom: XML DOM tree to serialize
   @param indent: Number of spaces to indent, as an integer
   @return: String form of DOM tree, pretty-printed.
   """
   sink = StringIO()
   Serializer(sink, "UTF-8", indent=indent).serialize(xmlDom)
   serialized = sink.getvalue()
   sink.close()
   return serialized
444
class Serializer(object):

   """
   XML serializer class.

   This is a customized serializer derived from the PrettyPrint code in the
   PyXML distribution.  Around release 2.7.0, PrettyPrint was the only reason
   Cedar Backup still depended on PyXML, so that code was lifted out and cut
   down until it did just what was needed and no more.

   The code was originally called PrintVisitor.  Nearly all of the methods are
   private; clients are expected to call the high-level L{serialize} method
   rather than the C{_visit} machinery.

   As a consequence of the trimming, this is not a complete XML serializer.
   Support for HTML and XHTML was removed, and so was support for namespaces
   (which dragged along a lot of extra code, and Cedar Backup doesn't use
   namespaces).  Everything else should pretty much work as expected.

   @copyright: This code, prior to customization, was part of the PyXML
   codebase, and before that was part of the 4DOM suite developed by
   Fourthought, Inc.  In its original form, it was Copyright (c) 2000
   Fourthought Inc, USA; All Rights Reserved.
   """

   def __init__(self, stream=sys.stdout, encoding="UTF-8", indent=3):
      """
      Initialize a serializer.
      @param stream: Stream to write output to.
      @param encoding: Output encoding.
      @param indent: Number of spaces to indent, as an integer
      """
      self.stream = stream
      self.encoding = encoding
      self._indent = indent * " "   # whitespace emitted once per nesting level
      self._depth = 0               # current nesting level
      self._inText = 0              # set to 1 after text or an entity reference is written

   def serialize(self, xmlDom):
      """
      Serialize the passed-in XML document, followed by a trailing newline.
      @param xmlDom: XML DOM tree to serialize
      @raise ValueError: If there's an unknown node type in the document.
      """
      self._visit(xmlDom)
      self.stream.write("\n")

   def _write(self, text):
      """Encodes text with the configured encoding and writes it to the stream."""
      self.stream.write(_encodeText(text, self.encoding))

   def _tryIndent(self):
      """Starts a new, indented line unless text was just written or indenting is off."""
      if self._inText or not self._indent:
         return
      self._write('\n' + self._indent*self._depth)

   def _visit(self, node):
      """
      Dispatches a node to the visit method matching its node type.
      @raise ValueError: If there's an unknown node type in the document.
      """
      handlers = (
         (Node.ELEMENT_NODE, self._visitElement),
         (Node.ATTRIBUTE_NODE, self._visitAttr),
         (Node.TEXT_NODE, self._visitText),
         (Node.CDATA_SECTION_NODE, self._visitCDATASection),
         (Node.ENTITY_REFERENCE_NODE, self._visitEntityReference),
         (Node.ENTITY_NODE, self._visitEntity),
         (Node.PROCESSING_INSTRUCTION_NODE, self._visitProcessingInstruction),
         (Node.COMMENT_NODE, self._visitComment),
         (Node.DOCUMENT_NODE, self._visitDocument),
         (Node.DOCUMENT_TYPE_NODE, self._visitDocumentType),
         (Node.DOCUMENT_FRAGMENT_NODE, self._visitDocumentFragment),
         (Node.NOTATION_NODE, self._visitNotation),
      )
      for nodeType, handler in handlers:
         if node.nodeType == nodeType:
            return handler(node)

      # It has a node type, but we don't know how to handle it
      raise ValueError("Unknown node type: %s" % repr(node))

   def _visitNodeList(self, node, exclude=None):
      """Visits every entry of a node list, skipping the C{exclude} node."""
      for child in node:
         if child is not exclude:
            self._visit(child)

   def _visitNamedNodeMap(self, node):
      """Visits every value held in a named node map."""
      for entry in node.values():
         self._visit(entry)

   def _visitAttr(self, node):
      """Writes an attribute as C{ name=value}, escaped and quoted."""
      self._write(' ' + node.name)
      escaped = _translateCDATA(node.value, self.encoding)
      escaped, quote = _translateCDATAAttr(escaped)
      self.stream.write("=%s%s%s" % (quote, escaped, quote))

   def _visitProlog(self):
      """Writes the XML declaration, naming 'utf-8' when no encoding is set."""
      declared = self.encoding or 'utf-8'
      self._write("<?xml version='1.0' encoding='%s'?>" % declared)
      self._inText = 0

   def _visitDocument(self, node):
      """Writes the prolog, the doctype (if any), and then the remaining children."""
      self._visitProlog()
      if node.doctype:
         self._visitDocumentType(node.doctype)
      # The doctype was handled above, so it is skipped in the child list
      self._visitNodeList(node.childNodes, exclude=node.doctype)

   def _visitDocumentFragment(self, node):
      """Writes each child of a document fragment."""
      self._visitNodeList(node.childNodes)

   def _visitElement(self, node):
      """
      Writes an element: start tag, attributes, children one level deeper,
      and end tag.  An element without children is written in C{<tag/>} form.
      """
      self._tryIndent()
      self._write('<%s' % node.tagName)
      for attribute in node.attributes.values():
         self._visitAttr(attribute)
      if len(node.childNodes) == 0:
         self._write('/>')
      else:
         self._write('>')
         self._depth += 1
         self._visitNodeList(node.childNodes)
         self._depth -= 1
         # The end tag goes on its own line unless the element ended with text
         if not self._inText:
            self._tryIndent()
         self._write('</%s>' % node.tagName)
      self._inText = 0

   def _visitText(self, node):
      """Writes a text node's data, escaped; empty data writes nothing."""
      text = node.data
      if self._indent:
         # NOTE(review): the stripped copy is discarded, so the data is written
         # untrimmed -- confirm whether "text = text.strip()" was intended.
         text.strip()
      if not text:
         return
      self.stream.write(_translateCDATA(text, self.encoding))
      self._inText = 1

   def _visitDocumentType(self, doctype):
      """
      Writes a C{<!DOCTYPE>} declaration, including any entities and notations.
      Nothing is written when the doctype has neither a system nor a public id.
      """
      if not (doctype.systemId or doctype.publicId):
         return

      def quoted(identifier):
         # Double quotes by default; single quotes when the value contains a double quote
         if identifier and '"' in identifier:
            return "'%s'" % identifier
         return '"%s"' % identifier

      self._tryIndent()
      self._write('<!DOCTYPE %s' % doctype.name)
      system = quoted(doctype.systemId)
      # For a public id containing a double quote we should probably throw an error
      # Valid characters: <space> | <newline> | <linefeed> |
      #                   [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
      public = quoted(doctype.publicId)
      if doctype.publicId and doctype.systemId:
         self._write(' PUBLIC %s %s' % (public, system))
      elif doctype.systemId:
         self._write(' SYSTEM %s' % system)
      if doctype.entities or doctype.notations:
         self._write(' [')
         self._depth += 1
         self._visitNamedNodeMap(doctype.entities)
         self._visitNamedNodeMap(doctype.notations)
         self._depth -= 1
         self._tryIndent()
         self._write(']>')
      else:
         self._write('>')
      self._inText = 0

   def _visitEntity(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      # NOTE(review): the identifiers below are written without quotes -- confirm that is intended
      self._tryIndent()
      self._write('<!ENTITY %s' % (node.nodeName))
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      if node.notationName:
         self._write(' NDATA %s' % node.notationName)
      self._write('>')

   def _visitNotation(self, node):
      """Visited from a NamedNodeMap in DocumentType"""
      # NOTE(review): the identifiers below are written without quotes -- confirm that is intended
      self._tryIndent()
      self._write('<!NOTATION %s' % node.nodeName)
      if node.publicId:
         self._write(' PUBLIC %s' % node.publicId)
      if node.systemId:
         self._write(' SYSTEM %s' % node.systemId)
      self._write('>')

   def _visitCDATASection(self, node):
      """Writes a CDATA section on its own line, with the data unescaped."""
      self._tryIndent()
      self._write('<![CDATA[%s]]>' % (node.data))
      self._inText = 0

   def _visitComment(self, node):
      """Writes a comment on its own line."""
      self._tryIndent()
      self._write('<!--%s-->' % (node.data))
      self._inText = 0

   def _visitEntityReference(self, node):
      """Writes an entity reference inline and marks the output as being in text."""
      self._write('&%s;' % node.nodeName)
      self._inText = 1

   def _visitProcessingInstruction(self, node):
      """Writes a processing instruction on its own line."""
      self._tryIndent()
      self._write('<?%s %s?>' % (node.target, node.data))
      self._inText = 0
682
683 -def _encodeText(text, encoding):
684 """ 685 @copyright: This code, prior to customization, was part of the PyXML 686 codebase, and before that was part of the 4DOM suite developed by 687 Fourthought, Inc. It its original form, it was attributed to Martin v. 688 Löwis and was Copyright (c) 2000 Fourthought Inc, USA; All Rights Reserved. 689 """ 690 encoder = codecs.lookup(encoding)[0] # encode,decode,reader,writer 691 if type(text) is not UnicodeType: 692 text = unicode(text, "utf-8") 693 return encoder(text)[0] # result,size
694
695 -def _translateCDATAAttr(characters):
696 """ 697 Handles normalization and some intelligence about quoting. 698 699 @copyright: This code, prior to customization, was part of the PyXML 700 codebase, and before that was part of the 4DOM suite developed by 701 Fourthought, Inc. It its original form, it was Copyright (c) 2000 702 Fourthought Inc, USA; All Rights Reserved. 703 """ 704 if not characters: 705 return '', "'" 706 if "'" in characters: 707 delimiter = '"' 708 new_chars = re.sub('"', '&quot;', characters) 709 else: 710 delimiter = "'" 711 new_chars = re.sub("'", '&apos;', characters) 712 #FIXME: There's more to normalization 713 #Convert attribute new-lines to character entity 714 # characters is possibly shorter than new_chars (no entities) 715 if "\n" in characters: 716 new_chars = re.sub('\n', '&#10;', new_chars) 717 return new_chars, delimiter
718 719 #Note: Unicode object only for now
720 -def _translateCDATA(characters, encoding='UTF-8', prev_chars='', markupSafe=0):
721 """ 722 @copyright: This code, prior to customization, was part of the PyXML 723 codebase, and before that was part of the 4DOM suite developed by 724 Fourthought, Inc. It its original form, it was Copyright (c) 2000 725 Fourthought Inc, USA; All Rights Reserved. 726 """ 727 CDATA_CHAR_PATTERN = re.compile('[&<]|]]>') 728 CHAR_TO_ENTITY = { '&': '&amp;', '<': '&lt;', ']]>': ']]&gt;', } 729 ILLEGAL_LOW_CHARS = '[\x01-\x08\x0B-\x0C\x0E-\x1F]' 730 ILLEGAL_HIGH_CHARS = '\xEF\xBF[\xBE\xBF]' 731 XML_ILLEGAL_CHAR_PATTERN = re.compile('%s|%s'%(ILLEGAL_LOW_CHARS, ILLEGAL_HIGH_CHARS)) 732 if not characters: 733 return '' 734 if not markupSafe: 735 if CDATA_CHAR_PATTERN.search(characters): 736 new_string = CDATA_CHAR_PATTERN.subn(lambda m, d=CHAR_TO_ENTITY: d[m.group()], characters)[0] 737 else: 738 new_string = characters 739 if prev_chars[-2:] == ']]' and characters[0] == '>': 740 new_string = '&gt;' + new_string[1:] 741 else: 742 new_string = characters 743 #Note: use decimal char entity rep because some browsers are broken 744 #FIXME: This will bomb for high characters. Should, for instance, detect 745 #The UTF-8 for 0xFFFE and put out &#xFFFE; 746 if XML_ILLEGAL_CHAR_PATTERN.search(new_string): 747 new_string = XML_ILLEGAL_CHAR_PATTERN.subn(lambda m: '&#%i;' % ord(m.group()), new_string)[0] 748 new_string = _encodeText(new_string, encoding) 749 return new_string
750