Python Manipulating XML
A short script with descriptions for reading and manipulating XML. It seems like Python's ElementTree module should be the easiest and best suited for XML manipulation. However, I had a complicated XML structure with multiple namespaces, and lxml handled it better. ElementTree could only handle one namespace with its register function. In addition, lxml has pretty_print, which might be useful. Although in my case, when I do inserts, pretty_print did not work even with the FAQ fix for remove_blank_text.
import lxml.etree as ET

# Rewrites every <server> element of config.xml: any existing <ssl>, <log>
# and <data-source> children are dropped and replaced by freshly built ones,
# and the result is written to wc-out.xml.

# Namespace used to locate the <server> elements and their children.
namespace = "http://xmlns.oracle.com/weblogic/domain"

# XML Schema instance namespace; it owns the "nil" attribute that is
# conventionally written with the "xsi" prefix.
XSI_NAMESPACE = "http://www.w3.org/2001/XMLSchema-instance"

# Local names of the <server> children that are removed and rebuilt.
MANAGED_CHILDREN = ("ssl", "log", "data-source")

# Settings written under both <log> and <data-source-log-file>.
LOG_ROTATION_SETTINGS = (
    ("rotation-type", "byTime"),
    ("number-of-files-limited", "true"),
    ("rotate-log-on-startup", "true"),
)


def _qualified(local_name):
    """Return *local_name* in Clark notation within the domain namespace."""
    return "{%s}%s" % (namespace, local_name)


def _new_section(local_name):
    """Create a detached, childless element in the domain namespace.

    The namespace is mapped to the default (empty) prefix so that the element
    serializes without an ``ns0:`` prefix.
    """
    return ET.Element(_qualified(local_name), nsmap={None: namespace})


def _add_settings(parent, settings):
    """Append one text-only child to *parent* per ``(name, text)`` pair."""
    for local_name, text in settings:
        ET.SubElement(parent, _qualified(local_name)).text = text


def _build_ssl():
    """Build the replacement <ssl> element."""
    ssl = _new_section("ssl")
    # A literal "xsi:nil" key is not a namespace-qualified name; lxml expects
    # Clark notation ("{uri}nil").  The nsmap entry requests the "xsi" prefix.
    ET.SubElement(
        ssl,
        _qualified("hostname-verifier"),
        attrib={"{%s}nil" % XSI_NAMESPACE: "true"},
        nsmap={"xsi": XSI_NAMESPACE},
    )
    _add_settings(ssl, (
        ("hostname-verification-ignored", "true"),
        ("client-certificate-enforced", "true"),
        ("two-way-ssl-enabled", "false"),
    ))
    return ssl


def _build_log():
    """Build the replacement <log> element."""
    log = _new_section("log")
    _add_settings(log, LOG_ROTATION_SETTINGS)
    return log


def _build_data_source():
    """Build the replacement <data-source> element."""
    data_source = _new_section("data-source")
    log_file = ET.SubElement(data_source, _qualified("data-source-log-file"))
    _add_settings(log_file, LOG_ROTATION_SETTINGS)
    return data_source


def _remove_managed_children(server):
    """Remove existing managed children of *server*, reporting every child."""
    managed_tags = dict((_qualified(name), name) for name in MANAGED_CHILDREN)
    # Iterate over a snapshot: children are removed from ``server`` in the loop.
    for child in list(server):
        tag = child.tag
        val = child.text
        local_name = managed_tags.get(tag)
        if local_name is None:
            print("%s %s" % (tag, val))
        else:
            server.remove(child)
            print("found server.%s and will remove %s %s"
                  % (local_name, tag, val))


## http://lxml.de/FAQ.html#why-doesn-t-the-pretty-print-option-reformat-my-xml-output
# pretty_print only re-indents when the parsed tree holds no whitespace-only
# text nodes, hence remove_blank_text.
# NOTE(review): the new container elements are also built without an empty
# ``.text`` -- presumably an empty text node is what stopped pretty_print from
# breaking lines inside the inserted tags; confirm against the output.
parser = ET.XMLParser(remove_blank_text=True)
with open("config.xml", "rb") as f:
    tree = ET.parse(f, parser)

servers = tree.findall(".//{%s}server" % namespace)

## Loop through the nodes we found
for server in servers:
    print("New SERVER node detected:")
    _remove_managed_children(server)

    ## Add the 3 children we want, at positions 1..3 of the server element
    builders = (_build_ssl, _build_log, _build_data_source)
    for position, build in enumerate(builders, 1):
        server.insert(position, build())

tree.write("wc-out.xml", pretty_print=True)