changing attribute value in xml via lxml python - python

here is my xml:
<request><table attributeA="50" attributeB="1"></table>........</request>
How do I update attributeA's value so that it becomes attributeA="456", like this:
<request><table attributeA="456" attributeB="1"></table>........</request>

Use etree and xpath :
>>> from lxml import etree
>>> xml = '<request><table attributeA="50" attributeB="1"></table></request>'
>>> root = etree.fromstring(xml)
>>> for el in root.xpath("//table[@attributeA]"):
...     el.attrib['attributeA'] = "456"
...
>>> print etree.tostring(root)
<request><table attributeA="456" attributeB="1"/></request>

Related

how to search a word in xml file and print it in python

I want to search for a specific word (entered by the user) in an .xml file. This is my XML file:
<?xml version="1.0" encoding="UTF-8"?>
<words>
<entry>
<word>John</word>
<pron>()</pron>
<gram>[Noun]</gram>
<poem></poem>
<meanings>
<meaning>name</meaning>
</meanings>
</entry>
</words>
here is my Code
import nltk
from nltk.tokenize import word_tokenize
import os
import xml.etree.ElementTree as etree
sen = input("Enter Your sentence - ")
print(sen)
print("\n")
print(word_tokenize(sen)[0])
tree = etree.parse('roman.xml')
node=etree.fromstring(tree)
#node=etree.fromstring('<a><word>waya</word><gram>[Noun]</gram>
<meaning>talking</meaning></a>')
s = node.findtext(word_tokenize(sen)[0])
print(s)
I have tried everything, but it still gives me this error:
a bytes-like object is required, not 'ElementTree'
I really don't know how to solve it.
The error happens because you are passing an ElementTree object to the fromstring() method, which expects a string or bytes. Call getroot() on the parsed tree instead, like this:
>>> import os
>>> import xml.etree.ElementTree as etree
>>> a = etree.parse('a.xml')
>>> a
<xml.etree.ElementTree.ElementTree object at 0x10fcabeb8>
>>> b = a.getroot()
>>> b
<Element 'words' at 0x10fb21f48>
>>> b[0][0].text
'John'
Use find() and findall() methods to search.
for more info, check lib: https://docs.python.org/3/library/xml.etree.elementtree.html
Simple example:
test.xml
<?xml version="1.0" encoding="UTF-8"?>
<words>
<word value="John"></word>
<word value="Mike"></word>
<word value="Scott"></word>
</words>
example.py
root = ET.parse("test.xml")
>>> search = root.findall(".//word/.[@value='John']")
>>> search
[<Element 'word' at 0x10be9c868>]
>>> search[0].attrib
{'value': 'John'}
>>> search[0].tag
'word'

Insert after x node minidom xml python

I'm appending a node to an XML document, but I want to insert it before certain tags. Is that possible?
newNode = xmldoc.createElement("tag2")
txt = xmldoc.createTextNode("value2")
newNode.appendChild(txt)
n.appendChild(newNode)
This is my XML. When I append the child, it is added after UniMed; I want it inserted after Cantidad and before UniMed. (This is a simplified version of my XML; "Item" can have more children, and I do not know how many.)
<ns0:Item>
<ns0:Cantidad>1</ns0:Cantidad>
<ns0:UniMed>L</ns0:UniMed>
</ns0:Item>
I think I could solve it by reading all the children of Item, removing them, and then adding them back in the order I want.
But I don't think that's the best idea...
Any ideas?
EDITED
SOLUTION
itemChildNodes = n.childNodes
n.insertBefore(newNode, itemChildNodes[itemChildNodes.length-2])
Use the insertBefore method to insert the newly created tag.
Demo:
>>> from xml.dom import minidom
>>> content = """
... <xml>
... <Item>
... <Cantidad>1</Cantidad>
... <UniMed>L</UniMed>
... </Item>
... </xml>
... """
>>> root = minidom.parseString(content)
>>> insert_tag = root.createElement("tag2")
>>> htext = root.createTextNode('test')
>>> insert_tag.appendChild(htext)
<DOM Text node "'test'">
>>>
>>> items = root.getElementsByTagName("Item")
>>> item = items[0]
>>> item_chidren = item.childNodes
>>> item.insertBefore(insert_tag, item_chidren[2])
<DOM Element: tag2 at 0xb700da8c>
>>> root.toxml()
u'<?xml version="1.0" ?><xml>\n\t<Item>\n\t <Cantidad>1</Cantidad><tag2>test</tag2>\n\t <UniMed>L</UniMed>\n\t</Item>\n</xml>'
>>>

lxml get full text of element

I have the following xml:
<text>test<br/><br/>All you need to know about British birds.<br/></text>
I want to set the whole content of the <text> tag to 11111.
I'm using Python and lxml, and this is my code:
import nltk
import lxml.etree as le
current_file = '/Users/noor/Dropbox/apps/APIofLife/src/clear_description/bird.rdf'
f = open(current_file,'r')
doc=le.parse(f)
for elem in doc.xpath("//text"):
elem.text = "11111"
f.close()
f = open(current_file,'w')
f.write(le.tostring(doc))
f.close()
However, after running the above code, my result is:
<text>11111<br/><br/>All you need to know about British birds.<br/></text>
I want to know why the whole content of the tag <text> has not been changed to 11111
According to the lxml.etree._Element documentation, the text property corresponds to the text before the first subelement.
You need to delete the subelements as well:
>>> import lxml.etree as le
>>>
>>> root = le.fromstring('''<text>test<br/><br/>
... All you need to know about British birds.
... <br/></text>''')
>>> for elem in root.xpath("//text"):
... elem.text = '1111'
... del elem[:] # <----------
...
>>> le.tostring(root)
'<text>1111</text>'

xml file parsing in python

xml file :
<global>
<rtmp>
<fcsapp>
<password>
<key>hello123</key>
<key>check123</key>
</password>
</fcsapp>
</rtmp>
</global>
python code : To obtain all the key tag values.
hello123
check123
using xml.etree.ElementTree
for streams in xmlRoot.iter('global'):
xpath = "/rtmp/fcsapp/password"
tag = "key"
for child in streams.findall(xpath):
resultlist.append(child.find(tag).text)
print resultlist
The output obtained is [hello123], but I want it to display both ([hello123, check123])
How do I obtain this?
Using lxml and cssselect I would do it like this:
>>> from lxml.html import fromstring
>>> doc = fromstring(open("foo.xml", "r").read())
>>> doc.cssselect("password key")
[<Element key at 0x7f77a6786cb0>, <Element key at 0x7f77a6786d70>]
>>> [e.text for e in doc.cssselect("password key")]
['hello123 \n ', 'check123 \n ']
With lxml and xpath You can do it in the following way:
from lxml import etree
xml = """
<global>
<rtmp>
<fcsapp>
<password>
<key>hello123</key>
<key>check123</key>
</password>
</fcsapp>
</rtmp>
</global>
"""
tree = etree.fromstring(xml)
result = tree.xpath('//password/key/text()')
print result # ['hello123', 'check123']
try beautifulsoup package "https://pypi.python.org/pypi/BeautifulSoup"
using xml.etree.ElementTree
for streams in xmlRoot.iter('global'):
xpath = "/rtmp/fcsapp/password"
tag = "key"
for child in streams.iter(tag):
resultlist.append(child.text)
print resultlist
You have to iterate over the "key" tags in the for loop to obtain the desired result. The above code solves the problem.

Parsing wsdl (retrieve namespaces from the definitions)using an Element Tree

I am trying to parse a WSDL file using ElementTree. As part of this, I'd like to retrieve all the namespaces from a given WSDL definitions element.
For instance in the below snippet , I am trying to retrieve all the namespaces in the definitions tag
<?xml version="1.0"?>
<definitions name="DateService" targetNamespace="http://dev-b.handel-dev.local:8080/DateService.wsdl" xmlns:tns="http://dev-b.handel-dev.local:8080/DateService.wsdl"
xmlns="http://schemas.xmlsoap.org/wsdl/" xmlns:soap="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:myType="DateType_NS" xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:wsdl="http://schemas.xmlsoap.org/wsdl/">
My code looks like this
import xml.etree.ElementTree as ET
xml_file='<path_to_my_wsdl>'
tree = ET.parse(xml_file)
rootElement = tree.getroot()
print (rootElement.tag) #{http://schemas.xmlsoap.org/wsdl/}definitions
print(rootElement.attrib) #targetNamespace="http://dev-b..../DateService.wsdl"
As I understand it, in ElementTree the namespace URI is combined with the local name of the element. How can I retrieve all the namespace entries from the definitions element?
Appreciate your help on this
P.S: I am new (very!) to python
>>> import xml.etree.ElementTree as etree
>>> from StringIO import StringIO
>>>
>>> s = """<?xml version="1.0"?>
... <definitions
... name="DateService"
... targetNamespace="http://dev-b.handel-dev.local:8080/DateService.wsdl"
... xmlns:tns="http://dev-b.handel-dev.local:8080/DateService.wsdl"
... xmlns="http://schemas.xmlsoap.org/wsdl/"
... xmlns:soap="http://schemas.xmlsoap.org/wsdl/soap/"
... xmlns:myType="DateType_NS"
... xmlns:xsd="http://www.w3.org/2001/XMLSchema"
... xmlns:wsdl="http://schemas.xmlsoap.org/wsdl/">
... </definitions>"""
>>> file_ = StringIO(s)
>>> namespaces = []
>>> for event, elem in etree.iterparse(file_, events=('start-ns',)):
... print elem
...
(u'tns', 'http://dev-b.handel-dev.local:8080/DateService.wsdl')
('', 'http://schemas.xmlsoap.org/wsdl/')
(u'soap', 'http://schemas.xmlsoap.org/wsdl/soap/')
(u'myType', 'DateType_NS')
(u'xsd', 'http://www.w3.org/2001/XMLSchema')
(u'wsdl', 'http://schemas.xmlsoap.org/wsdl/')
Inspired by the ElementTree documentation
You can use lxml.
from lxml import etree
tree = etree.parse(file)
root = tree.getroot()
namespaces = root.nsmap
see https://stackoverflow.com/a/26807636/5375693

Categories