Read an xlsx file and write xml - python

I need your help, please. I'm trying to write Python code that takes an xlsx file as input, reads the various cell fields, and then generates an XML file. I had trouble reading dates, so I used pandas in order to work with DataFrames. The dates are now read correctly, and I can also read the test data, but I am not succeeding in writing this data into the XML. Can you please help me?
import pandas as pd
import datetime
import json
import datetime
from xml.etree import ElementTree as ET
# Script: read parser.xlsx with pandas and write a small XML document
# (root element with a <GrpHdr> child holding <MsgId> and <PROVA>).
df = pd.read_excel('parser.xlsx') # read the Excel file
# Convert the 'data autorizzazio' column to pandas datetime values.
df['data autorizzazio'] = pd.to_datetime(df['data autorizzazio'])
#df['data movimentazio'] = pd.to_datetime(df['data movimentazio'])
#df.head()
#df.info()
# Map the prefix "CBIPaymentRequest" to the namespace URI so the serialised
# root is written as <CBIPaymentRequest:CBIPaymentRequest xmlns:...>.
ET.register_namespace("CBIPaymentRequest","http://www.w3.org/2001/XMLSchema-instance")
root = ET.Element("{http://www.w3.org/2001/XMLSchema-instance}CBIPaymentRequest")
root1 = ET.SubElement(root,"GrpHdr")
#root2 = ET.SubElement(root,"PmtInf")
# Create the <MsgId> element under <GrpHdr>.
MsgId = ET.SubElement(root1,'MsgId')
# NOTE(review): this rebinds the name MsgId to a cell value (first row,
# position 1). The <MsgId> element created above stays attached to root1 but
# can no longer be reached through this name.
MsgId = df.loc[0].values[1]# set the value of the relevant cell
# NOTE(review): the assignment below targets the cell value, not the element,
# so the <MsgId> element's text is never set; depending on the value's type
# the attribute assignment itself may fail.
MsgId.text = df['data autorizzazio'].values[1]
#MsgId = MsgId
print(MsgId)
# Create the <PROVA> element under <GrpHdr>.
Prova = ET.SubElement(root1,'PROVA')
# NOTE(review): same pattern as above - the name Prova now refers to a cell
# value (first row, position 5) instead of the <PROVA> element.
Prova = df.loc[0].values[5]
# Equivalent to df[Prova] = 'Prova': this writes a column into the DataFrame,
# not into the XML tree. __setitem__ returns None, so Prova1 is None.
Prova1 = df.__setitem__(Prova,'Prova')
#Prova.text = df['Saluto5'].values[1]
print(Prova)
# Serialise the tree; no element has had its text set at this point.
tree = ET.ElementTree(root)
tree.write("pandas_output_test_1.xml")

At the moment I generate this XML (unfortunately empty):
<CBIPaymentRequest:CBIPaymentRequest xmlns:CBIPaymentRequest="http://www.w3.org/2001/XMLSchema-instance">
<GrpHdr>
<MsgId/>
<PROVA/>
</GrpHdr>
</CBIPaymentRequest:CBIPaymentRequest>
I would like it to be populated with the data I read from the xlsx file.

Related

Display XML to CSV results in table format

I have the Python script below. It pulls the XML data and copies it to a CSV file. I need the data to be shown in column (table) format, as in Example 1, but the script currently reports the information as key/value rows, as in Example 2.
Example 1
EventID,TargetUserSid,TargetUserName,TargetDomainName,TargetLogonId
4634, S-1-5-21-2795111079-3225111112-3329435632-1610,grant.larson,AFC,0x3642df8
Example 2
This is the picture for the results of the code below.
EventID 4634
TargetUserSid S-1-5-21-2795111079-3225111112-3329435632-1610
TargetUserName grant.larson
TargetDomainName AFC
TargetLogonId 0x3642df8
LogonType 3
from xml.etree import ElementTree as ET
import pandas as pd
import csv
# Script: collect (name, value) pairs from a Windows event-log XML export and
# write them to event_log.csv, one pair per row.
tree = ET.parse("SecurityLog-rev2.xml")
root = tree.getroot()
# Tags are compared in ElementTree's "{namespace-uri}localname" form.
ns = "{http://schemas.microsoft.com/win/2004/08/events/event}"
data = []
# Visit every descendant element of the root, in document order.
for eventID in root.findall(".//"):
    if eventID.tag == f"{ns}System":
        # iter(tag) yields only the matching elements below <System>.
        for e_id in eventID.iter(f"{ns}EventID"):
            data.append(("EventID", e_id.text))
    elif eventID.tag == f"{ns}EventData":
        # Each <Data Name="..."> contributes one (Name attribute, text) row.
        for attr in eventID.iter(f"{ns}Data"):
            data.append((attr.get('Name'), attr.text))
# `data` is a list of 2-tuples, so the plain constructor is the right entry
# point (from_dict is meant for dict input).
df = pd.DataFrame(data)
df.to_csv('event_log.csv', index=False, header=False)
print(df)

Adding a single occurrence xml tag using lxml

Based on a couple of other examples I've found here, I've created a script that creates an XML file from a CSV input using lxml.etree and lxml.builder. It gives me almost what I need - the one thing I'm struggling with is that I also need to include a single-occurrence tag at the top of the data, which will contain a static value.
Here's my sample data:
ACTION|INV_ACCT_CLASS|EXT_INV_ID|WAREHOUSE_ID|NAME|CNTRY_CD|PHONE|ADDR_STR1|ADDR_STR2|CITY|ST|ZIP|ADD_KEY_NUM
add|2|AAA_00005|1001213|Company 1|US|9995555555|1313 Mockingbird Lane||New York|NY|10001|44433322
add|2|BBB_00008|1004312|Company 2|US|43255511110|Some other address||Stamford|CT|44112|11122233
My code so far:
import lxml.etree
from lxml.builder import E
import csv
# Script: build a <paiInv> document with one <invrec> per row of a
# pipe-delimited CSV file and write it to OutputFile.xml.
# newline="" is what the csv module expects for files it reads.
with open("filename.csv", newline="") as csvfile:
    # The generator is consumed by E.paiInv(...) while the file is still open.
    # Column names match the sample header row (INV_ACCT_CLASS, ADDR_STR1,
    # ADDR_STR2); the previous keys were absent from it and raised KeyError.
    results = E.paiInv(*(
        E.invrec(
            E.action(row['ACTION']),
            E.investor(
                E.inv_account_class(row['INV_ACCT_CLASS']),
                E.ext_inv_id(row['EXT_INV_ID']),
                E.warehouse_id(row['WAREHOUSE_ID']),
                E.name(row['NAME']),
                E.cntry_cd(row['CNTRY_CD']),
                E.phone(row['PHONE']),
                E.addr_str1(row['ADDR_STR1']),
                E.addr_str2(row['ADDR_STR2']),
                E.city(row['CITY']),
                E.st(row['ST']),
                E.zip(row['ZIP']),
                E.add_key_num(row['ADD_KEY_NUM'])
            )
        ) for row in csv.DictReader(csvfile, delimiter='|'))
    )
lxml.etree.ElementTree(results).write("OutputFile.xml")
Here's my output so far:
<paiInv>
<invrec>
<action>add</action>
<investor>
<inv_account_class>2</inv_account_class>
<ext_inv_id>AAA_00005</ext_inv_id>
<warehouse_id>1001213</warehouse_id>
<name>Company 1</name>
<cntry_cd>US</cntry_cd>
<phone>9995555555</phone>
<addr_str1>1313 Mockingbird Lane</addr_str1>
<addr_str2></addr_str2>
<city>New York</city>
<st>NY</st>
<zip>10001</zip>
<add_key_num>44433322</add_key_num>
</investor>
</invrec>
<invrec>
<action>add</action>
<investor>
<inv_account_class>2</inv_account_class>
<ext_inv_id>BBB_00008</ext_inv_id>
<warehouse_id>1004312</warehouse_id>
<name>Company 2</name>
<cntry_cd>US</cntry_cd>
<phone>43255511110</phone>
<addr_str1>Some other address</addr_str1>
<addr_str2></addr_str2>
<city>Stamford</city>
<st>NB</st>
<zip>44112</zip>
<add_key_num>11122233</add_key_num>
</investor>
</invrec>
</paiInv>
And the output I need includes one extra (single occurrence) tag, named request_id, occurring at the top of the data, like this:
<paiInv>
<request_id>req44</request_id>
<invrec>
<action>add</action>
<investor>
<inv_account_class>2</inv_account_class>
<ext_inv_id>AAA_00005</ext_inv_id>
<warehouse_id>1001213</warehouse_id>
<name>Company 1</name>
<cntry_cd>US</cntry_cd>
<phone>9995555555</phone>
<addr_str1>1313 Mockingbird Lane</addr_str1>
<addr_str2></addr_str2>
<city>New York</city>
<st>NY</st>
<zip>10001</zip>
<add_key_num>44433322</add_key_num>
</investor>
</invrec>
<invrec>
<action>add</action>
<investor>
<inv_account_class>2</inv_account_class>
<ext_inv_id>BBB_00008</ext_inv_id>
<warehouse_id>1004312</warehouse_id>
<name>Company 2</name>
<cntry_cd>US</cntry_cd>
<phone>43255511110</phone>
<addr_str1>Some other address</addr_str1>
<addr_str2></addr_str2>
<city>Stamford</city>
<st>NB</st>
<zip>44112</zip>
<add_key_num>11122233</add_key_num>
</investor>
</invrec>
</paiInv>
Any suggestions will be appreciated. I haven't been able to get anything other than syntax errors with my attempts to get the extra tag so far.
Before you save the file, try something like:
# Insert a single <request_id> element as the first child of <paiInv>.
# `results` is the root element built earlier by the CSV-to-XML code.
doc = lxml.etree.ElementTree(results)
# Parse the new element from its literal markup.
ins = lxml.etree.fromstring('<request_id>req44</request_id>')
# The tail is the text that follows the element's closing tag; a newline
# puts the next sibling on its own line in the output.
ins.tail = "\n"
# Locate the document's <paiInv> root and insert at child index 0.
dest = doc.xpath('/paiInv')[0]
dest.insert(0,ins)
print(lxml.etree.tostring(doc).decode())
The output should be what you are looking for.

Extract data from xml to Excel (Python 2.7 )

I'm attempting to extract some data from an XML file and create an Excel file with the information.
XML File:
<UniversalTransaction>
<TransactionInfo>
<DataContext>
<DataSourceCollection>
<DataSource>
<Type>AccountingInvoice</Type>
<Key>AR INV 00001006</Key>
</DataSource>
</DataSourceCollection>
<Company>
<Code>DCL</Code>
<Country>
<Code>CL</Code>
<Name>Chile</Name>
</Country>
<Name>Your Chile Corp</Name>
</Company>
...etc
Then I made this Code in python 2.7
import xml.etree.ElementTree as ET
import xlwt
from datetime import datetime
# Script: read two values from ar.xml and write them to the first row of a
# new .xls workbook.
tree = ET.parse('ar.xml')
root = tree.getroot()
#extract xml
# NOTE(review): a bare tag name passed to find/findall matches direct children
# of `root` only. In the sample XML, <DataSource>, <Key>, <Company> and <Name>
# are nested several levels down, so findall() returns an empty list and
# find() returns None - which is what makes `.text` fail below.
# `invoice` and `country` are not used later in this snippet.
invoice = root.findall('DataSource')
arinv = root.find('Key').text
country = root.findall('Company')
ctry = root.find('Name').text
wb = xlwt.Workbook()
ws = wb.add_sheet('A Test Sheet')
# Row 0: column 0 gets the invoice key, column 1 the name.
ws.write(0, 0, arinv)
ws.write(0, 1, ctry)
wb.save('example2.xls')
But I get this error:
arinv = root.find('Key').text
'NoneType' object has no attribute 'text'
And i guess it will be the same with
ctry = root.find('Name').text
Also when I change the "extract xml" part of the code to this
for ar in root.findall('DataContext'):
nro = []
ctry = []
inv = ar.find('Key').text
nro.append(inv)
country = ar.find('Name').text
ctry.append(country)
i get the following error:
ws.write(0, 0, arinv)
name 'arinv' is not defined
then again, I guess its the same with "ctry"
Windows 10, python 2.7
I'll apreciate any help, thanks.
It is better to ask shorter questions, without the whole bunch of surrounding context code. You will probably find the solution yourself when you carefully try to isolate the exact, short question.
According to the docs, Element.find with a plain tag name searches only the element's direct children. To search deeper you need an XPath expression (see the section on XPath support in the docs), like
root.findall('.//Key')[0].text
(given with assumption the Key always exists, contains text and unique within a document; i.e. without validation)

Python XML modifying by ElementTree destroys the XML structure

I am using Python 3.5.1 on Windows to modify the text inside an XML element. The modification works fine, but after saving the tree every empty tag pair is collapsed into a self-closing tag, as in the following example:
<HOSTNAME></HOSTNAME> Is being changed to <HOSTNAME />
child with a text between the tags looks good:
<HOSTNAME>tnas2</HOSTNAME> is being changed to
<HOSTNAME>tnas2</HOSTNAME> which is the same as the source.
The source XML file is:
<ROOT>
<DeletedName>
<VERIFY_DEST_SIZE>Y</VERIFY_DEST_SIZE>
<VERIFY_BYTES>Y</VERIFY_BYTES>
<TIMESTAMP>XXXXXXXXXDeletedXXXXXXXXXX</TIMESTAMP>
<EM_USERS>XXXXXXXXXDeletedXXXXXXXXXX</EM_USERS>
<EM_GROUPS></EM_GROUPS>
<LOCAL>
<HOSTNAME></HOSTNAME>
<PORT></PORT>
<USERNAME>XXXXXXXXXDeletedXXXXXXXXXX</USERNAME>
<PASSWORD>XXXXXXXXXDeletedXXXXXXXXXX</PASSWORD>
<HOME_DIR></HOME_DIR>
<OS_TYPE>Windows</OS_TYPE>
</LOCAL>
<REMOTE>
<HOSTNAME>DeletedHostName</HOSTNAME>
<PORT>22</PORT>
<USERNAME>XXXXXXXXXDeletedXXXXXXXXXX</USERNAME>
<PASSWORD>XXXXXXXXXDeletedXXXXXXXXXX</PASSWORD>
<HOME_DIR>XXXXXXXXXDeletedXXXXXXXXXX</HOME_DIR>
<OS_TYPE>Unix</OS_TYPE>
<CHAR_SET>UTF-8</CHAR_SET>
<SFTP>Y</SFTP>
<ENCRYPTION>Blowfish</ENCRYPTION>
<COMPRESSION>N</COMPRESSION>
</REMOTE>
</DeletedName>
</ROOT>
the code is:
import os
import xml.etree.ElementTree as ET
from shutil import copyfile
import datetime
def AddAuthUserToAccountsFile(AccountsFile,RemoteMachine,UserToAdd):
    """Add ``UserToAdd`` to every ``<EM_USERS>`` list in an accounts XML file.

    The file is parsed, copied to ``<AccountsFile>-<YYYY-MM-DD>`` as a
    best-effort backup, updated in memory and written back in place.

    Args:
        AccountsFile: Path of the XML accounts file to update.
        RemoteMachine: Not used by this function; kept for existing callers.
        UserToAdd: User name to append to each pipe-separated user list.

    Raises:
        OSError: If the accounts file cannot be read or written.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    today = str(datetime.date.today())
    print(today)
    BackUpAccountsFile = AccountsFile + "-" + today
    # Let parse errors propagate: swallowing them only postponed the failure
    # to a NameError on `tree` further down.
    tree = ET.parse(AccountsFile)
    try:
        copyfile(AccountsFile,BackUpAccountsFile)
    except OSError as err:
        # The backup stays best-effort, but a failure is no longer silent.
        print("Could not back up ",AccountsFile," : ",err)
    for UsersList in tree.findall('.//EM_USERS'):
        # An empty <EM_USERS></EM_USERS> has text None; treat it as "".
        Users = (UsersList.text or "").replace("||","|")
        UsersList.text = Users
        # Compare whole pipe-separated entries so that "bob" is not
        # considered present just because "bobby" is.
        if UserToAdd not in Users.split("|"):
            print("The Users were : ",Users, "--->> Adding ",UserToAdd)
            if Users and not Users.endswith("|"):
                # Keep entries separated when the list lacks a trailing "|".
                Users += "|"
            UsersList.text = Users + UserToAdd +"|"
    # NOTE: the default XML serialisation writes text-less elements in the
    # short form (<TAG />).
    tree.write(AccountsFile)
Appreciate for any help to pass this strange scenario.
Thanks,
Miki
OK, i found the solution -
Just add method="html" to the tree.write call; the empty tags are then written as separate opening and closing tags, as needed.
tree.write(AccountsFile,method = 'html')
Thanks.

Python ElementTree how do send the value of a variable to xml output

I want to update the xml file with the current date in lastrun date attribute.
The code below results in + str(mprocessdate) + and I want it to say 2015-04-16.
What's wrong with my code? Why do I get that string instead of the actual date?
company1.xml
<corp>
<lastrun date="20150123" />
<company id="18888802223">
<name>South Plantation</name>
<P_DNIS>99603</P_DNIS>
<Tracking_Phone>+18888802223</Tracking_Phone>
<Account>South Plantation</Account>
<AppendValue> Coupon</AppendValue>
<InsertCoupon>Y</InsertCoupon>
</company>
</corp>
Script
import datetime
from xml.etree import ElementTree as ET
# Script: set the date attribute of <lastrun> to today's date and save the
# document under a new file name.
mprocessdate = datetime.date.today()
print (mprocessdate)
tree = ET.parse("company1.xml")
# find() with a plain tag name looks among the root's direct children.
mlastrun = tree.find('lastrun')
# NOTE(review): the second argument is a quoted string literal, so the
# attribute receives the text "+ str(mprocessdate) + " as written; str() is
# never called on the variable.
mlastrun.set('date', '+ str(mprocessdate) + ')
tree.write('company.xml')
Remove the quotes and the + signs so that str(mprocessdate) is evaluated as an expression instead of being passed as a literal string.
import datetime
from xml.etree import ElementTree as ET
# Script: set the date attribute of <lastrun> to today's date.
mprocessdate = datetime.date.today()
print (mprocessdate)
# NOTE(review): this reads company.xml, whereas the question's script reads
# company1.xml - confirm which input file is intended.
tree = ET.parse("company.xml")
mlastrun = tree.find('lastrun')
# str(date) gives the ISO form YYYY-MM-DD, which becomes the attribute value.
mlastrun.set('date', str(mprocessdate))
tree.write('company.xml')

Categories