Group branches in an XML tree with Python on a common field - python

I have a list of order details in a CSV, and want to join all items from the lines together on one order.
Example data is:
Order|Line|Item|Price
123456789|1|IK123456|199.99
654987321|1|MASGE12385|29.95
654987321|2|KLEAN458792|9.99
654987321|3|LP12489|1959.95
I want everything to be listed in an XML with the root as the Order Number, Child as the Line Number and Sub-Children as Item and Price.
I want the output to look like:
<Order number = "123456789">
<Line number = "1">
<Item>IK123456</Item>
<Price>199.99</Price>
</Line>
</Order>
<Order number = "654987321">
<Line number = "1">
<Item>MASGE12385</Item>
<Price>29.95</Price>
</Line>
<Line number = "2">
<Item>KLEAN458792</Item>
<Price>9.99</Price>
</Line>
<Line number = "3">
<Item>LP12489</Item>
<Price>1959.95</Price>
</Line>
</Order>
Here is my code:
import csv
import xml.etree.ElementTree as ET
# Reads the pipe-delimited order file and writes its rows as an XML tree under one <Order> root.
file = 'C:/github.txt'
with open (file, 'r') as f:
reader = csv.reader(f, delimiter = '|')
# Consume the first row (the column names) so the loop below only sees data rows.
header = next(reader)
# NOTE(review): csv.reader returns an iterator, which cannot be indexed, so
# reader[0] raises TypeError. The same applies to reader[1], reader[2] and
# reader[3] below; the fields of the current row are available as row[...].
order_num = reader[0]
root = ET.Element("Order") #BUILD A ROOT FOR THE XML TREE
root.set('number', order_num) #ADD ATTRIBUTE
for row in reader: #ITERATE THROUGH EACH ROW AND POPULATE DATA IN BRANCHES OF XML TREE
line = ET.SubElement(root, 'line', number= reader[1])
# NOTE(review): 'item code' contains a space, which is not valid in an XML
# element name; ElementTree would write it out unchanged.
item = ET.SubElement(line, 'item code')
item.text = reader[2]
price = ET.SubElement(line, 'price')
price.text = reader[3]
# Serialize the finished tree with an XML declaration.
tree = ET.ElementTree(root)
tree.write('C:/github.xml', encoding = 'utf-8', xml_declaration = True)
(NOTE: I moved something and got an error, but not sure what happened)

While looping, keep a tracker on the current order number so that a new <Order> element is created only when the number changes; this keeps the lines of one order together (it assumes that rows belonging to the same order are adjacent in the CSV). Additionally, consider csv.DictReader, which iterates over the CSV rows as dictionaries keyed by the first-row headers. Finally, use the built-in minidom to pretty-print the output. The code below places all orders under a single <Orders> root:
import csv
import xml.etree.ElementTree as ET
import xml.dom.minidom as mn
# Convert the pipe-delimited order file into one XML document in which the
# lines of each order are grouped under a single <Order> element.
file = 'C:/github.txt'

# Order number of the most recently seen row. A new <Order> element is only
# started when this changes, so rows of one order must be adjacent in the CSV.
curr_order = None

with open(file, 'r') as f:
    reader = csv.DictReader(f, delimiter='|')

    # BUILD A ROOT FOR THE XML TREE
    root = ET.Element("Orders")

    # ITERATE THROUGH EACH ROW AS DICTIONARY
    for d in reader:
        order_num = str(d['Order'])

        # CONDITIONALLY BUILD ORDER ELEMENT
        if curr_order != order_num:
            orderElem = ET.SubElement(root, "Order")
            # The attribute only needs to be set once, when the element is created.
            orderElem.set('number', order_num)
            curr_order = order_num

        # CREATE DESCENDANTS OF ORDER
        line = ET.SubElement(orderElem, 'line', number=str(d['Line']))
        ET.SubElement(line, 'item_code').text = str(d['Item'])
        ET.SubElement(line, 'price').text = str(d['Price'])

# PRETTY PRINT OUTPUT
dom = mn.parseString(ET.tostring(root, encoding='utf-8'))

# toprettyxml() returns bytes when an encoding is given, hence binary mode.
with open('C:/github.xml', 'wb') as f:
    f.write(dom.toprettyxml(indent=" ", encoding='utf-8'))
Online Demo

Related

TypeError: 'int' object is not subscriptable when reading from HDFS

I'm reading in a file from HDFS and I keep getting this error:
TypeError: 'int' object is not subscriptable
csv file:
CLAIM_NUM,BEN_ST,AGE,MEDICAL_ONLY_IND,TTL_MED_LOSS,TTL_IND_LOSS,TTL_MED_EXP,TTL_IND_EXP,BP_CD,NI_CD,legalrep,depression,cardiac,diabetes,hypertension,obesity,smoker,subabuse,arthritis,asthma,CPT_codes,D,P,NDC_codes
123456789,IL,99,1,2201.26,0,97.16,0,31,4,1,0,0,0,0,0,0,0,0,0,NA,8409~71941,NA,NA
987654321,AL,98,1,568.12,0,20.82,0,42,52,1,0,0,0,0,0,0,0,0,0,NA,7242~8472~E9273,NA,NA
My code:
# Builds one <cbcalc> XML request per CSV row and posts it to target_url,
# collecting the decoded JSON responses in error_results / api_results.
with hdfs.open("/user/ras.csv") as f:
# NOTE(review): f.read() returns the file content as one object rather than
# parsed rows, so the loop below does not receive dict-like rows and
# row['D'] fails with the reported TypeError -- confirm the exact return type
# for this HDFS client.
reader = f.read()
for i, row in enumerate(reader, start=1):
root = ET.Element('cbcalc')
icdNode = ET.SubElement(root, "icdcodes")
# The D, P, NDC_codes and CPT_codes columns hold '~'-separated code lists.
for code in row['D'].split('~'):
ET.SubElement(icdNode, "code").text = code
ET.SubElement(root, "clientid").text = row['CLAIM_NUM']
ET.SubElement(root, "state").text = row['BEN_ST']
ET.SubElement(root, "country").text = "US"
ET.SubElement(root, "age").text = row['AGE']
ET.SubElement(root, "jobclass").text = "1"
ET.SubElement(root, "fulloutput").text ="Y"
cfNode = ET.SubElement(root, "cfactors")
for k in ['legalrep', 'depression', 'diabetes',
'hypertension', 'obesity', 'smoker', 'subabuse']:
ET.SubElement(cfNode, k.lower()).text = str(row[k])
psNode = ET.SubElement(root, "prosummary")
psicdNode = ET.SubElement(psNode, "icd")
for code in row['P'].split('~'):
# NOTE(review): the code is appended to psNode, not to psicdNode created
# just above (psicdNode is otherwise unused) -- possibly unintended.
ET.SubElement(psNode, "code").text = code
psndcNode = ET.SubElement(psNode, "ndc")
for code in row['NDC_codes'].split('~'):
# NOTE(review): same pattern -- appended to psNode while psndcNode stays empty.
ET.SubElement(psNode, "code").text = code
cptNode = ET.SubElement(psNode, "cpt")
for code in row['CPT_codes'].split('~'):
ET.SubElement(cptNode, "code").text = code
ET.SubElement(psNode, "hcpcs")
# Serialize the request document and send it to the API.
doc = ET.tostring(root, method='xml', encoding="UTF-8")
response = requests.post(target_url, data=doc, headers=login_details)
response_data = json.loads(response.text)
# Responses that are dicts containing an 'error' key are kept separately.
if type(response_data)==dict and 'error' in response_data.keys():
error_results.append(response_data)
else:
api_results.append(response_data)
What do I need to change so that I can loop through the csv file and put the data into xml format to make my API call?
I've tested this code out in Python and it seems to be working, but once I put my file on HDFS it begins to fall over.
The problem is (probably; I don't have this library installed) that f.read() is returning a bytes object. If you iterate over it (using enumerate, for example) you get ints (one per byte of the file), not any kind of structured "row" objects.
Additional processing is necessary before you can start the loop you want to write.
Something like this might do what you want:
import pydoop.hdfs as hdfs
from io import TextIOWrapper
from csv import DictReader
# Wrap the HDFS file handle in a text layer so that csv can parse it.
# unknown_settings / defaults_are_probably_ok are placeholders to fill in.
with hdfs.open("/user/ras.csv") as h, \
        TextIOWrapper(h, *unknown_settings) as w:
    # DictReader is not a context manager, so it is created inside the
    # with-block rather than being listed in the with-statement itself.
    dict_reader = DictReader(w, *defaults_are_probably_ok)
    for row in dict_reader:
        ...

Appending to a new row when converting from xml to csv in python

I'm having trouble writing each control_code along with its description on a new row, rather than having them in the same row but in different columns (see image). Any ideas would be appreciated!
This is the XML file I'm Parsing
My Python File:
import xml.etree.ElementTree as ET
import csv
# Parses the controls XML file and writes control codes with their
# concatenated statement descriptions to a CSV file.
xmlFile='/Users/userName/Desktop/xmlFile.xml'
tree = ET.parse(xmlFile)
root = tree.getroot()
# open a file for writing
excelFile = open('/Users/userName/Desktop/csvTable.csv', 'w')
# creates the csv writer object / variable to write to csv
csvwriter = csv.writer(excelFile)
# list that contains the header
list_head = []
count = 0
for element in root.findall('control'):
# list_nodes is created once per <control>; every code/description pair found
# below is appended to this same list, so they all end up in a single CSV row.
list_nodes=[]
# address_list = []
# The header row is written only on the first iteration (count == 0).
if count == 0:
control_code='code'
list_head.append(control_code)
description = element.find('.//statement/description').tag
list_head.append(description)
csvwriter.writerow(list_head)
count = count + 1
# Control Description and Control Code Parsing
# NOTE(review): the truth value of an Element reflects whether it has child
# elements, not whether find() located it; `is not None` (as used further
# down) is the explicit existence test.
if element.find('statement'):
for controlStmt in element.findall('statement'):
value1 = controlStmt.find('description').text
# Up to three nested <statement> levels are walked; the description written
# is the concatenation of the descriptions along the path.
if controlStmt.find('statement') is not None:
for part2 in controlStmt.findall('statement'):
value2=part2.find('description').text
if part2.find('statement') is not None:
for part3 in part2.findall('statement'):
value3=part3.find('description').text
control_code=part3.find('number').text
list_nodes.append(control_code)
description=value1+value2+value3
list_nodes.append(description)
else:
value3=''
control_code=part2.find('number').text
list_nodes.append(control_code)
description=value1+value2
list_nodes.append(description)
else:
value2=''
control_code=element.find('number').text
list_nodes.append(control_code)
description=value1
list_nodes.append(description)
else:
value1=''
csvwriter.writerow(list_nodes)
excelFile.close()
My Output
Reset list_nodes=[] when you want a new row. Right now you have:
for element in root.findall('control'):
    # One list shared by every statement of the control -> one long CSV row.
    list_nodes=[]
    ...
    for controlStmt in element.findall('statement'):
        ...
    csvwriter.writerow(list_nodes)
And it should be:
for element in root.findall('control'):
    ...
    for controlStmt in element.findall('statement'):
        # Fresh list for every statement -> one CSV row per statement.
        list_nodes=[]
        ...
        csvwriter.writerow(list_nodes)
Or more simply, skip the list.appends:
for element in root.findall('control'):
    ...
    for controlStmt in element.findall('statement'):
        ...
        # Write the pair directly; no intermediate list is needed.
        csvwriter.writerow([control_code,description])

python XML to CSV Parse result non

I have this XML but am having trouble parsing it into CSV. I tried a simple print statement but still get no value:
<?xml version="1.0" encoding="UTF-8"?>
<Document xmlns="urn:iso:std:iso:20022:tech:xsd:pain.008.001.02" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<CstmrDrctDbtInitn>
<GrpHdr>
<MsgId>1820</MsgId>
<CreDtTm>2016-05-17T11:56:12</CreDtTm>
<NbOfTxs>197</NbOfTxs>
<CtrlSum>136661.81</CtrlSum>
<InitgPty>
<Nm>GS Netherlands CDZ C.V.</Nm>
</InitgPty>
</GrpHdr>
</CstmrDrctDbtInitn>
<CstmrDrctDbtInitn>
<GrpHdr>
<CreDtTm>2016-05-18T10:34:51</CreDtTm>
<NbOfTxs>1</NbOfTxs>
<CtrlSum>758.99</CtrlSum>
<InitgPty>
<Nm>GS Netherlands CDZ C.V.</Nm>
</InitgPty></GrpHdr></CstmrDrctDbtInitn>
</Document>
and I want to iterate over the values of each node.
So far i have written code as below:
import xml.etree.ElementTree as ET
import csv
# Parses the pain.008 XML document and writes the group-header fields to Bank.csv.
with open("D:\Python\Dave\\17_05_16_1820_DD201606B10_Base.xml") as myFile:
tree = ET.parse(myFile)
# Prefix map so that the document's default namespace can be written as 'd:' in paths.
ns = {'d': 'urn:iso:std:iso:20022:tech:xsd:pain.008.001.02'}
# open a file for writing
Resident_data = open('Bank.csv', 'w')
# create the csv writer object
csvwriter = csv.writer(Resident_data)
resident_head = []
#write Header
MsgId = 'MsgId'
resident_head.append(MsgId)
CreDtTm = 'CreDtTm'
resident_head.append(CreDtTm)
NbOfTxs = 'NbOfTxs'
resident_head.append(NbOfTxs)
CtrlSum = 'CtrlSum'
resident_head.append(CtrlSum)
csvwriter.writerow(resident_head)
# NOTE(review): in the sample XML, <Document> is the root element itself, so
# './/d:Document/...' searches for a Document *below* the root and presumably
# matches nothing -- which would explain a CSV containing only the header.
# NOTE(review): the path also ends at d:MsgId, while the loop body looks up
# MsgId/CreDtTm/... as children of each match and without the 'd:' prefix --
# verify the intended element (GrpHdr?) and pass ns to find().
for member in tree.findall('.//d:Document/d:CstmrDrctDbtInitn/d:GrpHdr/d:MsgId', ns):
resident = []
#write values
MsgId = member.find('MsgId').text
resident.append(MsgId)
CreDtTm = member.find('CreDtTm').text
resident.append(CreDtTm)
NbOfTxs = member.find('NbOfTxs').text
resident.append(NbOfTxs)
CtrlSum = member.find('CtrlSum').text
resident.append(CtrlSum)
csvwriter.writerow(resident)
Resident_data.close()
I get no error, and my Bank.csv has only the header but no data. Please help.

Python Error Using Argparse

I made a program that converts a csv file to a xml file using argparse. First it will read the csv file as an inputfile then converts it to a xml file. Here is my code:
import sys, argparse
import csv
import indent
from xml.etree.ElementTree import ElementTree, Element, SubElement, Comment, tostring
# Command-line interface: -i is required; -c and -x select the action;
# -v turns on progress messages; -o names the output file.
parser=argparse.ArgumentParser(description='Convert wordlist text files to various formats.', prog='Text Converter')
parser.add_argument('-v','--verbose',action='store_true',dest='verbose',help='Increases messages being printed to stdout')
parser.add_argument('-c','--csv',action='store_true',dest='readcsv',help='Reads CSV file and converts to XML file with same name')
parser.add_argument('-x','--xml',action='store_true',dest='toxml',help='Convert CSV to XML with different name')
# argparse.FileType opens the named files while the arguments are parsed.
parser.add_argument('-i','--inputfile',type=argparse.FileType('r'),dest='inputfile',help='Name of file to be imported',required=True)
parser.add_argument('-o','--outputfile',type=argparse.FileType('w'),dest='outputfile',help='Output file name')
# Parsed at import time; main() reads the result from this module-level name.
args = parser.parse_args()
# Entry point: reads the input CSV and runs the action selected on the
# command line. argv is accepted but not used; options come from the
# module-level `args`.
def main(argv):
reader = read_csv(args.inputfile)
if args.verbose:
print ('Verbose Selected')
if args.toxml:
if args.verbose:
print ('Convert to XML Selected')
generate_xml(reader, args.outputfile)
if args.readcsv:
if args.verbose:
print ('Reading CSV file')
# Neither -x nor -c was given: report a usage error through argparse.
if not (args.toxml or args.readcsv):
parser.error('No action requested')
return 1
def read_csv(inputfile):
    """Return every row of the open CSV file *inputfile* as a list of lists.

    The whole file is read eagerly, so the result can be indexed and
    iterated more than once.
    """
    return list(csv.reader(inputfile))
# Builds a <Solution>/<DrillHoles>/<description> tree with one <hole> per CSV
# data row and writes it to outfile.
def generate_xml(reader,outfile):
root = Element('Solution')
root.set('version','1.0')
tree = ElementTree(root)
head = SubElement(root, 'DrillHoles')
head.set('total_holes', '238')
description = SubElement(head,'description')
current_group = None
# i counts rows; row 0 (the header) is skipped by the `i > 0` test.
i = 0
for row in reader:
if i > 0:
# Requires exactly seven columns; a row with any other number of fields
# (e.g. a blank line) raises the ValueError reported in the question.
x1,y1,z1,x2,y2,z2,cost = row
# NOTE(review): current_group.text is never assigned in this function, so
# this comparison presumably always succeeds and every row gets its own
# <hole> -- confirm that is intended.
if current_group is None or i != current_group.text:
current_group = SubElement(description, 'hole',{'hole_id':"%s"%i})
# NOTE(review): the trailing comma makes `collar` a one-element tuple.
collar = SubElement (current_group, 'collar',{'':', '.join((x1,y1,z1))}),
toe = SubElement (current_group, 'toe',{'':', '.join((x2,y2,z2))})
cost = SubElement(current_group, 'cost',{'':cost})
i+=1
indent.indent(root)
tree.write(outfile)
# Run main() only when executed as a script; its return value becomes the exit status.
if (__name__ == "__main__"):
sys.exit(main(sys.argv))
Then, at the command prompt, I run: Argparse.py -i 1250_12.csv -o output.xml -x
where Argparse.py is the program name, 1250_12.csv is the CSV file name, output.xml is the name I want for the output file, and -x is the action that converts CSV to XML.
This program was working 10 minutes ago, and now it fails with an error saying:
x1,y1,z1,x2,y2,z2,cost = row
ValueError: need more than 1 value to unpack
It appears that there are lines in your CSV that do not have exactly 7 columns. Perhaps there are some empty lines?
You could use a try..except to catch and handle the error:
def generate_xml(reader,outfile):
    """Write the drill-hole rows from *reader* to *outfile* as XML.

    reader:  iterable of CSV rows (a csv.reader or a list of rows); the
             first row is treated as the header and skipped.
    outfile: file name or file object handed to ElementTree.write().

    Rows that do not have exactly seven columns (for example blank lines)
    are reported on stderr and skipped instead of aborting the conversion.
    """
    root = Element('Solution')
    root.set('version','1.0')
    tree = ElementTree(root)
    head = SubElement(root, 'DrillHoles')
    head.set('total_holes', '238')
    description = SubElement(head,'description')

    # iter() makes the header skip work for a list as well as a csv.reader.
    rows = iter(reader)
    next(rows, None) # skip the first line; tolerate completely empty input

    # i is the 1-based position of the data row and doubles as the hole id.
    for i, row in enumerate(rows, start=1):
        try:
            x1,y1,z1,x2,y2,z2,cost = row
        except ValueError as err:
            sys.stderr.write('{e}: {r!r}\n'.format(e=err, r=row))
            # Without this, the code below would run with stale or undefined values.
            continue

        # Every valid row gets its own <hole> element.
        hole = SubElement(description, 'hole',{'hole_id':"%s"%i})
        # NOTE(review): the empty attribute name '' is kept from the original
        # code; it does not serialize to well-formed XML -- presumably a real
        # attribute name or element text is intended.
        SubElement(hole, 'collar',{'':', '.join((x1,y1,z1))})
        SubElement(hole, 'toe',{'':', '.join((x2,y2,z2))})
        SubElement(hole, 'cost',{'':cost})

    indent.indent(root)
    tree.write(outfile)
Also, if you set reader = csv.reader(inputfile) instead of making it a list, then your program will require less memory since reader will be an iterator instead of a list of rows.
Moreover, to skip the first line with next(reader), reader must be an iterator, not a list. So for the above to work, also change:
reader = read_csv(args.inputfile)
to
reader = csv.reader(args.inputfile)

Write XML filename based of CSV cell Python

Trying to save output from this script to a file based on a cell within the csv. I am able to call the variable {file_root_name} to write into the xml file but not as a variable to write the file name. How can I use the variable file_root_name as a variable to generate a file name?
import csv
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
from xml.dom import minidom
def prettify(elem):
    """Return pretty-printed, UTF-8 encoded XML for the Element.

    The element is serialized with ElementTree, re-parsed with minidom and
    written back out by minidom's pretty printer, which also prepends the
    XML declaration line.
    """
    rough_string = ElementTree.tostring(elem, 'utf-8')
    reparsed = minidom.parseString(rough_string)
    return reparsed.toprettyxml(indent=" ", encoding = 'utf-8')
# Builds one SMIL document per CSV row and writes it to a .smil file.
# DOCTYPE line inserted between the XML declaration and the document body.
doctype = '<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">'
# (file-name suffix, system-bitrate) pairs; one <video> element is emitted per pair.
video_data = ((256, 336000),
(512, 592000),
(768, 848000),
(1128, 1208000))
with open(sys.argv[1], 'rU') as f:
reader = csv.DictReader(f)
for row in reader:
root = Element('smil')
root.set('xmlns', 'http://www.w3.org/2001/SMIL20/Language')
head = SubElement(root, 'head')
# NOTE(review): the attribute text is embedded in the tag-name string here
# rather than passed as an attribute -- confirm this is intended.
meta = SubElement(head, 'meta base="rtmp://cp23636.edgefcs.net/ondemand"')
body = SubElement(root, 'body')
switch_tag = ElementTree.SubElement(body, 'switch')
for suffix, bitrate in video_data:
# The src path is filled from the CSV columns year, id and file_root_name.
attrs = {'src': ("mp4:soundcheck/{year}/{id}/{file_root_name}_{suffix}.mp4"
.format(suffix=str(suffix), **row)),
'system-bitrate': str(bitrate),
}
ElementTree.SubElement(switch_tag, 'video', attrs)
# Split off the first line (the XML declaration) so the DOCTYPE can follow it.
xml, doc = prettify(root).split('\n', 1)
# NOTE(review): 'file_root_name' is a string literal here, so the output is
# always named file_root_name.smil; the CSV value is row['file_root_name'].
output = open('file_root_name'+'.smil', 'w')
output.write(xml + doctype + doc)
# NOTE(review): without parentheses this only references the method; the
# file is not closed by this line.
output.close
I'm not sure that I follow, but if the line
attrs = {'src': ("mp4:soundcheck/{year}/{id}/{file_root_name}_{suffix}.mp4"
.format(suffix=str(suffix), **row)),
'system-bitrate': str(bitrate),
}
works then "file_root_name" must be a string key of the dictlike object row. The line
output = open('file_root_name'+'.smil', 'w')
actually combines the string 'file_root_name' with '.smil'. So you'd really want something like
output = open(row['file_root_name']+'.smil', 'w')
BTW, the line
output.close
won't do anything-- you want output.close() instead, or simply
# The with-statement closes the file automatically, even if write() fails.
with open(row['file_root_name']+'.smil', 'w') as output:
    output.write(xml + doctype + doc)

Categories