Python Error Using Argparse - python

I made a program that converts a csv file to a xml file using argparse. First it will read the csv file as an inputfile then converts it to a xml file. Here is my code:
import sys, argparse
import csv
import indent
from xml.etree.ElementTree import ElementTree, Element, SubElement, Comment, tostring
parser=argparse.ArgumentParser(description='Convert wordlist text files to various formats.', prog='Text Converter')
parser.add_argument('-v','--verbose',action='store_true',dest='verbose',help='Increases messages being printed to stdout')
parser.add_argument('-c','--csv',action='store_true',dest='readcsv',help='Reads CSV file and converts to XML file with same name')
parser.add_argument('-x','--xml',action='store_true',dest='toxml',help='Convert CSV to XML with different name')
parser.add_argument('-i','--inputfile',type=argparse.FileType('r'),dest='inputfile',help='Name of file to be imported',required=True)
parser.add_argument('-o','--outputfile',type=argparse.FileType('w'),dest='outputfile',help='Output file name')
args = parser.parse_args()
def main(argv):
reader = read_csv(args.inputfile)
if args.verbose:
print ('Verbose Selected')
if args.toxml:
if args.verbose:
print ('Convert to XML Selected')
generate_xml(reader, args.outputfile)
if args.readcsv:
if args.verbose:
print ('Reading CSV file')
if not (args.toxml or args.readcsv):
parser.error('No action requested')
return 1
def read_csv(inputfile):
return list(csv.reader(inputfile))
def generate_xml(reader,outfile):
root = Element('Solution')
root.set('version','1.0')
tree = ElementTree(root)
head = SubElement(root, 'DrillHoles')
head.set('total_holes', '238')
description = SubElement(head,'description')
current_group = None
i = 0
for row in reader:
if i > 0:
x1,y1,z1,x2,y2,z2,cost = row
if current_group is None or i != current_group.text:
current_group = SubElement(description, 'hole',{'hole_id':"%s"%i})
collar = SubElement (current_group, 'collar',{'':', '.join((x1,y1,z1))}),
toe = SubElement (current_group, 'toe',{'':', '.join((x2,y2,z2))})
cost = SubElement(current_group, 'cost',{'':cost})
i+=1
indent.indent(root)
tree.write(outfile)
if (__name__ == "__main__"):
sys.exit(main(sys.argv))
then on the command prompt i write, Argparse.py -i 1250_12.csv -o output.xml -x
where argparse is the program name and 1250_12.csv is csv file name and output.xml is what i want the output name to be and -x is an action converting csv to xml.
this program was working 10 min ago and now it gets an error saying:
x1,y1,z1,x2,y2,z2,cost = row
Value error: need more than 1 value to unpack

It appears that there are lines in your CSV that do not have exactly 7 columns. Perhaps there are some empty lines?
You could use a try..except to catch and handle the error:
def generate_xml(reader,outfile):
root = Element('Solution')
root.set('version','1.0')
tree = ElementTree(root)
head = SubElement(root, 'DrillHoles')
head.set('total_holes', '238')
description = SubElement(head,'description')
current_group = None
next(reader) # skip the first line
for row in reader:
try:
x1,y1,z1,x2,y2,z2,cost = row
except ValueError as err:
sys.stderr.write('{e}: {r!r}'.format(e=err, r=row))
if current_group is None or i != current_group.text:
current_group = SubElement(description, 'hole',{'hole_id':"%s"%i})
collar = SubElement (current_group, 'collar',{'':', '.join((x1,y1,z1))}),
toe = SubElement (current_group, 'toe',{'':', '.join((x2,y2,z2))})
cost = SubElement(current_group, 'cost',{'':cost})
indent.indent(root)
tree.write(outfile)
Also, if you set reader = csv.reader(inputfile) instead of making it a list, then your program will require less memory since reader will be an iterator instead of a list of rows.
Morever, to skip the first line with next(reader), reader must be an iterator, not a list. So for the above to work, also change:
reader = read_csv(args.inputfile)
to
reader = csv.reader(inputfile)

Related

How to print strings from a text file,separated by new line in Python

I'm new in Python and i'm challenging myself by making an online library management with prompt for the 1st phase.I'm stacked in search function.I have found how to print a user's input,but i can't find how to print and the following data.I want to search a book by name.If book's name is in the text,i want to print the details of the book,like author,isbn etc.
Here is the following code i have made:
def search():
search_book = input('Search a book: ')
with open('library.txt', mode='r', encoding='utf-8') as f:
index = 0
for line in f:
index += 1
if search_book in line:
print(f'{search_book} is in line {index}')
for details in range(index,index+5):
print(line[details])
And this is the text file's data:
FIRST
ME
9781234
2000
Science
SECOND
YOU
9791234
1980
Literature
It is separated by new line.As example a user input the name FIRST and the result will be:
FIRST
ME
9781234
2000
Science
There are two file options we can consider,
Csv file - Instead of individual readline, you could use one line for one book entry.
# ---------test.csv -------------
# BookName, ItemCode, Price
# Book1, 00012, 14.55
# Book2, 00232, 55.12
# -----End Csv-------------------
import csv
def read_csv(filename:str):
file_contents = None
# reading csv file
with open(filename, 'r') as csvfile:
file_contents = csv.reader(csvfile)
return file_contents
def search(file_contents, book_name:str):
if not file_contents:
return None
for line in file_contents:
if book_name in line:
return line
if __name__ == '__main__':
file_contents = read_csv('test.csv')
line = search(file_contents, 'ME')
print(line if line else 'No Hit Found')
Json - This is much better option than csv file
import json
def read_json(filename:str) -> dict:
with open(filename) as json_file:
all_books = json.load(json_file)
return all_books
def search(all_books:dict, book_name:str):
for book_id, book_details in all_books.items():
if book_details['Name'] == book_name:
return book_details
return None
if __name__ == '__main__':
all_books = read_json('books.json')
book = search(all_books, 'YOU')
print(book if book else 'Not hit found')
If your file contents can't change, then I will go with #tripleee suggestion above. Good luck.
You are reading a line at a time, and looping over the first line's contents. At this point in the program, there are not yet any additional lines. But a fix is relatively easy:
def search():
search_book = input('Search a book: ')
with open('library.txt', mode='r', encoding='utf-8') as f:
index = 0
print_this_many = 0
for line in f:
index += 1
if search_book in line:
print(f'{search_book} is in line {index}')
print_this_many = 5
if print_this_many:
print(line, end='')
print_this_many -= 1
We don't have the next lines in memory yet, but we can remember how many of them to print as we go ahead and read more of them. The print_this_many variable is used for this: When we see the title we want, we set it to 5 (to specify that this and the next four lines should be printed). Now, each time we read a new line, we check if this variable is positive; if it is, we print the line and decrement the variable. When it reaches zero, we will no longer print the following lines. This allows us to "remember" across iterations of the for loop which reads each new line whether we are in the middle of printing something.
A much better solution is to read the database into memory once, and organize the lines into a dictionary, for example.
def read_lib(filename):
library = dict()
with open(filename) as lib:
title = None
info = []
for line in lib:
line = line.rstrip('\n')
if title is None:
title = line
elif line == '':
if title and info:
library[title] = info
title = None
else:
info.append(line)
def search(title, library):
if title in library:
return library[title]
else:
return None
def main():
my_library = read_lib('library.txt')
while True:
sought = input('Search a book: ')
found = search(sought, my_library)
if found:
print('\n'.join(found))
else:
print('Sorry, no such title in library')

How to Exit Python Code when XML File is empty (XML to CSV conversion)

if the XML file is empty i.e. no root element, how to handle in python? When there are XML records, below Python code works fine but whenever XML file empty, conversion process files. I am looking for an exit instead of failing the process.
XML:
<?xml version = '1.0' encoding = 'UTF-8'?>
<ns2:exportEmpData xmlns:ns2="http://webservice.example.com/"/>
Python Code:
import xml
import csv
import xml.etree.ElementTree as ET
tree = ET.parse('C:/emp/emplist.xml')
root = tree.getroot()
# open a file for writing
Emp_data = open('C:/emp/emplist.csv', 'wb')
# create the csv writer object
csvwriter = csv.writer(Emp_data)
emp_head = []
count = 0
for member in root.findall('emplist'):
emp_nodes = []
if count == 0:
empId = member.find('empId').tag
emp_head.append(empId)
fullName = member.find('fullName').tag
emp_head.append(fullName)
currentAddress = member.find('currentAddress').tag
emp_head.append(currentAddress)
csvwriter.writerow(emp_head)
count = count + 1
empId = member.find('empId').text
emp_nodes.append(empId)
fullName = member.find('fullName').text
emp_nodes.append(fullName)
currentAddress = member.find('currentAddress').attrib.get('city')
emp_nodes.append(currentAddress)
csvwriter.writerow(emp_nodes)
Emp_data.close()

Understanding how to quote XML string in order to serialize using Python's ElementTree

My specs:
Python 3.4.3
Windows 7
IDE is Jupyter Notebooks
What I have referenced:
how-to-properly-escape-single-and-double-quotes
python-escaping-strings-for-use-in-xml
escaping-characters-in-a-xml-file-with-python
Here is the data and script, respectively, below (I have tried variations on serializing Column 'E' using both Sax and ElementTree):
Data
A,B,C,D,E,F,G,H,I,J
"3","8","1","<Request TransactionID="3" RequestType="FOO"><InstitutionISO /><CallID>23</CallID><MemberID>12</MemberID><MemberPassword /><RequestData><AccountNumber>2</AccountNumber><AccountSuffix>85</AccountSuffix><AccountType>S</AccountType><MPIAcctType>Checking</MPIAcctType><TransactionCount>10</TransactionCount></RequestData></Request>","<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>","1967-12-25 22:18:13.471000","2005-12-25 22:18:13.768000","2","70","0"
Script
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os.path
import sys
import csv
from io import StringIO
import xml.etree.cElementTree as ElementTree
from xml.etree.ElementTree import XMLParser
import xml
import xml.sax
from xml.sax import ContentHandler
class MyHandler(xml.sax.handler.ContentHandler):
def __init__(self):
self._charBuffer = []
self._result = []
def _getCharacterData(self):
data = ''.join(self._charBuffer).strip()
self._charBuffer = []
return data.strip() #remove strip() if whitespace is important
def parse(self, f):
xml.sax.parse(f, self)
return self._result
def characters(self, data):
self._charBuffer.append(data)
def startElement(self, name, attrs):
if name == 'Response':
self._result.append({})
def endElement(self, name):
if not name == 'Response': self._result[-1][name] = self._getCharacterData()
def read_data(path):
with open(path, 'rU', encoding='utf-8') as data:
reader = csv.DictReader(data, delimiter =',', quotechar="'", skipinitialspace=True)
for row in reader:
yield row
if __name__ == "__main__":
empty = ''
Response = 'sample.csv'
for idx, row in enumerate(read_data(Response)):
if idx > 10: break
data = row['E']
print(data) # The before
data = data[1:-1]
data = ""'{}'"".format(data)
print(data) # Sanity check
# data = '<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>'
try:
root = ElementTree.XML(data)
# print(root)
except StopIteration:
raise
pass
# xmlstring = StringIO(data)
# print(xmlstring)
# Handler = MyHandler().parse(xmlstring)
Specifically, due to the quoting in the CSV file (which is beyond my control), I have had to resort to slicing the string (line 51) and then formatting it (line 52).
However the print out from the above attempt is as follows:
"<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000'
<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000
File "<string>", line unknown
ParseError: no element found: line 1, column 69
Interestingly - if I assign the variable "data" (as in line 54) I receive this:
File "<ipython-input-80-7357c9272b92>", line 56
data = '<Response TransactionID="2" RequestType="HoldInquiry"><ShareList>0000',0001,0070,</ShareList></Response>'
^
SyntaxError: invalid token
I seek feedback and information on how to address utilizing the most Pythonic means to do so. Ideally, is there a method that can leverage ElementTree. Thank you, in advance, for your feedback and guidance.
It seems that You have badly formatted (well, badly quoted) csv data.
If csv file is beyond Your control I suggest not using csv reader to read them,
instead - if You can rely on each field being properly quoted - split them yourself.
with open(Response, 'rU', encoding='utf-8') as data:
separated = data.read().split('","')
try:
x = ElementTree.XML(separated[3])
print(x)
xml.etree.ElementTree.dump(x)
y = ElementTree.XML(separated[4])
xml.etree.ElementTree.dump(y)
except Exception as e:
print(e)
outputs
<Element 'Request' at 0xb6d973b0>
<Request RequestType="FOO" TransactionID="3"><InstitutionISO /><CallID>23</CallID><MemberID>12</MemberID><MemberPassword /><RequestData><AccountNumber>2</AccountNumber><AccountSuffix>85</AccountSuffix><AccountType>S</AccountType><MPIAcctType>Checking</MPIAcctType><TransactionCount>10</TransactionCount></RequestData></Request>
<Response RequestType="HoldInquiry" TransactionID="2"><ShareList>0000',0001,0070,</ShareList></Response>

Python, XML, Iterating through tags produces NONE value

I currently have a program in Python that opens an XML file, iterates through the tags and prints out the contents to a text file.
import xml.etree.ElementTree as etree
import xml.etree.ElementTree as ET
import tkFileDialog
#asks for the location of the file to open
file = tkFileDialog.askopenfilename()
#parse in the file
tree = ET.parse(file)
root = tree.getroot()
#asks for the location/name of court file to save as
file = tkFileDialog.asksaveasfilename()
f = open(file, 'w')
f.write("\t\t\tReport: \n\n")
#this prints out all tags and text in the right order
iter_ = tree.getiterator()
for elem in iter_:
#print (elem.tag, elem.text)
f.write("\t")
f.write(elem.tag)
f.write(": \t\t")
f.write(str(elem.text))
f.write("\n")
f.close()
The problem is that some of the tags in the XML file don't have any user input, so when the program iterates through, it places NONE in the space, instead of leaving it blank.
Text File output:
Crime Report:
crime_report: None
case_number: 090
victim_details: None
victim_first_name: j
victim_surname: j
officer_details: None
officer_name: j
officer_ID_number: j
witness_details: None
witness_first_name: j
witness_address: j
Current XML file:
<crime_report>
<case_number caseno="unique identifier associated to the case">090
<victim_details>
<victim_first_name>j</victim_first_name>
<victim_surname>j</victim_surname>
</victim_details>
<officer_details>
<officer_name>j</officer_name>
<officer_ID_number>j</officer_ID_number>
</officer_details>
<witness_details>
<witness_first_name>j</witness_first_name>
<witness_address>j</witness_address>
</witness_details>
</case_number>
</crime_report>
I can't figure out how to iterate through, so that these will just be blank, instead of displaying NONE?

Trying to iterate through CSV files in a directory and grab a particular cell then post results to a single csv

I am trying to iterate through several CSV files in a directory and grab a particular cell (same cell location) from each CSV file (cell location found when opened in Excel) and then post all similar cells in a single CSV or xls file, one after the other.
I have writen the code below (with some researched help) but I am just iterating over the first csv file in my list and printing the same value each time, dependant on the number of CSV files in my list. Could anybody point me in the right direction?
Here's my poor attempt!
import xlwt
import xlrd
import csv
import glob
import os
files = ['1_IQ_QTA.csv','2_IQ_QTA.csv','3_IQ_QTA.csv','4_IQ_QTA.csv']
n = 0
row = 0
filename = ('outputList.csv', 'a')
fname = files[n]
workbookr = xlrd.open_workbook(fname)
sheetr = workbookr.sheet_by_index(0)
workbookw = xlwt.Workbook()
sheetw = workbookw.add_sheet('test')
while n<len(files):
fname = files[n]
workbookr = xlrd.open_workbook(fname[n])
data = [sheetr.cell_value(12, 1) for col in range(sheetr.ncols)]
for index, value in enumerate(data):
sheetw.write(row, index, value)
workbookw.save('outputList.csv')
row = row +1
n = n+1
workbookw.save('outputList.csv')
My code is still a bit messy, I may have leftover code from my various attempts!
Thanks
MikG
Assuming you are just trying to make a CSV file of the same cells from each file. So if you had 4 files, your output file will have 4 entries.
files = ['1_IQ_QTA.csv','2_IQ_QTA.csv','3_IQ_QTA.csv','4_IQ_QTA.csv']
n = 0
row = 0
outputfile = open('outputList.csv', 'w')
cellrow = 12 #collect the cell (12, 1) from each file and put it in the output list
cellcolumn = 1
while n<len(files):
fname = files[n]
currentfile = open(fname,'r')
for i in range (cellrow):
currentrow = currentfile.readline()
# print currentrow #for testing
columncnt=0
currentcell = ''
openquote = False
for char in currentrow:
if char == '"' and not openquote:
openquote = True
elif char == '"' and openquote:
openquote = False
elif char == ',' and not openquote:
columncnt+=1
if columncnt == cellcolumn:
cellvalue = currentcell
# print cellvalue #for testing
currentcell=''
else:
currentcell += char
outputfile.write (cellvalue + ',')
currentfile.close()
n += 1
outputfile.close()
It seemed to me that since you already had a CSV it would be easier to deal with as a regular file and parse through to find the right information, plus nothing to import. Happy coding!
I think you have an error at this line in the while loop:
workbookr = xlrd.open_workbook(fname[n])
must be:
workbookr = xlrd.open_workbook(fname)
otherwise your workbookr remains as you set it before outside the loop:
fname = files[n]
workbookr = xlrd.open_workbook(fname)
which is the first file in your list.
Since they are just csv files, there is no need for the excel libraries.
#!/usr/bin/env python
import argparse, csv
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='merge csv files on field', version='%(prog)s 1.0')
parser.add_argument('infile', nargs='+', type=str, help='list of input files')
parser.add_argument('--col', type=int, default=0, help='Column to grab')
parser.add_argument('--row', type=int, default=0, help='Row to grab')
parser.add_argument('--out', type=str, default='temp.csv', help='name of output file')
args = parser.parse_args()
data = []
for fname in args.infile:
with open(fname, 'rb') as df:
reader = csv.reader(df)
for index, line in enumerate(reader):
if index == args.row:
data.push(line[args.column])
del reader
writer = csv.writer(open(args.out, "wb"), dialect='excel')
writer.writerows(data)
del writer

Categories