import csv
def write_to_dictionaries_to_csv(csvWriter,lst_dic,lst_keys):
for dic in data:
lst = []
for key in lst_keys:
if key in dic:
value = dic[key]
lst.append(value)
return lst
data = [{'tow_reason': 'IL', 'tow_date': '2013-06-18'}, {'tow_date': '2014-09-25', 'tow_reason': 'GA'}]
with open("smallDataFileIWrote.csv", 'w') as f_out:
csv_w = csv.writer(f_out)
result = write_to_dictionaries_to_csv(csv_w, data, ['tow_reason','tow_date'])
csv_w.writerow(result)
Why is this code only writing:
IL,2013-06-18
to the file?
I want the file to have both:
IL, 2013-06-18
GA, 2014-09-25
written to the file what am I doing wrong?
You are reinitializing the lst every time in the loop and return inside the loop.
Move it out:
def write_to_dictionaries_to_csv(csvWriter,lst_dic,lst_keys):
lst = []
for dic in data:
row = []
for key in lst_keys:
if key in dic:
value = dic[key]
row.append(value)
lst.append(row)
return lst
For the writing:
result = write_to_dictionaries_to_csv(csv_w, data, ['tow_reason','tow_date'])
for row in result:
csv_w.writerow(row)
Final code:
import csv
def write_to_dictionaries_to_csv(lst_keys):
lst = []
for dic in data:
row = []
for key in lst_keys:
if key in dic:
value = dic[key]
row.append(value)
lst.append(row)
return lst
data = [{'tow_reason': 'IL', 'tow_date': '2013-06-18'},
{'tow_date': '2014-09-25', 'tow_reason': 'GA'}]
with open('smallDataFileIWrote.csv', 'w', newline='\n', encoding='utf-8') as f_out:
csv_w = csv.writer(f_out)
result = write_to_dictionaries_to_csv(['tow_reason', 'tow_date'])
for row in result:
csv_w.writerow(row)
P/s: Your code is quite ugly. Try removing unnecessary parts/variables and naming variables more meaningful.
Your lst is being empty, because it is inside loop. Try this
import csv
def write_to_dictionaries_to_csv(csvWriter,lst_dic,lst_keys):
lst = []
for dic in data:
for key in lst_keys:
if key in dic:
value = dic[key]
lst.append(value)
return lst
data = [{'tow_reason': 'IL', 'tow_date': '2013-06-18'}, {'tow_date': '2014-09-25', 'tow_reason': 'GA'}]
with open("smallDataFileIWrote.csv", 'w') as f_out:
csv_w = csv.writer(f_out)
result = write_to_dictionaries_to_csv(csv_w, data, ['tow_reason','tow_date'])
csv_w.writerow(result)
Related
I have a file in1.txt
info="0x0000b573" data="0x7" id="sp. PCU(Si)"
info="0x0000b573" data="0x00000007" id="HI all. SHa"
info="0x00010AC3" data="0x00000003" id="abc_16. PS"
info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"
info="0x205" data="0x00000010" id="cgc_15. PK"
info="0x205" data="0x10" id="cgsd_GH/BS (Scd)"
Expected output: out.txt
info="0x00010AC3" data="0x00000003" id="abc_16. PS"
info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"
I need only lines that have same info values and different data values to be written to out.txt.
but the current code removes all the line that have string data in it.
with open("in.txt", "r") as fin,open("out.txt", "w") as fout:
for line in fin:
if 'data' not in line:
fout.write(line.strip()+'\n')
what i need is for eg: line 1 and line 2 is having same info="0x0000b573" and data is "0x7" & "0x00000007" which is same then remove that line.
You can use regex
import re
s = '''info="0x0000b573" data="0x7" id="sp. PCU(Si)"
info="0x0000b573" data="0x00000007" id="HI all. SHa"
info="0x00010AC3" data="0x00000003" id="abc_16. PS"
info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"
info="0x205" data="0x00000010" id="cgc_15. PK"
info="0x205" data="0x10" id="cgsd_GH/BS (Scd)"'''
parsed_data = re.findall(r'info="([^"]+)" data="([^"]+)" id="[^"]+"', s, re.MULTILINE)
parsed_data = sorted([list(map(lambda x: int(x, 16), i)) + [index] for index,i in enumerate(parsed_data)])
row_numbers = [j for i in [[parsed_data[i][-1], parsed_data[i+1][-1]] for i in range(0,len(parsed_data),2) if parsed_data[i][1] != parsed_data[i+1][1]] for j in i]
final_output = []
for index,line in enumerate(s.split('\n')):
if index in row_numbers:
final_output.append(line)
final_out_text = '\n'.join(final_output)
print(final_out_text)
# info="0x00010AC3" data="0x00000003" id="abc_16. PS"
# info="0x00010ac3" data="0x00000045" id="hB2_RC/BS (Spr)"
You could try something like that too, I think
#!/usr/bin/python3
records = {}
items = []
info = []
data = []
with open("in.dat", "r") as fin:
for line in fin:
items=line.split(' ')
info = items[0].split('=')
data = items[1].split('=')
try:
key = info[1].strip('"').lower()
value = str(int(data[1].strip('"'), 16))
records[key][value] += 1
except KeyError:
try:
records[key][value] = 1
except KeyError:
records[key] = {value: 1}
out = dict()
for key in records:
for value in records[key]:
if records[key][value] == 1:
try:
out[key].append(value)
except KeyError:
out[key] = [value]
with open("out.dat", "w") as fout:
for key in out:
for value in out[key]:
fout.write(f"{key}={value}\n")
Something like this could work:
found_info_values = []
with open("in.txt", "r") as fin,open("out.txt", "w") as fout:
for line in fin:
info = line.split('"')[1]
if info not in found_info_values:
fout.write(line.strip()+'\n')
found_info_values += info
The client includes 3 rows at the bottom that contain totals for me to reconcile against in my program. Only problem is that my program is exhausting the input file with readlines() before it can do anything else. Is there a way to keep the file from being exhausted during my get_recon_total function call?
#!/usr/bin/env python
# pre_process.py
import csv
import sys
def main():
infile = sys.argv[1]
outfile = sys.argv[2]
with open(infile, 'rbU') as in_obj:
# Create reader object, get fieldnames for later on
reader, fieldnames = open_reader(in_obj)
nav_tot_cnt, nav_rec_cnt, nav_erec_cnt = get_recon_totals(in_obj)
print nav_tot_cnt, nav_rec_cnt, nav_erec_cnt
# This switches the dictionary to a sorted list... necessary??
reader_list = sorted(reader, key=lambda key: (key['PEOPLE_ID'],
key['DON_DATE']))
# Create a list to contain section header information
header_list = create_header_list(reader_list)
# Create dictionary that contains header list as the key,
# then all rows that match as a list of dictionaries.
master_dict = map_data(header_list, reader_list)
# Write data to processed file, create recon counts to compare
# to footer record
tot_cnt, rec_cnt, erec_cnt = write_data(master_dict, outfile, fieldnames)
print tot_cnt, rec_cnt, erec_cnt
def open_reader(file_obj):
'''
Uses DictReader from the csv module to take the first header line
as the fieldnames, then applies them to each element in the file.
Returns the DictReader object and the fieldnames being used (used
later when data is printed out with DictWriter.)
'''
reader = csv.DictReader(file_obj, delimiter=',')
return reader, reader.fieldnames
def create_header_list(in_obj):
p_id_list = []
for row in in_obj:
if (row['PEOPLE_ID'], row['DON_DATE']) not in p_id_list:
p_id_list.append((row['PEOPLE_ID'], row['DON_DATE']))
return p_id_list
def map_data(header_list, data_obj):
master_dict = {}
client_section_list = []
for element in header_list:
for row in data_obj:
if (row['PEOPLE_ID'], row['DON_DATE']) == element:
client_section_list.append(row)
element = list(element)
element_list = [client_section_list[0]['DEDUCT_AMT'],
client_section_list[0]['ND_AMT'],
client_section_list[0]['DEDUCT_YTD'],
client_section_list[0]['NONDEDUCT_YTD']
]
try:
element_list.append((float(client_section_list[0]['DEDUCT_YTD']) +
float(client_section_list[0]['NONDEDUCT_YTD'])
))
except ValueError:
pass
element.extend(element_list)
element = tuple(element)
master_dict[element] = client_section_list
client_section_list = []
return master_dict
def write_data(in_obj, outfile, in_fieldnames):
with open(outfile, 'wb') as writer_outfile:
writer = csv.writer(writer_outfile, delimiter=',')
dict_writer = csv.DictWriter(writer_outfile,
fieldnames=in_fieldnames,
extrasaction='ignore')
tot_cnt = 0
rec_cnt = 0
email_cnt = 0
for k, v in in_obj.iteritems():
writer_outfile.write(' -01- ')
writer.writerow(k)
rec_cnt += 1
for i, e in enumerate(v):
if v[i]['INT_CODE_EX0006'] != '' or v[i]['INT_CODE_EX0028'] != '':
email_cnt += 1
writer_outfile.write(' -02- ')
dict_writer.writerow(e)
tot_cnt += 1
return tot_cnt, rec_cnt, email_cnt
def get_recon_totals(in_obj):
print in_obj
client_tot_cnt = 0
client_rec_cnt = 0
client_erec_cnt = 0
for line in in_obj.readlines():
line = line.split(',')
if line[0] == 'T' and line[1] == 'Total Amount':
print 'Total Amount found.'
client_tot_cnt = line[2]
elif line[0] == 'T' and line[1] == 'Receipt Count':
print 'Receipt Count found.'
client_rec_cnt = line[2]
elif line[0] == 'T' and line[1] == 'Email Receipt Count':
print 'E-Receipt Count Found.'
client_erec_cnt = line[2]
return client_tot_cnt, client_rec_cnt, client_erec_cnt
if __name__ == '__main__':
main()
If your file is not very large, you can convert reader generator to a list of dcitonary , by calling list() on reader and then use it in your code instead of trying to read from the file directly.
Example -
def main():
infile = sys.argv[1]
outfile = sys.argv[2]
with open(infile, 'rbU') as in_obj:
# Create reader object, get fieldnames for later on
reader, fieldnames = open_reader(in_obj)
reader_list = list(reader)
nav_tot_cnt, nav_rec_cnt, nav_erec_cnt = get_recon_totals(reader_list)
print nav_tot_cnt, nav_rec_cnt, nav_erec_cnt
# This switches the dictionary to a sorted list... necessary??
reader_list = sorted(reader_list, key=lambda key: (key['PEOPLE_ID'],
key['DON_DATE']))
.
.
def get_recon_totals(reader_list):
print in_obj
client_tot_cnt = 0
client_rec_cnt = 0
client_erec_cnt = 0
for line in reader_list: #line here is a dict
if line[<fieldname for first column>] == 'T' and line[<fieldname for secondcolumn>] == 'Total Amount':
print 'Total Amount found.'
client_tot_cnt = line[<fieldname for third column>]
.
. #continued like above
.
return client_tot_cnt, client_rec_cnt, client_erec_cnt
There is a csv file, say A.csv, having content:
Place,Hotel,Food,Fare
Norway,Regal,NonVeg,5000
Poland,Jenny,Italiano,6000
Norway,Suzane,Vegeterian,4000
Norway,Regal,NonVeg,5000
I have to parse this csv and obtain an output by passing arguments in command prompt.
Example 1:
mycode.py Place
Desired output is:
Place,Fare
Norway,14000
Poland,6000
Example 2:
mycode.py Place Hotel
Desired output is:
Place,Hotel,Fare
Norway,Regal,10000
Poland,Jenny,6000
Norway,Suzane,4000
So it is clear from the above example that no matter what you pass as argument it gives you the sum of the Fare header for the common ones.
Below is my code and I am able to pass arguments and get an output, but I am stuck in sum of Fare. Can any one help me with this.
import sys
import csv
import collections
d = collections.defaultdict(list)
Data = []
Result = []
Final = []
Argvs = []
argv_len = len(sys.argv)
index = 0
input = ''
file = open('A.csv', 'rb')
try:
reader = csv.reader(file)
for row in reader:
Data.append(row)
for x in range(1, argv_len):
Argvs.append(sys.argv[x])
Argvs.append('Fare')
for input in Argvs:
for y in range(0, len(Data[0])):
if(input == Data[0][y]):
for z in range(1, len(Data)):
Result.append(Data[z][y])
break
Final.append(Result)
Result = []
New = []
NewFinal = []
for x in range(0, len(Final[0])):
for y in range(0, len(Final)):
New.append(Final[y][x])
NewFinal.append(New)
New = []
out = {}
for a in NewFinal:
out.setdefault(a[0],[]).append(int(a[-1]))
with open("output.csv", "wb") as csv_file:
writer = csv.writer(csv_file, dialect='excel', delimiter=',')
writer.writerow(Argvs)
for k,v in out.iteritems():
writer.writerow((k,sum(v)))
except Exception,e:
print str(e)
finally:
file.close()
I edit the code and tried to group it. Now I am able to get the aggregate of the Fare but not the desired output.
So when I am passing:
mycode.py Place Hotel
Instead of:
Place,Hotel,Fare
Norway,Regal,10000
Poland,Jenny,6000
Norway,Suzane,4000
I am getting:
Place,Hotel,Fare
Norway,14000
Poland,6000
Finally i managed to get my desired output.
Below i am sharing the final code. \
import sys
import csv
Data = []
Result = []
Final = []
Argvs = []
argv_len = len(sys.argv)
index = 0
input = ''
file = open('A.csv', 'rb')
try:
reader = csv.reader(file)
for row in reader:
Data.append(row)
for x in range(1, argv_len):
Argvs.append(sys.argv[x])
Argvs.append('Fare')
for input in Argvs:
for y in range(0, len(Data[0])):
if(input == Data[0][y]):
for z in range(1, len(Data)):
Result.append(Data[z][y])
break
Final.append(Result)
Result = []
New = []
NewFinal = []
for x in range(0, len(Final[0])):
for y in range(0, len(Final)):
New.append(Final[y][x])
NewFinal.append(New)
New = []
out = {}
for a in NewFinal:
count_val = a[-1]
del a[-1]
key_val = ','.join(a)
out.setdefault(key_val.strip('"'),[]).append(int(count_val))
with open("output.csv", "wb") as csv_file:
writer = csv.writer(csv_file, delimiter=',',quotechar=' ')
writer.writerow(Argvs)
for k,v in out.iteritems():
writer.writerow((k,sum(v)))
except Exception,e:
print str(e)
finally:
file.close()
I want to return a dictionary that a file contains. What I have is this code:
def read_report(filename):
new_report = {}
input_filename = open(filename)
for line in input_filename:
lines = line[:-1]
new_report.append(lines)
input_filename.close()
return new_report
It says I can't append to a dictionary. So how would I go with adding lines from the file into the dictionary? Let's say my filename is this:
shorts: a, b, c, d
longs: a, b, c, d
mosts: a
count: 11
avglen: 1.0
a 5
b 3
c 2
d 1
I'm assuming the last lines of your files (the ones that don't contain :) are to be ignored.
from collections import defaultdict
d = defaultdict(list)
with open('somefile.txt') as f:
for line in f:
if ':' in line:
key, val = line.split(':')
d[key.strip()] += val.rstrip().split(',')
def read_line(filename):
list = []
new_report = {}
file_name = open(filename)
for i in file_name:
list.append(i[:-1])
for i in range(len(list)):
new_report[i] = list[i]
file_name.close()
return new_report
if you rewrite your input file to have uniform lines like the first and the second, you could try this:
EDIT: modified code to support also lines with space separator instead of colon (:)
def read_report(filename):
new_report = {}
f = open(filename)
for line in f:
if line.count(':') == 1:
key, value = line.split(':')
else:
key, value = line.split(' ')
new_report[key] = value.split(',')
f.close()
return new_report
I have the below code:
datedict = defaultdict(set)
with open('d:/info.csv', 'r') as csvfile:
filereader = csv.reader(csvfile, 'excel')
#passing the header
read_header = False
start_date=date(year=2009,month=1,day=1)
#print((seen_date - start_date).days)
tdic = {}
for row in filereader:
if not read_header:
read_header = True
continue
# reading the rest rows
name,id,firstseen = row[0],row[1],row[3]
try:
seen_date = datetime.datetime.strptime(firstseen, '%d/%m/%Y').date()
deltadays = (seen_date-start_date).days
deltaweeks = deltadays/7 + 1
key = name +'-'+id
currentvalue = tdic.get(key, [])
currentvalue.append(deltaweeks)
tdic[key] = currentvalue
except ValueError:
print('Date value error')
pass
tdic = dict((name, max(weeks) - min(weeks) + 1) for name, weeks in tdic.iteritems())
pprint.pprint(tdic)
in which I get the below result:
{'Mali-2': 20,
'Gooki-3': 6,
'Piata-4': 6,
'Goerge-5': 4,
'Samoo-6': 1,
'Marria-7': 2}
Now I would like to write and print the three items, name,id and weeks as separate columns in an excel file. Anyone knows how it is possible?
>>> with open('out.csv', 'w') as f:
w = csv.writer(f)
for k, v in tdic.iteritems():
name, id_ = k.split('-')
weeks = v
w.writerow([name, id_, weeks])
>>> with open('out.csv') as f:
print f.read()
Piata,4,6
Mali,2,20
Goerge,5,4
Gooki,3,6
Samoo,6,1
Marria,7,2
I however don't like the way you have done this, here are some suggestions for your code:
key = name +'-'+id
Instead of using string manipulation to create a key, use a tuple:
key = (name, id)
Change this line:
tdic = dict((name, max(weeks) - min(weeks) + 1) for name, weeks in tdic.iteritems())
to just say
tdic = dict((key, max(weeks) - min(weeks) + 1) for key, weeks in tdic.iteritems())
since now it is a key of (name, id_) we should reflect that (it's a minor thing but important)
Then the above code would simply be
>>> with open('out.csv', 'w') as f:
w = csv.writer(f)
for (name, id_), weeks in tdic.iteritems():
w.writerow([name, id_, weeks])
You can define a dict of dict like this:
a_dict = {key: { anotherKey: value}}
Or if the dict already exists:
a_dict[key] = {anotherkey: value}
print a_dict[key][anotherkey]