I am looking to append a dictionary I have to a CSV file were I already have a header line
and if a value doesn't exist I want to write '-999':
SDict ={T1:'A',T2:'B',T4:'D')
where CSV file has header of
T1,T2,T3,T4,T5
7,8,9,10,11
and the expected results are
T1,T2,T3,T4,T5
7,8,9,10,11
A,B,-999,D,-999
I am trying to do so with the code:
import sys
import os
import csv
def GetFileHeader(Fpath):
i=10
ResFile=open (Fpath, 'r+')
HeaderDict={}
r=csv.reader(ResFile)
HeaderList = r.next()
for Header in HeaderList:
HeaderDict[Header]=i+1
print HeaderDict
ResFile.close()
return HeaderDict
Fpath= r'Z:\temp\assaf\S2TTP\S2T_TP\modules\results\Y124\res.csv'
Header= GetFileHeader(Fpath)
with open(Fpath,'rb') as fin:
dr = csv.DictReader(fin, dialect='excel')
print dr
print dr.fieldnames
# dr.fieldnames contains values from first row of `f`.
with open(Fpath,'ab+') as fou:
dw = csv.DictWriter(fou, dialect='excel', fieldnames=dr.fieldnames)
fieldnames=dr.fieldnames
for K in fieldnames:
dw.writerow(Header[k])
I think you can simply do:
import csv
SDict = {'T1': 'A', 'T2': 'B', 'T4': 'D'}
with open('file.csv', 'r+b') as f:
header = next(csv.reader(f))
dict_writer = csv.DictWriter(f, header, -999)
dict_writer.writerow(SDict)
This is assuming you're on Python 2.X. Also, be wary of files which don't end in a newline, or you could end up with a row like 7,8,9,10,11A,B,-999,D,-999.
Related
This is one file result.csv:
M11251TH1230
M11543TH4292
M11435TDS144
This is another file sample.csv:
M11435TDS144,STB#1,Router#1
M11543TH4292,STB#2,Router#1
M11509TD9937,STB#3,Router#1
M11543TH4258,STB#4,Router#1
Can I write a Python program to compare both the files and if line in result.csv matches with the first word in the line in sample.csv, then append 1 else append 0 at every line in sample.csv?
import pandas as pd
d1 = pd.read_csv("1.csv",names=["Type"])
d2 = pd.read_csv("2.csv",names=["Type","Col2","Col3"])
d2["Index"] = 0
for x in d1["Type"] :
d2["Index"][d2["Type"] == x] = 1
d2.to_csv("3.csv",header=False)
Considering "1.csv" and "2.csv" are your csv input files and "3.csv" is the result you needed
The solution using csv.reader and csv.writer (csv module):
import csv
newLines = []
# change the file path to the actual one
with open('./data/result.csv', newline='\n') as csvfile:
data = csv.reader(csvfile)
items = [''.join(line) for line in data]
with open('./data/sample.csv', newline='\n') as csvfile:
data = list(csv.reader(csvfile))
for line in data:
line.append(1 if line[0] in items else 0)
newLines.append(line)
with open('./data/sample.csv', 'w', newline='\n') as csvfile:
writer = csv.writer(csvfile)
writer.writerows(newLines)
The sample.csv contents:
M11435TDS144,STB#1,Router#1,1
M11543TH4292,STB#2,Router#1,1
M11509TD9937,STB#3,Router#1,0
M11543TH4258,STB#4,Router#1,0
With only one column, I wonder why you made it as a result.csv. If it is not going to have any more columns, a simple file read operation would suffice. Along with converting the data from result.csv to dictionary will help in quick run as well.
result_file = "result.csv"
sample_file = "sample.csv"
with open(result_file) as fp:
result_data = fp.read()
result_dict = dict.fromkeys(result_data.split("\n"))
"""
You can change the above logic, in case you have very few fields on csv like this:
result_data = fp.readlines()
result_dict = {}
for result in result_data:
key, other_field = result.split(",", 1)
result_dict[key] = other_field.strip()
"""
#Since sample.csv is a real csv, using csv reader and writer
with open(sample_file, "rb") as fp:
sample_data = csv.reader(fp)
output_data = []
for data in sample_data:
output_data.append("%s,%d" % (data, data[0] in result_dict))
with open(sample_file, "wb") as fp:
data_writer = csv.writer(fp)
data_writer.writerows(output_data)
The following snippet of code will work for you
import csv
with open('result.csv', 'rb') as f:
reader = csv.reader(f)
result_list = []
for row in reader:
result_list.extend(row)
with open('sample.csv', 'rb') as f:
reader = csv.reader(f)
sample_list = []
for row in reader:
if row[0] in result_list:
sample_list.append(row + [1])
else:
sample_list.append(row + [0]
with open('sample.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerows(sample_list)
I am trying to merge two csv files with a common id column and write the merge to a new file. I have tried the following but it is giving me an error -
import csv
from collections import OrderedDict
filenames = "stops.csv", "stops2.csv"
data = OrderedDict()
fieldnames = []
for filename in filenames:
with open(filename, "rb") as fp: # python 2
reader = csv.DictReader(fp)
fieldnames.extend(reader.fieldnames)
for row in reader:
data.setdefault(row["stop_id"], {}).update(row)
fieldnames = list(OrderedDict.fromkeys(fieldnames))
with open("merged.csv", "wb") as fp:
writer = csv.writer(fp)
writer.writerow(fieldnames)
for row in data.itervalues():
writer.writerow([row.get(field, '') for field in fieldnames])
Both files have the "stop_id" column but I'm getting this error back -
KeyError: 'stop_id'
Any help would be much appreciated.
Thanks
Here is an example using pandas
import sys
from StringIO import StringIO
import pandas as pd
TESTDATA=StringIO("""DOB;First;Last
2016-07-26;John;smith
2016-07-27;Mathew;George
2016-07-28;Aryan;Singh
2016-07-29;Ella;Gayau
""")
list1 = pd.read_csv(TESTDATA, sep=";")
TESTDATA=StringIO("""Date of Birth;Patient First Name;Patient Last Name
2016-07-26;John;smith
2016-07-27;Mathew;XXX
2016-07-28;Aryan;Singh
2016-07-20;Ella;Gayau
""")
list2 = pd.read_csv(TESTDATA, sep=";")
print list2
print list1
common = pd.merge(list1, list2, how='left', left_on=['Last', 'First', 'DOB'], right_on=['Patient Last Name', 'Patient First Name', 'Date of Birth']).dropna()
print common
Thanks Shijo.
This is what worked for me after - merged by the first column in each csv.
import csv
from collections import OrderedDict
with open('stops.csv', 'rb') as f:
r = csv.reader(f)
dict2 = {row[0]: row[1:] for row in r}
with open('stops2.csv', 'rb') as f:
r = csv.reader(f)
dict1 = OrderedDict((row[0], row[1:]) for row in r)
result = OrderedDict()
for d in (dict1, dict2):
for key, value in d.iteritems():
result.setdefault(key, []).extend(value)
with open('ab_combined.csv', 'wb') as f:
w = csv.writer(f)
for key, value in result.iteritems():
w.writerow([key] + value)
http://example.com/item/all-atv-quad.html,David,"Punjab",+123456789123
http://example.com/item/70cc-2014.html,Qubee,"Capital",+987654321987
http://example.com/item/quad-bike-zenith.html,Zenith,"UP",+123456789123
I have this test.csv where I have scraped a few items from certain site but the thing is "number" field has redundancy. So I somehow need to remove a row that has the same number as before. This is just the example file, In the real file some numbers are repeated more than 50+ times.
import csv
with open('test.csv', newline='') as csvfile:
csvreader = csv.reader(csvfile, delimiter=',')
for column in csvreader:
"Some logic here"
if (column[3] == "+123456789123"):
print (column[0])
"or here"
I need reformated csv like this:
http://example.com/item/all-atv-quad.html,David,"Punjab",+123456789123
http://example.com/item/70cc-2014.html,Qubee,"Capital",+987654321987
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pandas as pd
def direct():
seen = set()
with open("test.csv") as infile, open("formatted.csv", 'w') as outfile:
for line in infile:
parts = line.rstrip().split(',')
number = parts[-1]
if number not in seen:
seen.add(number)
outfile.write(line)
def using_pandas():
"""Alternatively, use Pandas"""
df = pd.read_csv("test.csv", header=None)
df = df.drop_duplicates(subset=[3])
df.to_csv("formatted_pandas.csv", index=None, header=None)
def main():
direct()
using_pandas()
if __name__ == "__main__":
main()
This would filter out duplicates:
seen = set()
for line in csvreader:
if line[3] in seen:
continue
seen.add(line[3])
# write line to output file
And the csv read and write logic:
with open('test.csv') as fobj_in, open('test_clean.csv', 'w') as fobj_out:
csv_reader = csv.reader(fobj_in, delimiter=',')
csv_writer = csv.writer(fobj_out, delimiter=',')
seen = set()
for line in csvreader:
if line[3] in seen:
continue
seen.add(line[3])
csv_writer.writerow(line)
So I have a program that creates CSV from .Json.
First I load the json file.
f = open('Data.json')
data = json.load(f)
f.close()
Then I go through it, looking for a specific keyword, if I find that keyword. I'll write everything related to that in a .csv file.
for item in data:
if "light" in item:
write_light_csv('light.csv', item)
This is my write_light_csv function :
def write_light_csv(filename,dic):
with open (filename,'a') as csvfile:
headers = ['TimeStamp', 'light','Proximity']
writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n',fieldnames=headers)
writer.writeheader()
writer.writerow({'TimeStamp': dic['ts'], 'light' : dic['light'],'Proximity' : dic['prox']})
I initially had wb+ as the mode, but that cleared everything each time the file was opened for writing. I replaced that with a and now every time it writes, it adds a header. How do I make sure that header is only written once?.
You could check if file is already exists and then don't call writeheader() since you're opening the file with an append option.
Something like that:
import os.path
file_exists = os.path.isfile(filename)
with open (filename, 'a') as csvfile:
headers = ['TimeStamp', 'light', 'Proximity']
writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n',fieldnames=headers)
if not file_exists:
writer.writeheader() # file doesn't exist yet, write a header
writer.writerow({'TimeStamp': dic['ts'], 'light': dic['light'], 'Proximity': dic['prox']})
Just another way:
with open(file_path, 'a') as file:
w = csv.DictWriter(file, my_dict.keys())
if file.tell() == 0:
w.writeheader()
w.writerow(my_dict)
You can check if the file is empty
import csv
import os
headers = ['head1', 'head2']
for row in interator:
with open('file.csv', 'a') as f:
file_is_empty = os.stat('file.csv').st_size == 0
writer = csv.writer(f, lineterminator='\n')
if file_is_empty:
writer.writerow(headers)
writer.writerow(row)
I would use some flag and run a check before writing headers! e.g.
flag=0
def get_data(lst):
for i in lst:#say list of url
global flag
respons = requests.get(i)
respons= respons.content.encode('utf-8')
respons=respons.replace('\\','')
print respons
data = json.loads(respons)
fl = codecs.open(r"C:\Users\TEST\Desktop\data1.txt",'ab',encoding='utf-8')
writer = csv.DictWriter(fl,data.keys())
if flag==0:
writer.writeheader()
writer.writerow(data)
flag+=1
print "You have written % times"%(str(flag))
fl.close()
get_data(urls)
Can you change the structure of your code and export the whole file at once?
def write_light_csv(filename, data):
with open (filename, 'w') as csvfile:
headers = ['TimeStamp', 'light','Proximity']
writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n',fieldnames=headers)
writer.writeheader()
for item in data:
if "light" in item:
writer.writerow({'TimeStamp': item['ts'], 'light' : item['light'],'Proximity' : item['prox']})
write_light_csv('light.csv', data)
You can use the csv.Sniffer Class and
with open('my.csv', newline='') as csvfile:
if csv.Sniffer().has_header(csvfile.read(1024))
# skip writing headers
While using Pandas: (for storing Dataframe data to CSV file)
just add this check before setting header property if you are using an index to iterate over API calls to add data in CSV file.
if i > 0:
dataset.to_csv('file_name.csv',index=False, mode='a', header=False)
else:
dataset.to_csv('file_name.csv',index=False, mode='a', header=True)
Here's another example that only depends on Python's builtin csv package. This method checks that the header is what's expected or it throws an error. It also handles the case where the file doesn't exist or does exist but is empty by writing the header. Hope this helps:
import csv
import os
def append_to_csv(path, fieldnames, rows):
is_write_header = not os.path.exists(path) or _is_empty_file(path)
if not is_write_header:
_assert_field_names_match(path, fieldnames)
_append_to_csv(path, fieldnames, rows, is_write_header)
def _is_empty_file(path):
return os.stat(path).st_size == 0
def _assert_field_names_match(path, fieldnames):
with open(path, 'r') as f:
reader = csv.reader(f)
header = next(reader)
if header != fieldnames:
raise ValueError(f'Incompatible header: expected {fieldnames}, '
f'but existing file has {header}')
def _append_to_csv(path, fieldnames, rows, is_write_header: bool):
with open(path, 'a') as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
if is_write_header:
writer.writeheader()
writer.writerows(rows)
You can test this with the following code:
file_ = 'countries.csv'
fieldnames_ = ['name', 'area', 'country_code2', 'country_code3']
rows_ = [
{'name': 'Albania', 'area': 28748, 'country_code2': 'AL', 'country_code3': 'ALB'},
{'name': 'Algeria', 'area': 2381741, 'country_code2': 'DZ', 'country_code3': 'DZA'},
{'name': 'American Samoa', 'area': 199, 'country_code2': 'AS', 'country_code3': 'ASM'}
]
append_to_csv(file_, fieldnames_, rows_)
If you run this once you get the following in countries.csv:
name,area,country_code2,country_code3
Albania,28748,AL,ALB
Algeria,2381741,DZ,DZA
American Samoa,199,AS,ASM
And if you run it twice you get the following (note, no second header):
name,area,country_code2,country_code3
Albania,28748,AL,ALB
Algeria,2381741,DZ,DZA
American Samoa,199,AS,ASM
Albania,28748,AL,ALB
Algeria,2381741,DZ,DZA
American Samoa,199,AS,ASM
If you then change the header in countries.csv and run the program again, you'll get a value error, like this:
ValueError: Incompatible header: expected ['name', 'area', 'country_code2', 'country_code3'], but existing file has ['not', 'right', 'fieldnames']
Some example data:
title1|title2|title3|title4|merge
test|data|here|and
test|data|343|AND
",3|data|343|and
My attempt at coding this:
import csv
import StringIO
storedoutput = StringIO.StringIO()
fields = ('title1', 'title2', 'title3', 'title4', 'merge')
with open('file.csv', 'rb') as input_csv:
reader = csv.DictReader(input_csv, fields, delimiter='|')
for counter, row in enumerate(reader):
counter += 1
#print row
if counter != 1:
for field in fields:
if field == "merge":
row['merge'] = ("%s%s%s" % (row["title1"], row["title3"], row["title4"]))
print row
storedoutput.writelines(','.join(map(str, row)) + '\n')
contents = storedoutput.getvalue()
storedoutput.close()
print "".join(contents)
with open('file.csv', 'rb') as input_csv:
input_csv = input_csv.read().strip()
output_csv = []
output_csv.append(contents.strip())
if "".join(output_csv) != input_csv:
with open('file.csv', 'wb') as new_csv:
new_csv.write("".join(output_csv))
Output should be
title1|title2|title3|title4|merge
test|data|here|and|testhereand
test|data|343|AND|test343AND
",3|data|343|and|",3343and
For your reference upon running this code the first print it prints the rows as I would hope then to appear in the output csv. However the second print prints the title row x times where x is the number of rows.
Any input or corrections or working code would be appreciated.
I think we can make this a lot simpler. Dealing with the rogue " was a bit of a nuisance, I admit, because you have to work hard to tell Python you don't want to worry about it.
import csv
with open('file.csv', 'rb') as input_csv, open("new_file.csv", "wb") as output_csv:
reader = csv.DictReader(input_csv, delimiter='|', quoting=csv.QUOTE_NONE)
writer = csv.DictWriter(output_csv, reader.fieldnames, delimiter="|",quoting=csv.QUOTE_NONE, quotechar=None)
merge_cols = "title1", "title3", "title4"
writer.writeheader()
for row in reader:
row["merge"] = ''.join(row[col] for col in merge_cols)
writer.writerow(row)
produces
$ cat new_file.csv
title1|title2|title3|title4|merge
test|data|here|and|testhereand
test|data|343|AND|test343AND
",3|data|343|and|",3343and
Note that even though you wanted the original file updated, I refused. Why? It's a bad idea, because then you can destroy your data while working on it.
How can I be so sure? Because that's exactly what I did when I first ran your code, and I know better. ;^)
That double quote in the last line is definitely messing up the csv.DictReader().
This works:
new_lines = []
with open('file.csv', 'rb') as f:
# skip the first line
new_lines.append(f.next().strip())
for line in f:
# the newline and split the fields
line = line.strip().split('|')
# exctract the field data you want
title1, title3, title4 = line[0], line[2], line[3]
# turn the field data into a string and append in to the rest
line.append(''.join([title1, title3, title4]))
# save the new line for later
new_lines.append('|'.join(line))
with open('file.csv', 'w') as f:
# make one long string and write it to the new file
f.write('\n'.join(new_lines))
import csv
import StringIO
stored_output = StringIO.StringIO()
with open('file.csv', 'rb') as input_csv:
reader = csv.DictReader(input_csv, delimiter='|', quoting=csv.QUOTE_NONE)
writer = csv.DictWriter(stored_output, reader.fieldnames, delimiter="|",quoting=csv.QUOTE_NONE, quotechar=None)
merge_cols = "title1", "title3", "title4"
writer.writeheader()
for row in reader:
row["merge"] = ''.join(row[col] for col in merge_cols)
writer.writerow(row)
contents = stored_output.getvalue()
stored_output.close()
print contents
with open('file.csv', 'rb') as input_csv:
input_csv = input_csv.read().strip()
if input_csv != contents.strip():
with open('file.csv', 'wb') as new_csv:
new_csv.write("".join(contents))