I have read other questions and tried their suggestions, but without success. There is already one row in data.csv (a header row that includes productId), and I would like to append data as new rows under the already created headers (productUrl, discount, evaluateScore, volume, packageType, lotNum, validTime, storeName, storeUrl, allImageUrls).
import csv

def get_details():
    pid = get_id()
    print(pid)
    data = aliexpress.get_product_details(['productId', 'productUrl', 'discount', 'evaluateScore',
                                           'volume', 'packageType', 'lotNum', 'validTime', 'storeName', 'storeUrl',
                                           'allImageUrls'], pid)
    with open('data.csv', 'a', newline='') as csvfile:
        fieldnames = ['productUrl', 'discount', 'evaluateScore',
                      'volume', 'packageType', 'lotNum', 'validTime', 'storeName', 'storeUrl',
                      'allImageUrls']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        productUrl = data['productUrl']
        discount = data['discount']
        evaluateScore = data['evaluateScore']
        volume = data['volume']
        packageType = data['packageType']
        lotNum = data['lotNum']
        validTime = data['validTime']
        storeName = data['storeName']
        storeUrl = data['storeUrl']
        allImageUrls = data['allImageUrls']
        allImageUrlstuple = allImageUrls.split(',')
        print(allImageUrls)
        writer.writerow({'productUrl': productUrl, 'discount': discount, 'evaluateScore': evaluateScore,
                         'volume': volume, 'packageType': packageType, 'lotNum': lotNum, 'validTime': validTime,
                         'storeName': storeName, 'storeUrl': storeUrl,
                         'allImageUrls': allImageUrlstuple})
You do not need to re-write the header if it is already in the CSV; just remove the writer.writeheader() call. Example:
import csv

def get_details():
    pid = get_id()
    print(pid)
    data = aliexpress.get_product_details(['productId', 'productUrl', 'discount', 'evaluateScore',
                                           'volume', 'packageType', 'lotNum', 'validTime', 'storeName', 'storeUrl',
                                           'allImageUrls'], pid)
    with open('data.csv', 'a', newline='') as csvfile:
        fieldnames = ['productUrl', 'discount', 'evaluateScore',
                      'volume', 'packageType', 'lotNum', 'validTime', 'storeName', 'storeUrl',
                      'allImageUrls']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        productUrl = data['productUrl']
        discount = data['discount']
        evaluateScore = data['evaluateScore']
        volume = data['volume']
        packageType = data['packageType']
        lotNum = data['lotNum']
        validTime = data['validTime']
        storeName = data['storeName']
        storeUrl = data['storeUrl']
        allImageUrls = data['allImageUrls']
        allImageUrlstuple = allImageUrls.split(',')
        print(allImageUrls)
        writer.writerow({'productUrl': productUrl, 'discount': discount, 'evaluateScore': evaluateScore,
                         'volume': volume, 'packageType': packageType, 'lotNum': lotNum, 'validTime': validTime,
                         'storeName': storeName, 'storeUrl': storeUrl,
                         'allImageUrls': allImageUrlstuple})
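One detail to watch (my note, not part of the original answer): allImageUrlstuple is a list, so DictWriter will write its Python repr into the cell. If you'd rather store a plain delimited string, you could join it back before writing, e.g.:

writer.writerow({'productUrl': productUrl, 'discount': discount, 'evaluateScore': evaluateScore,
                 'volume': volume, 'packageType': packageType, 'lotNum': lotNum, 'validTime': validTime,
                 'storeName': storeName, 'storeUrl': storeUrl,
                 'allImageUrls': ';'.join(allImageUrlstuple)})  # one semicolon-delimited cell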
It may be easier to rewrite the file:
with open('data.csv', newline='') as csvfile:
    r = csv.reader(csvfile)
    data = [line for line in r]

with open('data.csv', 'w', newline='') as csvfile:
    w = csv.writer(csvfile)
    w.writerow(['col1', 'col2'])
    w.writerows(data)
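Applied to the question's file, a sketch might look like this (my adaptation, assuming you want to replace whatever header row is currently first in data.csv with the full field list):

import csv

fieldnames = ['productUrl', 'discount', 'evaluateScore',
              'volume', 'packageType', 'lotNum', 'validTime', 'storeName', 'storeUrl',
              'allImageUrls']

with open('data.csv', newline='') as csvfile:
    rows = list(csv.reader(csvfile))

with open('data.csv', 'w', newline='') as csvfile:
    w = csv.writer(csvfile)
    w.writerow(fieldnames)  # new header row
    w.writerows(rows[1:])   # keep the existing data rows, drop the old header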
Related question:
def general_number(request):
    csvfilename = 'algo/generalnumber.csv'
    csvfile = open(csvfilename, 'r')
    reader = csv.DictReader(csvfile)
    fieldnames = ("Ticker","Company","Industry","PreviousClose","Open","Bid","Ask","DaysRange","ftWeekRange","Volume","AvgVolume","MarketCap","Beta","PERatioTTM","EPSTTM","EarningsDate","ForwardDividendYield","ExDividendDate","OneyTargetEst","ticker_company")
    output = []
    for each in reader:
        row = {}
        for field in fieldnames:
            row[field] = each[field]
            output.append(row)
    return JsonResponse(output[20:30], safe=False)
Here I am getting duplicate data in the JSON produced from the CSV. Each datum shows up 10 times. What is wrong here?
It's just an indentation problem: output.append(row) runs once per field instead of once per row. Fixed it, try now!
def general_number(request):
    csvfilename = 'algo/generalnumber.csv'
    csvfile = open(csvfilename, 'r')
    reader = csv.DictReader(csvfile)
    fieldnames = ("Ticker","Company","Industry","PreviousClose","Open","Bid","Ask","DaysRange","ftWeekRange","Volume","AvgVolume","MarketCap","Beta","PERatioTTM","EPSTTM","EarningsDate","ForwardDividendYield","ExDividendDate","OneyTargetEst","ticker_company")
    output = []
    for each in reader:
        row = {}
        for field in fieldnames:
            row[field] = each[field]
        output.append(row)  # once per CSV row, not once per field
    return JsonResponse(output[20:30], safe=False)
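As an aside (my addition, not from the original answer): since only rows 20:30 are returned, you could avoid materializing the whole file with itertools.islice. A sketch under the same assumptions; note it keeps every column rather than filtering to the fieldnames tuple:

import csv
from itertools import islice
from django.http import JsonResponse

def general_number(request):
    with open('algo/generalnumber.csv', 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # pull only rows 20..29 instead of building the full list
        output = [dict(row) for row in islice(reader, 20, 30)]
    return JsonResponse(output, safe=False)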
TypeError with csv file in Python 3. I tried to write a Python 3 program that writes and edits data in a CSV file, but it raises TypeError: a bytes-like object is required, not 'str'.
A screenshot of the terminal output is attached. I don't know what this means or what is wrong. Can someone help me fix it? The code is below:
import csv
import datetime
import shutil
from tempfile import NamedTemporaryFile

def get_length(file_path):
    with open("data.csv", "r") as csvfile:
        # some csv file data.csv is created in the same directory
        reader = csv.reader(csvfile)
        reader_list = list(reader)
        return len(reader_list)

def append_data(file_path, name, email, amount):
    fieldnames = ['id', 'name', 'email', 'amount', 'sent', 'date']
    # the number of rows?
    next_id = get_length(file_path)
    with open(file_path, "a") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow({
            "id": next_id,
            "name": name,
            "email": email,
            "sent": "",
            "amount": amount,
            "date": datetime.datetime.now()
        })

#append_data("data.csv", "some name", "address#mail.com", 123.456)

def edit_data(edit_id=None, email=None, amount=None, sent=None):
    filename = "data.csv"
    temp_file = NamedTemporaryFile(delete=False)
    with open(filename, "rb") as csvfile, temp_file:
        reader = csv.DictReader(csvfile)
        fieldnames = ['id', 'name', 'email', 'amount', 'sent', 'date']
        writer = csv.DictWriter(temp_file, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            #print(row['id'] == 4)
            if edit_id is not None:
                if int(row['id']) == int(edit_id):
                    row['amount'] = amount
                    row['sent'] = sent
            elif email is not None and edit_id is None:
                if str(row['email']) == str(email):
                    row['amount'] = amount
                    row['sent'] = sent
            else:
                pass
            writer.writerow(row)
        shutil.move(temp_file.name, filename)
        return True
    return False

edit_data(8, 9992.32, "")
edit_data(email='address#mail.com', amount=19.90, sent='10')
On Python 3, when using csv readers and writers, you need to open your file with open("data.csv", "r", newline=""). Opening with rb only works on Python 2.x. You also need to set up similar mode and newline settings for your NamedTemporaryFile.
The following should help:
import csv
import datetime
import shutil
from tempfile import NamedTemporaryFile

fieldnames = ['id', 'name', 'email', 'amount', 'sent', 'date']

def edit_data(edit_id=None, email=None, amount=None, sent=None):
    filename = "data.csv"
    # text mode with newline='' for both files, as the csv module expects on Python 3
    with open(filename, "r", newline="") as csvfile, \
            NamedTemporaryFile(delete=False, mode="w", newline="") as temp_file:
        # fieldnames is passed explicitly; this assumes data.csv has no header
        # row (append_data above never writes one)
        reader = csv.DictReader(csvfile, fieldnames=fieldnames)
        writer = csv.DictWriter(temp_file, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            #print(row['id'] == 4)
            if edit_id is not None:
                if int(row['id']) == int(edit_id):
                    row['amount'] = amount
                    row['sent'] = sent
            elif email is not None and edit_id is None:
                if str(row['email']) == str(email):
                    row['amount'] = amount
                    row['sent'] = sent
            else:
                pass
            writer.writerow(row)
    # replace the original only after both files are closed and flushed
    shutil.move(temp_file.name, filename)

edit_data(email='address#mail.com', amount=19.90, sent='10')
So I have a program that creates a CSV from JSON. First I load the JSON file:

f = open('Data.json')
data = json.load(f)
f.close()

Then I go through it, looking for a specific keyword; if I find that keyword, I write everything related to it into a .csv file.
for item in data:
    if "light" in item:
        write_light_csv('light.csv', item)
This is my write_light_csv function:
def write_light_csv(filename, dic):
    with open(filename, 'a') as csvfile:
        headers = ['TimeStamp', 'light', 'Proximity']
        writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=headers)
        writer.writeheader()
        writer.writerow({'TimeStamp': dic['ts'], 'light': dic['light'], 'Proximity': dic['prox']})
I initially had wb+ as the mode, but that cleared everything each time the file was opened for writing. I replaced it with a, and now every time it writes, it adds a header. How do I make sure the header is only written once?
You could check whether the file already exists, and skip writeheader() if it does, since you're opening the file in append mode.
Something like this:
import os.path

file_exists = os.path.isfile(filename)

with open(filename, 'a') as csvfile:
    headers = ['TimeStamp', 'light', 'Proximity']
    writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=headers)
    if not file_exists:
        writer.writeheader()  # file doesn't exist yet, write a header
    writer.writerow({'TimeStamp': dic['ts'], 'light': dic['light'], 'Proximity': dic['prox']})
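Folded back into the question's function, the whole thing might look like this (a sketch, assuming the same dic keys as above; note the existence check must run before open(), because opening in 'a' mode creates the file):

import csv
import os.path

def write_light_csv(filename, dic):
    file_exists = os.path.isfile(filename)  # check before open() creates the file
    with open(filename, 'a', newline='') as csvfile:
        headers = ['TimeStamp', 'light', 'Proximity']
        writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=headers)
        if not file_exists:
            writer.writeheader()
        writer.writerow({'TimeStamp': dic['ts'], 'light': dic['light'], 'Proximity': dic['prox']})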
Just another way:
with open(file_path, 'a') as file:
    w = csv.DictWriter(file, my_dict.keys())
    if file.tell() == 0:
        w.writeheader()
    w.writerow(my_dict)
You can check if the file is empty:
import csv
import os

headers = ['head1', 'head2']

for row in iterator:
    with open('file.csv', 'a') as f:
        file_is_empty = os.stat('file.csv').st_size == 0
        writer = csv.writer(f, lineterminator='\n')
        if file_is_empty:
            writer.writerow(headers)
        writer.writerow(row)
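A small design note (my addition, not part of the original answer): reopening the file on every iteration is wasteful; you could hoist the open() and the emptiness check out of the loop:

import csv
import os

headers = ['head1', 'head2']

with open('file.csv', 'a', newline='') as f:
    file_is_empty = os.stat('file.csv').st_size == 0
    writer = csv.writer(f, lineterminator='\n')
    if file_is_empty:
        writer.writerow(headers)
    for row in iterator:  # iterator is assumed defined, as in the snippet above
        writer.writerow(row)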
I would use a flag and check it before writing the header, e.g.:
flag = 0

def get_data(lst):
    global flag
    for url in lst:  # say, a list of URLs
        respons = requests.get(url).text
        respons = respons.replace('\\', '')
        print(respons)
        data = json.loads(respons)
        fl = open(r"C:\Users\TEST\Desktop\data1.txt", 'a', encoding='utf-8', newline='')
        writer = csv.DictWriter(fl, data.keys())
        if flag == 0:
            writer.writeheader()  # only the very first call writes the header
        writer.writerow(data)
        flag += 1
        print("You have written %s times" % str(flag))
        fl.close()

get_data(urls)
Can you change the structure of your code and export the whole file at once?
def write_light_csv(filename, data):
    with open(filename, 'w') as csvfile:
        headers = ['TimeStamp', 'light', 'Proximity']
        writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=headers)
        writer.writeheader()
        for item in data:
            if "light" in item:
                writer.writerow({'TimeStamp': item['ts'], 'light': item['light'], 'Proximity': item['prox']})

write_light_csv('light.csv', data)
You can use the csv.Sniffer class:
with open('my.csv', newline='') as csvfile:
    if csv.Sniffer().has_header(csvfile.read(1024)):
        pass  # header already present, skip writing it
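A fuller sketch of that idea (my completion, with the file layout assumed; note has_header is a heuristic and can guess wrong, or raise csv.Error, on unusual data):

import csv
import os

def append_with_sniffer(path, fieldnames, row):
    write_header = True
    if os.path.isfile(path) and os.stat(path).st_size > 0:
        with open(path, newline='') as csvfile:
            # guess from the first 1024 bytes whether a header row is present
            write_header = not csv.Sniffer().has_header(csvfile.read(1024))
    with open(path, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if write_header:
            writer.writeheader()
        writer.writerow(row)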
When using pandas to store DataFrame data in a CSV file: if you are using an index i to iterate over API calls that append data to the CSV, just add this check before setting the header property.
if i > 0:
    dataset.to_csv('file_name.csv', index=False, mode='a', header=False)
else:
    dataset.to_csv('file_name.csv', index=False, mode='a', header=True)
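For context, a sketch of the surrounding loop (my illustration; pages and fetch_page are hypothetical names, with fetch_page returning a pandas DataFrame per API call). header=(i == 0) collapses the if/else above into one call:

for i, page in enumerate(pages):   # pages: whatever drives your API calls
    dataset = fetch_page(page)     # hypothetical helper returning a DataFrame
    # write the header only on the first iteration
    dataset.to_csv('file_name.csv', index=False, mode='a', header=(i == 0))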
Here's another example that only depends on Python's built-in csv package. This method checks that the header is what's expected, or it raises an error. It also handles the case where the file doesn't exist, or does exist but is empty, by writing the header. Hope this helps:
import csv
import os

def append_to_csv(path, fieldnames, rows):
    is_write_header = not os.path.exists(path) or _is_empty_file(path)
    if not is_write_header:
        _assert_field_names_match(path, fieldnames)
    _append_to_csv(path, fieldnames, rows, is_write_header)

def _is_empty_file(path):
    return os.stat(path).st_size == 0

def _assert_field_names_match(path, fieldnames):
    with open(path, 'r') as f:
        reader = csv.reader(f)
        header = next(reader)
    if header != fieldnames:
        raise ValueError(f'Incompatible header: expected {fieldnames}, '
                         f'but existing file has {header}')

def _append_to_csv(path, fieldnames, rows, is_write_header: bool):
    with open(path, 'a') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if is_write_header:
            writer.writeheader()
        writer.writerows(rows)
You can test this with the following code:
file_ = 'countries.csv'
fieldnames_ = ['name', 'area', 'country_code2', 'country_code3']
rows_ = [
    {'name': 'Albania', 'area': 28748, 'country_code2': 'AL', 'country_code3': 'ALB'},
    {'name': 'Algeria', 'area': 2381741, 'country_code2': 'DZ', 'country_code3': 'DZA'},
    {'name': 'American Samoa', 'area': 199, 'country_code2': 'AS', 'country_code3': 'ASM'}
]
append_to_csv(file_, fieldnames_, rows_)
If you run this once you get the following in countries.csv:
name,area,country_code2,country_code3
Albania,28748,AL,ALB
Algeria,2381741,DZ,DZA
American Samoa,199,AS,ASM
And if you run it twice you get the following (note, no second header):
name,area,country_code2,country_code3
Albania,28748,AL,ALB
Algeria,2381741,DZ,DZA
American Samoa,199,AS,ASM
Albania,28748,AL,ALB
Algeria,2381741,DZ,DZA
American Samoa,199,AS,ASM
If you then change the header in countries.csv and run the program again, you'll get a value error, like this:
ValueError: Incompatible header: expected ['name', 'area', 'country_code2', 'country_code3'], but existing file has ['not', 'right', 'fieldnames']
I have been trying to do this in Python for a couple of days now, and I have it sort of working, but I am stuck on this last error:
File "C:\Python34\lib\csv.py", line 149, in _dict_to_list
+ ", ".join([repr(x) for x in wrong_fields]))
ValueError: dict contains fields not in fieldnames:
Here is the code I have:
import csv
import sys

fin = 'csvTest.txt'
fout = open('test2.csv', 'wb')
csv_file = csv.DictReader(open(fin, 'r'), delimiter='\t', quotechar='"')
fieldnames = ['fName', 'lName', 'addr1', 'city', 'state', 'zip', 'terms', 'apptDate', 'product',
              'description', 'quantity', 'price', 'total', 'taxAmt', 'invoiceNum']
csvwriter = csv.DictWriter(fout, delimiter='\t', fieldnames=fieldnames)
csvwriter.writeheader()

for row in csv_file:
    row = row['fName'], row['lName'], row['addr1'], row['city'], row['state'], row['zip'], row['terms'], \
          row['apptDate'], row['product'], row['description'].replace('/n', '').replace(',', ''), \
          row['quantity'], row['price'], row['total'], row['taxAmt'], row['invoiceNum']
    print(row)
    csvwriter.writerow(row)

fout.close()
I am simply trying to import a CSV file, strip all /n and , from the description field ONLY, then write the rows back out as a TAB-delimited CSV. I have tried a few ways of making this work but am having trouble targeting the right field. This is my first go at CSV with Python.
Any ideas?
Your long row assignment converts the dictionary row object into a tuple. The error you're seeing is the csv library complaining that you didn't pass it a dict object. Also, the indentation is a little screwy (but that could just be Stack Overflow).
import csv

# text mode with newline='' since the traceback shows Python 3.4
with open('csvTest.txt', 'r', newline='') as file_in:
    with open('test2.csv', 'w', newline='') as file_out:
        csv_reader = csv.DictReader(file_in, delimiter='\t', quotechar='"')
        fieldnames = ['fName', 'lName', 'addr1', 'city', 'state', 'zip', 'terms',
                      'apptDate', 'product', 'description', 'quantity', 'price',
                      'total', 'taxAmt', 'invoiceNum']
        csv_writer = csv.DictWriter(file_out, delimiter='\t', fieldnames=fieldnames)
        csv_writer.writeheader()
        for row in csv_reader:
            # modify the one field in place, keeping row a dict
            row['description'] = row['description'].replace('/n', '').replace(',', '')
            print(row)
            csv_writer.writerow(row)
I'm writing a simple script in Python as a learning exercise. I have a TSV file I've downloaded from the Ohio Board of Elections, and I want to manipulate some of the data and write out a CSV file for import into another system.
My issue is that it's leaking memory like a sieve. On a single run of a 154MB TSV file it consumes 2GB of memory before I stop it.
The code is below; can someone please help me identify what I'm missing with Python?
import csv
import datetime
import re

def formatAddress(row):
    address = ''
    if str(row['RES_HOUSE']).strip():
        address += str(row['RES_HOUSE']).strip()
    if str(row['RES_FRAC']).strip():
        address += '-' + str(row['RES_FRAC']).strip()
    if str(row['RES STREET']).strip():
        address += ' ' + str(row['RES STREET']).strip()
    if str(row['RES_APT']).strip():
        address += ' APT ' + str(row['RES_APT']).strip()
    return address

vote_type_map = {
    'G': 'General',
    'P': 'Primary',
    'L': 'Special'
}

def formatRow(row, fieldnames):
    basic_dict = {
        'Voter ID': str(row['VOTER ID']).strip(),
        'Date Registered': str(row['REGISTERED']).strip(),
        'First Name': str(row['FIRSTNAME']).strip(),
        'Last Name': str(row['LASTNAME']).strip(),
        'Middle Initial': str(row['MIDDLE']).strip(),
        'Name Suffix': str(row['SUFFIX']).strip(),
        'Voter Status': str(row['STATUS']).strip(),
        'Current Party Affiliation': str(row['PARTY']).strip(),
        'Year Born': str(row['DATE OF BIRTH']).strip(),
        #'Voter Address': formatAddress(row),
        'Voter Address': formatAddress({'RES_HOUSE': row['RES_HOUSE'], 'RES_FRAC': row['RES_FRAC'],
                                        'RES STREET': row['RES STREET'], 'RES_APT': row['RES_APT']}),
        'City': str(row['RES_CITY']).strip(),
        'State': str(row['RES_STATE']).strip(),
        'Zip Code': str(row['RES_ZIP']).strip(),
        'Precinct': str(row['PRECINCT']).strip(),
        'Precinct Split': str(row['PRECINCT SPLIT']).strip(),
        'State House District': str(row['HOUSE']).strip(),
        'State Senate District': str(row['SENATE']).strip(),
        'Federal Congressional District': str(row['CONGRESSIONAL']).strip(),
        'City or Village Code': str(row['CITY OR VILLAGE']).strip(),
        'Township': str(row['TOWNSHIP']).strip(),
        'School District': str(row['SCHOOL']).strip(),
        'Fire': str(row['FIRE']).strip(),
        'Police': str(row['POLICE']).strip(),
        'Park': str(row['PARK']).strip(),
        'Road': str(row['ROAD']).strip()
    }
    for field in fieldnames:
        m = re.search(r'(\d{2})(\d{4})-([GPL])', field)
        if m:
            vote_type = vote_type_map[m.group(3)] or 'Other'
            #print { 'k1': m.group(1), 'k2': m.group(2), 'k3': m.group(3)}
            d = datetime.date(year=int(m.group(2)), month=int(m.group(1)), day=1)
            csv_label = d.strftime('%B %Y') + ' ' + vote_type + ' Ballot Requested'
            d = None
            basic_dict[csv_label] = row[field]
        m = None
    return basic_dict

output_rows = []
output_fields = []

with open('data.tsv', 'r') as f:
    r = csv.DictReader(f, delimiter='\t')
    #f.seek(0)
    fieldnames = r.fieldnames
    for row in r:
        output_rows.append(formatRow(row, fieldnames))
    f.close()

if output_rows:
    output_fields = sorted(output_rows[0].keys())
    with open('data_out.csv', 'wb') as f:
        w = csv.DictWriter(f, output_fields, quotechar='"')
        w.writeheader()
        for row in output_rows:
            w.writerow(row)
        f.close()
You are accumulating all the data into a huge list, output_rows. You need to process each row as you read it, instead of saving all of them into a memory-expensive Python list.
with open('data.tsv', 'rb') as fin, open('data_out.csv', 'wb') as fout:
    reader = csv.DictReader(fin, delimiter='\t')
    firstrow = next(reader)
    fieldnames = reader.fieldnames
    basic_dict = formatRow(firstrow, fieldnames)
    output_fields = sorted(basic_dict.keys())
    writer = csv.DictWriter(fout, output_fields, quotechar='"')
    writer.writeheader()
    writer.writerow(basic_dict)
    for row in reader:
        basic_dict = formatRow(row, fieldnames)
        writer.writerow(basic_dict)
You're not leaking any memory, you're just using a ton of memory.
You're turning each line of text into a dict of Python strings, which takes considerably more memory than a single string. For full details, see Why does my 100MB file take 1GB of memory?
The solution is to do this iteratively. You don't actually need the whole list, because you never refer back to any previous values. So:
with open('data.tsv', 'r') as fin, open('data_out.csv', 'w') as fout:
    r = csv.DictReader(fin, delimiter='\t')
    output_fields = sorted(r.fieldnames)
    w = csv.DictWriter(fout, output_fields, quotechar='"')
    w.writeheader()
    for row in r:
        w.writerow(formatRow(row, r.fieldnames))
Or, even more simply:
w.writerows(formatRow(row, r.fieldnames) for row in r)
Of course this is slightly different from your original code in that it creates the output file even if the input file is empty. You can fix that pretty easily if it's important:
with open('data.tsv', 'r') as fin:
    r = csv.DictReader(fin, delimiter='\t')
    first_row = next(r, None)
    if first_row:
        with open('data_out.csv', 'w') as fout:
            output_fields = sorted(r.fieldnames)
            w = csv.DictWriter(fout, output_fields, quotechar='"')
            w.writeheader()
            w.writerow(formatRow(first_row, r.fieldnames))
            for row in r:
                w.writerow(formatRow(row, r.fieldnames))
Maybe this helps someone with a similar problem.
While reading a plain CSV file line by line and deciding by a field whether each row should be saved to file A or file B, a memory overflow occurred and my kernel died. I therefore analyzed my memory usage, and this small change 1. roughly tripled the iteration speed and 2. fixed the memory leak.
This was my code with the memory leak and long runtime:
with open('input_file.csv', 'r') as input_file, open('file_A.csv', 'w') as file_A, open('file_B.csv', 'w') as file_B:
    input_csv = csv.reader(input_file)
    file_A_csv = csv.writer(file_A)
    file_B_csv = csv.writer(file_B)
    for row in input_csv:
        condition_row = row[1]
        if condition_row == 'condition':
            file_A_csv.writerow(row)
        else:
            file_B_csv.writerow(row)
BUT if you don't assign the field to a variable first (or more variables from the row you are reading), like this:
with open('input_file.csv', 'r') as input_file, open('file_A.csv', 'w') as file_A, open('file_B.csv', 'w') as file_B:
    input_csv = csv.reader(input_file)
    file_A_csv = csv.writer(file_A)
    file_B_csv = csv.writer(file_B)
    for row in input_csv:
        if row[1] == 'condition':
            file_A_csv.writerow(row)
        else:
            file_B_csv.writerow(row)
I cannot explain why this is so, but after some tests I found it was on average about 3 times as fast, and my RAM usage stayed close to zero.