I'm iterating API requests for each row of the input CSV file. And I want to add API output results to the existing CSV file.
Input
Desired output
As you can see, I added three headers with corresponding results (latitude, longitude, coordinates)
However, I'm having difficulty writing the right code for this. Below is the best I could do.
# First attempt: geocode the address in every row and write a copy of the CSV
# with three extra columns (latitude, longitude, coordinates).
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops below cannot be recovered from the text alone.
# The input is loaded into a DataFrame, which drives the geocoding loop.
df=pd.read_csv(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv",delimiter=',', na_values="nan")
# Output
# The same input file is opened a second time as a csv reader, next to the
# output copy (Copy3) that receives the extended rows.
with open(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv", 'r') as csvin, open (r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy3.csv", 'w', newline='') as out:
csvreader = csv.DictReader(csvin)
# Output header = input header plus the three new column names.
fieldnames = csvreader.fieldnames + ["latitude","longitude","coordinates"]
csvwriter = csv.DictWriter(out, fieldnames)
csvwriter.writeheader()
# Iterating requests for each row
for row in df.itertuples():
# One geocoder call per row, using the address parts joined by ', '.
output = client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' + str(row.state) + ', ' + str(row.postalcode)).coords
# Text form "(first, second)" of the two values in output.
cord = '(' + str(output[0]) + ', '+ str(output[1]) + ')'
# NOTE(review): this loop rebinds `row`, and csvreader is a one-shot iterator,
# so whichever pass reaches it first consumes all CSV rows.
for node, row in enumerate(csvreader, 3):
# NOTE(review): dict() accepts at most one positional argument, so
# dict(3, {...}) raises TypeError; presumably dict(row, latitude=..., ...)
# was intended.
csvwriter.writerow(dict(3, {'latitude': output[0], 'longitude': output[1], 'coordinates': cord}))
Update:
Here is my new Python query:
# Second attempt: same setup as before, but every new column is written by
# its own loop over csvreader.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops below cannot be recovered from the text alone.
df=pd.read_csv(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv",delimiter=',', na_values="nan")
# Output
# Input CSV re-opened as a csv reader, next to the output copy (Copy3).
with open(r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy2.csv", 'r') as csvin, open (r"C:\users\testu\documents\travis_50000_melissa_joined_dropna - Copy3.csv", 'w', newline='') as out:
csvreader = csv.DictReader(csvin)
# Output header = input header plus the three new column names.
fieldnames = csvreader.fieldnames + ["latitude","longitude","coordinates"]
csvwriter = csv.DictWriter(out, fieldnames)
csvwriter.writeheader()
# Iterating requests for each row
for row in df.itertuples():
# One geocoder call per row, using the address parts joined by ', '.
output = client.geocode(str(row.addressline1) + ', ' + str(row.city) + ', ' + str(row.state) + ', ' + str(row.postalcode)).coords
# Text form "(first, second)" of the two values in output.
cord = '(' + str(output[0]) + ', '+ str(output[1]) + ')'
# NOTE(review): csvreader is a one-shot iterator; once one of the loops below
# has consumed it, the later loops receive no rows.
# NOTE(review): each writerow() call emits a whole row with only one of the
# three new columns filled in, and `value % node` applies the % operator with
# the enumerate counter -- presumably not what was intended.
for node, row1 in enumerate(csvreader, 38):
csvwriter.writerow(dict(row1,latitude= output[0] % node))
for node, row2 in enumerate(csvreader, 39):
csvwriter.writerow(dict(row2,longitude = output[1] % node))
for node, row3 in enumerate(csvreader, 40):
csvwriter.writerow(dict(row3,coordinates= cord % node))
However, I get the following result:
You can more easily accomplish this by using more of pandas features.
Import the data from csv as you have been doing.
import pandas as pd
# Load the whole input CSV into a DataFrame; "input_file.csv" stands in for
# the real path.
df = pd.read_csv("input_file.csv")
You can use dataframe.apply(func, axis=1) to apply a function to each row of a dataframe. https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.apply.html
def get_coords(row):
    """Geocode the address held in one DataFrame row.

    Args:
        row: An object exposing ``addressline1``, ``city``, ``state`` and
            ``postalcode`` attributes, such as the Series that
            ``DataFrame.apply(..., axis=1)`` passes in.

    Returns:
        The ``coords`` attribute of the geocoder result for the
        comma-separated address (presumably a latitude/longitude pair --
        confirm against the geocoding client in use).
    """
    address_parts = (row.addressline1, row.city, row.state, row.postalcode)
    # str() every part so numeric postal codes and NaN cells can be joined.
    address = ', '.join(str(part) for part in address_parts)
    return client.geocode(address).coords
# One geocoder call per row. Assumes get_coords returns a (latitude,
# longitude) pair, so the result is a one-dimensional Series of pairs
# aligned with df's index -- TODO confirm against the geocoding client.
coords = df.apply(get_coords, axis=1)
# A Series of pairs has a single axis, so it cannot be sliced with [:, 0];
# pull each component out element by element instead.
df['latitude'] = [pair[0] for pair in coords]
df['longitude'] = [pair[1] for pair in coords]
# Column name matches the header asked for in the question.
df['coordinates'] = coords
You can then easily save the dataframe to csv using:
# index=False keeps pandas from writing its row index as an extra, unnamed
# first column, so the output has the input columns plus the three new ones.
df.to_csv('output_filename.csv', index=False)
Hope this helps.
p.s. code is untested but should be good :)
Related
My code below creates a CSV file and then adds random data to it.
Somehow, the first part of the code creates a blank row below the header.
Can anyone please help me fix the code so that this blank row is removed?
Thank you,
Hary
# Creates three CSV files; each gets a header row and then five rows of
# random data appended through a second, separate file handle.
# NOTE(review): indentation was lost in this listing; the nesting is not
# recoverable from the text alone.
header_list = ["Firm", "hour", "unit sold", "product code", "dollar value"]
for i in range(3):
# create file with header -----------
with open(f'D:\\2000 transactions\\location_id_' + str(i) + '.csv', 'w', newline='') as file:
dw = csv.DictWriter(file, delimiter=',',fieldnames=header_list)
dw.writeheader() # this line create a blank row below header
# adding data to file ---------------
for j in range(5):
# Random label used to pick a row of sp100_list (defined outside this listing).
n = random.randint(1, 99)
# Five comma-separated values: firm name plus four random integers.
text = str(str(sp100_list['Name'].loc[n]) + ',' + str(random.randint(5, 20)) + ',' + str(random.randint(200, 1000)) + ',' + str(
random.randint(100, 150)) + ',' + str(int(random.random() * 1000000)))
# NOTE(review): the file is re-opened in append mode for every row and is
# never closed in the lines shown.
myfile = open(f'D:\\2000 transactions\\location_id_' + str(i) + '.csv', 'a')
# The line break is written *before* the text, i.e. after the header line
# has already been terminated by writeheader().
myfile.write('\n' + text)
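Actually, the unexpected blank row comes from the line myfile.write('\n' + text), not from dw.writeheader(). The header line is already terminated, so writing a \n before the text adds an empty row to your file. Moving the \n to the end of the write, i.e. myfile.write(text + '\n'), should solve it.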
code:
import csv

header_list = ["Firm", "hour", "unit sold", "product code", "dollar value"]

for i in range(1):
    # newline='' lets the csv module control the header's line ending. A
    # single handle serves both the header and the data rows, so the file
    # is not re-opened for every row.
    with open(f'test{i}.csv', 'w', newline='') as file:
        dw = csv.DictWriter(file, delimiter=',', fieldnames=header_list)
        dw.writeheader()  # terminates the header line; adds no blank row
        for _ in range(5):
            text = "1,1"
            # Terminate each row *after* its text. A leading '\n' would
            # leave an empty line between the header and the first row.
            file.write(text + '\n')
result:
Firm,hour,unit sold,product code,dollar value
1,1
1,1
1,1
1,1
1,1
You can use csv.writer on its own instead of DictWriter. Here is an example
that uses a single csv.writer to write both the header and the rows:
header_list = ["Firm", "hour", "unit sold", "product code", "dollar value"]

for i in range(3):
    # newline='' is how the csv module expects its output file to be
    # opened. Without it, platforms that translate '\n' write '\r\r\n'
    # and every record is followed by a blank row.
    with open(f'tmp_{i}.csv', 'w', newline='') as file:
        # create writer here
        writer = csv.writer(file, delimiter=',')
        # Header and data go through the same writer, so every line gets
        # the same terminator and no stray blank row appears.
        writer.writerow(header_list)
        # adding data to file ---------------
        for _ in range(5):
            n = random.randint(1, 99)
            writer.writerow([
                sp100_list['Name'].loc[n],
                random.randint(5, 20),
                random.randint(200, 1000),
                random.randint(100, 150),
                int(random.random() * 1000000),
            ])
I am able to write the hostname to /tmp/filter.log, but how can I write all three values (hostname, owner, seats) to the file?
# Python 2 code (print statements).
# list_hosts(nc) asks the service for its hosts, keeps those whose owner
# contains "team-prod", logs them to /tmp/filter.log and returns their
# hostnames as a list.
# NOTE(review): indentation was lost in this listing; the nesting is not
# recoverable from the text alone.
def list_hosts(nc):
# 'ListHosts' is called with an empty JSON object as its payload.
resp = nc.send_service_request('ListHosts', json.dumps({}))
result = resp['result']
l = []
# "w+" truncates any existing log before writing.
f=open("/tmp/filter.log", "w+")
for r in result:
if "team-prod" in r['owner']:
print r['owner'], r['hostname'], r['seats']
# Only the hostname reaches the log file; owner and seats are printed
# above but not written.
f.write(r['hostname'] + "\n")
# NOTE(review): if this close() sits inside the loop, any later write would
# hit a closed file -- confirm its placement.
f.close()
l.append(r['hostname'])
return l
nc = create_client('zone', 'team_PROD_USERNAME', 'team_PROD_PASSWORD')
l = list_hosts(nc)
print l
The file should have entries as below:
team-prod\*, np-team-052, [u'123123123-18d1-483d-9af8-169ac66b26e4']
Current entry is:
np-team-052
# One log line per host: owner, hostname and seats separated by ', '.
# str() is applied to each value so non-string fields can be joined too.
f.write(', '.join(str(r[field]) for field in ('owner', 'hostname', 'seats')) + '\n')
Please help to correct the python script to get the required output
I have written below code to convert csv to xml.
The input file has columns 1 to 278. The output file needs to have tags A1 to A278.
Code :
#!/usr/bin/python
import sys
import os
import csv
# Converts every .csv/.CSV file in the folder given on the command line into
# an .xml file with the same base name.
# NOTE(review): indentation was lost in this listing; the nesting is not
# recoverable from the text alone.
# Exit with status 1 unless exactly one command-line argument was supplied.
if len(sys.argv) != 2:
os._exit(1)
path=sys.argv[1] # get folder as a command line argument
os.chdir(path)
csvFiles = [f for f in os.listdir('.') if f.endswith('.csv') or f.endswith('.CSV')]
for csvFile in csvFiles:
# Replace the 4-character extension with '.xml'.
xmlFile = csvFile[:-4] + '.xml'
# NOTE(review): the input handle opened here is never explicitly closed.
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<TariffRecords>' + "\n")
# rowNum distinguishes the header row (0) from the data rows.
rowNum = 0
for row in csvData:
if rowNum == 0:
# NOTE(review): the name Tariff is not assigned anywhere in this listing, so
# this line raises NameError (see the traceback quoted after the code); the
# header row itself (`row`) is presumably what was meant.
tags = Tariff
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', '_')
else:
# One <Tariff> element per data row, with one child element per header column.
xmlData.write('<Tariff>' + "\n")
for i in range(len(tags)):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</Tariff>' + "\n")
rowNum +=1
xmlData.write('</TariffRecords>' + "\n")
xmlData.close()
Getting below error from script:
Traceback (most recent call last):
File "ctox.py", line 20, in ?
tags = Tariff
NameError: name 'Tariff' is not defined
Sample input file (this is only a sample record; the actual input file will contain 278 columns).
If the input file has two or three records, they all need to be written to a single XML file.
name,Tariff Summary,Record ID No.,Operator Name,Circle (Service Area),list
Prepaid Plan Voucher,test_All calls 2p/s,TT07PMPV0188,Ta Te,Gu,
Prepaid Plan Voucher,test_All calls 3p/s,TT07PMPV0189,Ta Te,HR,
Sample output file
The above two TariffRecords, tariff will be hard coded at the beginning and end of xml file.
<TariffRecords>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 2p/s</A2>
<A3>TT07PMPV0188</A3>
<A4>Ta Te</A4>
<A5>Gu</A5>
<A6></A6>
</Tariff>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 3p/s</A2>
<A3>TT07PMPV0189</A3>
<A4>Ta Te</A4>
<A5>HR</A5>
<A6></A6>
</Tariff>
</TariffRecords>
First off you need to replace
tags = Tariff
with
tags = row
Secondly, you want to change the write line so that it writes A1, A2, etc. instead of the tag names taken from the header.
Complete code:
import csv
import os
import sys
from xml.sax.saxutils import escape


def csv_to_xml(csv_path, xml_path):
    """Convert one CSV file into a ``<TariffRecords>`` XML document.

    The first CSV row is treated as the header. It is not written out; only
    its length is used, to decide how many ``<A1>``..``<An>`` elements each
    record gets. Every following non-empty row becomes one ``<Tariff>``
    element.

    Args:
        csv_path: Path of the CSV file to read.
        xml_path: Path of the XML file to create or overwrite.
    """
    # Both files are closed by the with-statement, even on error.
    # newline='' is how the csv module expects its input to be opened.
    with open(csv_path, newline='') as csv_file, open(xml_path, 'w') as xml_file:
        reader = csv.reader(csv_file)
        # An empty file has no header and therefore produces no columns.
        column_count = len(next(reader, []))

        xml_file.write('<?xml version="1.0"?>' + "\n")
        # there must be only one top-level tag
        xml_file.write('<TariffRecords>' + "\n")
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they hold no record.
                continue
            xml_file.write('<Tariff>' + "\n")
            for position in range(column_count):
                # Short rows are padded with empty values instead of
                # raising IndexError.
                value = row[position] if position < len(row) else ''
                tag = 'A%s' % (position + 1)
                # Escape &, < and > so field text cannot break the markup.
                xml_file.write(' <%s>%s</%s>\n' % (tag, escape(value), tag))
            xml_file.write('</Tariff>' + "\n")
        xml_file.write('</TariffRecords>' + "\n")


def main():
    """Convert every .csv/.CSV file in the folder named on the command line."""
    if len(sys.argv) != 2:
        sys.exit(1)
    path = sys.argv[1]  # get folder as a command line argument
    os.chdir(path)
    for name in os.listdir('.'):
        if name.endswith(('.csv', '.CSV')):
            csv_to_xml(name, name[:-4] + '.xml')


if __name__ == '__main__':
    main()
Output:
<?xml version="1.0"?>
<TariffRecords>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 2p/s</A2>
<A3>TT07PMPV0188</A3>
<A4>Ta Te</A4>
<A5>Gu</A5>
<A6></A6>
</Tariff>
<Tariff>
<A1>Prepaid Plan Voucher</A1>
<A2>test_All calls 3p/s</A2>
<A3>TT07PMPV0189</A3>
<A4>Ta Te</A4>
<A5>HR</A5>
<A6></A6>
</Tariff>
</TariffRecords>
import pandas as pd
from xml.etree import ElementTree as xml


def build_tariff_records(frame):
    """Build a ``<TariffRecords>`` element tree from a DataFrame.

    Each DataFrame row becomes one ``<Tariff>`` child, and the values of
    that row become ``<A1>``, ``<A2>``, ... children in column order.

    Args:
        frame: DataFrame whose rows are the records to export.

    Returns:
        The root ``xml.etree.ElementTree.Element``.
    """
    root = xml.Element("TariffRecords")
    # name=None yields plain tuples, so the column labels do not have to be
    # valid Python identifiers.
    for record in frame.itertuples(index=False, name=None):
        # SubElement(parent, tag) creates the child and attaches it.
        tariff = xml.SubElement(root, "Tariff")
        # Tags are numbered from 1 to match the requested A1..An layout.
        for position, value in enumerate(record, start=1):
            # Element text must be a string; ElementTree escapes it when
            # the tree is serialised.
            xml.SubElement(tariff, "A%d" % position).text = str(value)
    return root


if __name__ == "__main__":
    # dtype=str with keep_default_na=False keeps empty cells as '' instead
    # of turning them into NaN.
    df = pd.read_csv("file_path", dtype=str, keep_default_na=False)
    root = build_tariff_records(df)
    print(xml.tostring(root, encoding="unicode"))
I have a csv file and I need to mix 2 of its columns:
Sitio, ID_espacio, Espacio, Tamano, Country, Impresiones_exchange, Importe_a_cobrar, eCPM, Subastas, Fill_rate
NUEVO_Infotechnology, 264244, NUEVO_Infotechnology - Home_IT - IT_Header, Variable (1240x90), Bangladesh, 0, 0.00, 0.00, 1, 0.00
NUEVO Apertura, 274837, NUEVO Apertura - Nota_Ap - Right3_300x250, 300x250, Paises Bajos, 0, 0.00, 0.00, 4, 0.00
The problem is that I need to merge ID_espacio with Espacio, but in this way:
example:
NUEVO_Infotechnology, 264244, NUEVO_Infotechnology - Home_IT - IT_Header, Variable (1240x90), Bangladesh, 0, 0.00, 0.00, 1, 0.00
What I need:
NUEVO_Infotechnology, 264244 - Home_IT - IT_Header, Variable (1240x90), Bangladesh, 0, 0.00, 0.00, 1, 0.00
As you can see, I remove the leading name from Espacio (everything up to the first '-') and put the ID_espacio in its place.
I managed to do that, but now I need the output to contain the whole CSV and not only my modified field:
import csv
# Attempt at merging ID_espacio into Espacio for every data row.
# NOTE(review): indentation was lost in this listing; the nesting is not
# recoverable from the text alone.
lista_ok = []
base = []
# The file is opened in binary mode ('rb'), presumably for Python 2's csv module.
with open("test.csv", 'rb') as f:
reader = csv.reader(f)
your_list = list(reader)
# Split the Espacio column (index 2) of every data row on " - ".
for item in your_list[1:]:
a = item[2].split(" - ")
base.append(a)
# NOTE(review): the nested loops pair every split Espacio with every row's
# ID, so rows x rows entries are produced (four entries for two rows in the
# output quoted below), and each entry holds only the combined field, not the
# rest of the row.
for item in base:
for itemf in your_list[1:]:
b = []
# ID (column 1) followed by the Espacio pieces after the first one.
a = itemf[1] + ' - ' + ' - '.join(item[1:])
b.append(a)
lista_ok.append(b)
Output:
[[' 264244 - Home_IT - IT_Header'], [' 274837 - Home_IT - IT_Header'], [' 264244 - Nota_Ap - Right3_300x250'], [' 274837 - Nota_Ap - Right3_300x250']]
Output I need:
[['Sitio', ' ID_espacio', ' Espacio', ' Tamano', ' Country', ' Impresiones_exchange', ' Importe_a_cobrar', ' eCPM', ' Subastas', ' Fill_rate'], ['NUEVO_Infotechnology', ' 264244 - Home_IT - IT_Header', ' Variable (1240x90)', ' Bangladesh', ' 0', ' 0.00', ' 0.00', ' 1', ' 0.00'], ['NUEVO Apertura', ' 274837 - Nota_Ap - Right3_300x250', ' 300x250', ' Paises Bajos', ' 0', ' 0.00', ' 0.00', ' 4', ' 0.00']]
Here another version:
import csv


def merge_id_into_espacio(rows):
    """Fold the ID_espacio column into the Espacio column.

    The first row is treated as the header. In every other row the leading
    Sitio name inside Espacio is replaced by that row's ID_espacio. The
    ID_espacio column itself is dropped from every row, header included.

    Args:
        rows: Iterable of parsed CSV rows (lists of strings). The rows are
            not modified.

    Returns:
        A new list of rows, each one column shorter than its source row.
    """
    merged = []
    # enumerate() identifies the header by position. A list.index() lookup
    # per row is quadratic and matches rows by value, not by position.
    for position, row in enumerate(rows):
        # Unpack by position, so the ID column is dropped by index rather
        # than by list.remove(), which deletes the first *equal* value.
        sitio, id_espacio, espacio, *rest = row
        if position > 0:
            parts = espacio.split(' - ')
            parts[0] = parts[0].lstrip().replace(sitio, id_espacio)
            espacio = ' - '.join(parts)
        merged.append([sitio, espacio, *rest])
    return merged


if __name__ == '__main__':
    # Text mode with newline='' is what csv.reader needs on Python 3;
    # a file opened with 'rb' yields bytes and is rejected.
    with open("test.csv", newline='') as f:
        lista_ok = merge_id_into_espacio(csv.reader(f))
You could write a function that transforms a single row the way you want. Then call that function for each row as you read it from the file and put it in your final list:
def row_transform(row, is_header=False):
    """Merge the ID column into the Espacio column of one CSV row.

    For a data row, the leading Sitio name (everything up to the first
    " - ") is dropped from ``row[2]`` and replaced by the ID in ``row[1]``.
    The ID column is then removed.

    Args:
        row: One parsed CSV row (list of strings); it is modified in place.
        is_header: True for the header row, whose text is left untouched.

    Returns:
        The same list object, now one column shorter.
    """
    if not is_header:
        # trim Sitio from Espacio; partition() tolerates a value that has
        # no " - " separator, where split(...)[1] would raise IndexError
        _, separator, remainder = row[2].partition(" - ")
        # add ID to espacio
        row[2] = " - ".join((row[1], remainder)) if separator else row[1]
    # remove ID col
    # NOTE(review): the source listing lost its indentation; dropping the
    # column for the header as well (so columns stay aligned) is an
    # assumption -- confirm.
    del row[1]
    return row
# newline='' is how the csv module expects its input file to be opened.
with open("test.csv", newline="") as fp:
    reader = csv.reader(fp)
    # Only the first row is the header. enumerate() also copes with an empty
    # file, where calling next(reader) would raise StopIteration.
    lista_ok = [
        row_transform(row, is_header=(line_number == 0))
        for line_number, row in enumerate(reader)
    ]
sample data:
id, Name, mail, data1, data2, data3
1, Name1, mail#com, abc, 14, de
1, Name1, mail#com, fgh, 25, kl
1, Name1, mail#com, mno, 38, pq
2, Name2, mail#com, abc, 14, d
I wrote a script that treats the first field as a unique key in order to remove duplicate rows. However, because the values in the fields data1-data3 differ from row to row, they have to be merged so that the result looks like this:
1, Name1, mail#com, "abc, 14, de, fgh, 25, kl, mno, 38, pq"
How to merge rows in the array?
My code not work:
import sys
import csv
# De-duplicates the rows of the CSV file named on the command line by their
# first column and writes the kept rows to 'out' + <input file name>.
# NOTE(review): indentation was lost in this listing; the nesting is not
# recoverable from the text alone.
in_fln = sys.argv[1]
# You can replace here and choose any delimiter:
csv.register_dialect('dlm', delimiter=',')
csv.register_dialect('dmt', delimiter=';')
# if this .csv file do:
if (in_fln[-3:]) == "csv":
out_fln = 'out' + in_fln
inputf = open(in_fln, 'r')
# seen: first-column values already encountered.
seen = []
# outfile: rows that will be written to the output file.
outfile = []
# nout: first-column value -> data columns collected for it.
nout = {}
#rowun = []
try:
reader = csv.reader(inputf, dialect='dlm')
# select by ContactID
for row in reader:
if row[0] not in seen:
#IT'S work byt temp comment
#rowun = '"' + (row[-4]) + ', ' + (row[-3]) + ', ' + (row[-2]) + '"'
#outfile.append(row[:-5]+[rowun])
# First occurrence of this key: keep the row without its last four columns.
outfile.append(row[:-4])
rowun = (row[0])
# Remember columns -4, -3 and -2 of this row under its key.
nout[rowun] = (row[-4:-1])
seen.append(row[0])
print (type(row))
else:
#rowun = '"' + (row[-4]) + ', ' + (row[-3]) + ', ' + (row[-2]) + '"'
#nout.insert(-1,(row[-4:-1]))
print (type(row))
rowun = (row[0])
rowun2 = {rowun:(row[-4:-1])}
# NOTE(review): update() replaces the values already stored for this key
# rather than extending them, so earlier data for the key is lost.
nout.update(rowun2)
finally:
#print (nout)
#print (outfile[:-1])
#csv.writer(open(('nout' + in_fln), 'w', newline='')).writerows(nout)
# NOTE(review): only outfile is written; nout is never written out in the
# lines shown, so the collected data columns do not reach the output.
csv.writer(open(out_fln, 'w', newline=''), dialect='dlm').writerows(outfile)
inputf.close()
print ("All done")
This should do the trick.
from collections import defaultdict

import pandas as pd

# recreate your example
df = pd.DataFrame(
    [
        [1, 'Name1', 'mail#com', 'abc', 14, 'de'],
        [1, 'Name1', 'mail#com', 'fgh', 25, 'kl'],
        [1, 'Name1', 'mail#com', 'mno', 38, 'pq'],
        [2, 'Name2', 'mail#com', 'abc', 14, 'd'],
    ],
    columns=['id', 'Name', 'mail', 'data1', 'data2', 'data3'],
)

# Group the data columns of every row under its (id, Name, mail) identity.
res = defaultdict(list)
# itertuples() avoids building a Series per row, as iterrows() does, and
# hands back each column's own values. name=None yields plain tuples.
for id_, name, mail, *data in df.itertuples(index=False, name=None):
    res[(id_, name, mail)].append(tuple(data))

for key, value in res.items():
    print(key, value)
# gives (dicts keep insertion order, so keys appear in first-seen order)
# (1, 'Name1', 'mail#com') [('abc', 14, 'de'), ('fgh', 25, 'kl'), ('mno', 38, 'pq')]
# (2, 'Name2', 'mail#com') [('abc', 14, 'd')]
My own version is very close to the better solution above:
Now everything works!
#!/usr/bin/env python3
import csv, re
import os, sys
# Merges rows that share the same first column: the row is kept once and the
# data columns of every occurrence are collected, then the written file is
# patched textually.
# NOTE(review): indentation was lost in this listing; the nesting is not
# recoverable from the text alone.
in_fln = sys.argv[1]
# You can replace here and choose any delimiter:
#csv.register_dialect('dlm', delimiter=',')
dm = ','
# seen: first-column values already encountered.
seen = []
# if this .csv file do:
if (in_fln[-3:]) == "csv":
out_fln = 'out' + in_fln
#create the full structure: output_rows
infile = csv.reader(open(in_fln, 'r'), delimiter=dm, quotechar='"')
output_rows = []
for row in infile:
# NOTE(review): `a` is not used anywhere in the lines shown.
a = 0
if row[0] not in seen:
seen.append(row[0])
# First occurrence of this key: keep the row without its last four columns ...
output_rows.append(row[:-4])
#rowun = '"' + row[-4] + ', ' + row[-3] + ', ' + row[-2] + '"'
# ... and start a separate one-cell row holding columns -4, -3 and -2 joined by ', '.
rowun = row[-4] + ', ' + row[-3] + ', ' + row[-2]
output_rows.append([rowun])
else:
#output_rows.append([row[-4], row[-3], row[-2]])
#rowun = '"' + row[-4] + ', ' + row[-3] + ', ' + row[-2] + '"'
rowun = row[-4] + ', ' + row[-3] + ', ' + row[-2]
#output_rows.insert(-1,[rowun])
#rowun = str(rowun)
#print (rowun)
# Repeated key: add this row's joined data as another cell of the most recent output row.
output_rows[-1].append(rowun)
#Finally save it to a file
csv.writer(open(out_fln, 'w', newline=''), delimiter=dm, quotechar='"').writerows(output_rows)
# Textual replacements (pattern, replacement) applied to the written file;
# presumably they fold the data cells into one quoted field on the key's own
# line -- confirm against real output.
chng = [
['","',','], # chng "," on ,
['\n"',',"'], # Del new str
]
# Read the whole output file back into memory before rewriting it.
input_file = open(out_fln).read()
# Opening with 'w' truncates the file; its content is already held in input_file.
output_file = open(out_fln,'w')
for string in chng:
input_file = re.sub(str(string[0]),str(string[1]),input_file)
output_file.write(input_file)
output_file.close()
print ("All done")