I have a csv file that looks like this:
Current csv file
Would like make a new csv file that looks like this:
Desired csv file
My first thoughts are to:
r1= []
r2 = []
with open('loadba1.csv', "r") as csv_file:
data = csv.reader(csv_file, delimiter=',')
f or rows in data:
r1.append(rows[0])
r2.append(rows[1])
r1 will give - TRUE
r2 will give - 'L_602356450160818331', 'wan1'
Then loop through again r2 to pull out each value and 'somehow' combine.
I also cannot loose the value relationship e.g. TRUE - wan1 - L_602356450160818331
I am not sure of the approach I should take. Please advise.
What you probably want to do is use a manual while loop rather than for:
with open('loadba1.csv', "r") as csv_file:
data = csv.reader(csv_file, delimiter=',')
while True:
try:
load_bal, interface = next(data)
except StopIteration:
break # end of file
try:
_, the_id = next(data)
except StopIteration:
raise ValueError("No ID row for %s" % interface)
... # write out (load_bal, interface, the_id)
import pandas as pd
df = pd.Dataframe("csv1.csv", index = False)
result = []
Id, LoadBal, Interface = "","",""
for index, row in df.iterrows():
for col, val in row.iteritems():
if col == 0 and val:
LoadBal = val
elif LoadBal:
Interface = val
result.append({"LoadBal": LoadBal, "Interface": Interface,"Id": Id })
Id, LoadBal, Interface = "","",""
elif col!=0:
Id = val
result = pd.DataFrame(result)
result.to_csv("csv2.csv")
Related
I am pretty new with python and I cannot shake this "index out of range error" at line wks.insert_row([row[1],row[2],row[3]], 1). I am trying to send the data to google sheets but it is not working. The wks.insert_row([1,2,3],1) does send 1,2, and 3 to the sheet. It must be something with row[1], row[2], row[3]. I have tried removing wks.insert_row([1,2,3],1) and changing the index to random numbers, still does not work. Thank you for the help!
import csv
import gspread
import time
MONTH = 'July'
file = f"Chase_{MONTH}.csv"
transactions = []
def ChaseFin(file):
with open(file, mode = 'r') as csv_file:
csv_reader = csv.reader(csv_file)
for row in csv_reader:
date = row[1]
name = row[2]
amount = row[3]
transaction = ((date, name, amount))
print(transaction)
transactions.append(transaction)
return transactions
sa = gspread.service_account()
sh = sa.open("Personal Finances")
wks = sh.worksheet(f"{MONTH}")
rows = ChaseFin(file)
for row in rows:
wks.insert_row([row[1],row[2],row[3]], 1)
time.sleep(2)
wks.insert_row([1,2,3],1)
I need to scan through line by line in time column time in CSV file, and see if there was activity in another column during 5 hours ago, then add column with value 1.
Here is my idea:
import csv
from collections import namedtuple
from contextlib import closing
light3 = pd.read_csv('G:/light3.csv')
light3.Time = pd.to_datetime(light3.Time)
m = light3.Time - DateOffset(hours = 5)
def search():
item = light3[(light3['Time']> m)| (light3['Time']== m)]
raw_data = 'G:/light3.csv'
failure = 'No matching item could be found with that item code. Please try again.'
check = False
with open('G:/project/test.pcap_z/light.csv','r') as csvinput:
with open('G:/light1.csv', 'w') as csvoutput:
writer = csv.writer(csvoutput, lineterminator='\n')
reader = csv.reader(csvinput)
read_data = csv.DictReader(csvinput, delimiter=';')
item_data = namedtuple(read_data['Time'], read_data.Time)
all = [ ]
row = next(reader)
row.append('f')
all.append(row)
while check == False:
for row in reader:
if row.Item == item:
row.append('f')
all.append(row)
row.append(1)
writer.writerows(all)
I am trying to take a csv file (which can be found here on my GitHub repo, https://github.com/playdscription/100daysofpython/blob/master/day_012/master_exercise_boolean.csv and turn it into a dictionary in python3. The idea is to take a google sheet, have multiple people fill it out, then turn it into a csv and have a python script pack that into a dictionary so I can access this information in a variety of ways.
I open the csv file, make a reader object out of it, then I loop through each line, and loop through each item in a specific part of the line and if the item has a value in it then I want it to write that value to a dictionary I have labeled joint. However even if there is a value in the item, I cannot get it to even just print that value. What am I doing wrong?
import csv
exercise_library = {}
joint = {}
with open('/Users/laptop/github/100daysofpython/day_012/master_exercise_boolean.csv' , 'r') as csv_file:
csv_reader = csv.reader(csv_file)
#look for joint actions of the NECK
for line in csv_reader:
for item in line[4:7]:
if item == True:
joint[line[3]] = [item]
#look for joint actions of the scapula.
for item in line[8:12]:
if item == True:
joint[line[7]] = [item]
#look for joint actions of the glenero_humeral.
for item in line[13:19]:
if item == True:
print(item)
#joint[line[12]] = [item]
exercise_library[line[0]] = [joint]
What you need to do is that create the key names and then assign values to dictionary. Also, items are read as string '1' not boolean, so I have changed that in code below.
import csv
exercise_library = {}
joint = {}
colnames = []
with open('test.csv' , 'r') as csv_file:
csv_reader = csv.reader(csv_file)
counter = 0
for line in csv_reader:
if counter==0:
colnames.append(line[3])
colnames.append(line[7])
colnames.append(line[12])
else:
for item in line[4:7]:
if item == '1':
joint[colnames[0]] = item
#look for joint actions of the scapula.
for item in line[8:12]:
if item == '1':
joint[colnames[1]] = item
#look for joint actions of the glenero_humeral.
for item in line[13:19]:
if item == '1':
joint[colnames[2]] = item
exercise_library[line[0]] = joint
counter = counter + 1
May not be what you want but this is the way you should do.
import csv
import requests
from collections import defaultdict
header = []
data = defaultdict(list)
with open('master_exercise_boolean.csv') as csv_file:
csv_reader = csv.reader(csv_file)
for i, line in enumerate(csv_reader):
if i ==0:
header = line
else:
for j, item in enumerate(line):
if item:
data[header[j]].append(item)
print(data)
I have a csv file where each record is a LinkedIn contact. I have to recreate another csv file where each contact it was reached only after a specific date (ex all the contact that are connected to me after 1/04/2017).
So this is my implementation:
def import_from_csv(file):
key_order = ("FirstName","LastName","EmailAddress","Company","ConnectedOn")
linkedin_contacts = []
with open(file, encoding="utf8") as csvfile:
reader=csv.DictReader(csvfile, delimiter=',')
for row in reader:
single_person = {"FirstName": row["FirstName"], "LastName": row["LastName"],
"EmailAddress": row["EmailAddress"], "Company": row["Company"],
"ConnectedOn": parser.parse(row["ConnectedOn"])}
od = OrderedDict((k, single_person[k]) for k in key_order)
linkedin_contacts.append(od)
return linkedin_contacts
the first script give to me a list of ordered dict, i dont know if the way i used to achive the correct order is good, also seeing some example (like here) i'm not using the od.update method, but i dont think i need it, is it correct?
Now i wrote a second function to filter the list:
def filter_by_date(connections):
filtered_list = []
target_date = parser.parse("01/04/2017")
for row in connections:
if row["ConnectedOn"] > target_date:
filtered_list.append(row)
return filtered_list
Am I doing this correctly?
Is there a way to optimize the code? Thanks
First point: you don't need the OrderedDict at all, just use a csv.DictWriter to write the filtered csv.
fieldnames = ("FirstName","LastName","EmailAddress","Company","ConnectedOn")
with open("/apth/to/final.csv", "wb") as f:
writer = csv.DictWriter(f, fieldnames)
writer.writeheader()
writer.writerows(filtered_contacts)
Second point: you don't need to create a new dict from the one yielded by the csv reader, just update the ConnectedOn key in place :
def import_from_csv(file):
linkedin_contacts = []
with open(file, encoding="utf8") as csvfile:
reader=csv.DictReader(csvfile, delimiter=',')
for row in reader:
row["ConnectedOn"] = parser.parse(row["ConnectedOn"])
linkedin_contacts.append(row)
return linkedin_contacts
And finally, if all you have to do is take the source csv, filter out records on ConnectedOn and write the result, you don't need to load the whole source in memory, create a filtered list (in memory again) and write the filtered list, you can stream the whole operation:
def filter_csv(source_path, dest_path, date):
fieldnames = ("FirstName","LastName","EmailAddress","Company","ConnectedOn")
target = parser.parse(date)
with open(source_path, "rb") as source, open(dest_path, "wb") as dest:
reader = csv.DictReader(source)
writer = csv.DictWriter(dest, fieldnames)
# if you want a header line with the fieldnames - else comment it out
writer.writeheaders()
for row in reader:
row_date = parser.parse(row["ConnectedOn"])
if row_date > target:
writer.writerow(row)
And here you are, plain and simple.
NB : I don't know what "parser.parse()" is but as others answers mention, you'd probably be better using the datetime module instead.
For filtering you could use filter() function:
def filter_by_date(connections):
target_date = datetime.strptime("01/04/2017", '%Y/%m/%d').date()
return list(filter(lambda x: x["ConnectedOn"] > target_date, connections))
And instead of creating simple dict and then fill its values into OrderedDict you could write values directly to the OrderedDict:
for row in reader:
od = OrderedDict()
od["FirstName"] = row["FirstName"]
od["LastName"] = row["LastName"]
od["EmailAddress"] = row["EmailAddress"]
od["Company"] = row["Company"]
od["ConnectedOn"] = datetime.strptime(row["ConnectedOn"], '%Y/%m/%d').date()
linkedin_contacts.append(od)
If you know date format you don't need python_dateutil, you could use built-in datetime.datetime.strptime() with needed format.
Because you don't precise the format string.
Use :
from datetime import datetime
format = '%d/%m/%Y'
date_text = '01/04/2017'
# inverse by datetime.strftime(format)
datetime.strptime(date_text, format)
#....
# with format as global
for row in reader:
od = OrderedDict()
od["FirstName"] = row["FirstName"]
od["LastName"] = row["LastName"]
od["EmailAddress"] = row["EmailAddress"]
od["Company"] = row["Company"]
od["ConnectedOn"] = strptime(row["ConnectedOn"], format)
linkedin_contacts.append(od)
Do:
def filter_by_date(connections, date_text):
target_date = datetime.strptime(date_text, format)
return [x for x in connections if x["ConnectedOn"] > target_dat]
I have 2 excel file having thousands of data.I want to take the row from file1 and search the entire file2 and output the repeated to file3.
file 1 file2 file3
abc.bcg#gmail.com abc.bcg_12253 abc.bcg_12253
bcg.abc#gmail.com efx.rfz_12345 def.xyz_08345
def.xyz#gmail.com wqr.qtf_34567
zxc.mnb_98764
def.xyz_08345
FileReader = csv.DictReader(f)
for row in FileReader:
emailLegalFile = row['email']
name_emailFile = emailFile[:emailLegalFile.find('#')]
print name_emailLegalFile
#with open(inputfile, 'rb') as d:
inputFileReader = csv.DictReader(d)
for r in inputFileReader:
if name_emailFile in r['google_email']:
date = r['date']
time = r['time']
t_format = r['format']
file_size = r['file_size']
google_email = r['google_email']
#writer = csv.writer(w)
#dic = {'date': date, 'time':time,'format':t_format,'file_size':file_size, 'google_email':google_email}
#writer.writerow(dic)
list = [date,time,t_format,file_size,google_email]
with open('result.csv','a') as e:
writer_1 = csv.writer(e,delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
writer_1.writerow(list)
File2 has 5 columns, but wanted to match with the 5th column only.
The output I am getting is just the 1st value i.e. abc.bcg_12253.
Please help me in solving this.
Thank you