Python 3, turning csv into a python dictionary - python

I am trying to take a csv file (which can be found here on my GitHub repo, https://github.com/playdscription/100daysofpython/blob/master/day_012/master_exercise_boolean.csv and turn it into a dictionary in python3. The idea is to take a google sheet, have multiple people fill it out, then turn it into a csv and have a python script pack that into a dictionary so I can access this information in a variety of ways.
I open the csv file, make a reader object out of it, then I loop through each line, and loop through each item in a specific part of the line and if the item has a value in it then I want it to write that value to a dictionary I have labeled joint. However even if there is a value in the item, I cannot get it to even just print that value. What am I doing wrong?
import csv
exercise_library = {}
joint = {}
with open('/Users/laptop/github/100daysofpython/day_012/master_exercise_boolean.csv' , 'r') as csv_file:
csv_reader = csv.reader(csv_file)
#look for joint actions of the NECK
for line in csv_reader:
for item in line[4:7]:
if item == True:
joint[line[3]] = [item]
#look for joint actions of the scapula.
for item in line[8:12]:
if item == True:
joint[line[7]] = [item]
#look for joint actions of the glenero_humeral.
for item in line[13:19]:
if item == True:
print(item)
#joint[line[12]] = [item]
exercise_library[line[0]] = [joint]

What you need to do is that create the key names and then assign values to dictionary. Also, items are read as string '1' not boolean, so I have changed that in code below.
import csv
exercise_library = {}
joint = {}
colnames = []
with open('test.csv' , 'r') as csv_file:
csv_reader = csv.reader(csv_file)
counter = 0
for line in csv_reader:
if counter==0:
colnames.append(line[3])
colnames.append(line[7])
colnames.append(line[12])
else:
for item in line[4:7]:
if item == '1':
joint[colnames[0]] = item
#look for joint actions of the scapula.
for item in line[8:12]:
if item == '1':
joint[colnames[1]] = item
#look for joint actions of the glenero_humeral.
for item in line[13:19]:
if item == '1':
joint[colnames[2]] = item
exercise_library[line[0]] = joint
counter = counter + 1

May not be what you want but this is the way you should do.
import csv
import requests
from collections import defaultdict
header = []
data = defaultdict(list)
with open('master_exercise_boolean.csv') as csv_file:
csv_reader = csv.reader(csv_file)
for i, line in enumerate(csv_reader):
if i ==0:
header = line
else:
for j, item in enumerate(line):
if item:
data[header[j]].append(item)
print(data)

Related

How can I write filtered results from a JSON file to a CSV file in Python?

I am trying to make a program that can save the results of a filtered JSON file as a CSV. Right now my function only saves the keys of the JSON to the CSV file.
Ideally I want the function to take two arguments: column (key) it is searching in; and the item (value) it is searching for.
This is my current function:
def save_csv(key, value):
with open('db.json') as json_file:
info = json.load(json_file)
test = info['data']
csv_file = open('test.csv', 'w')
csv_writer = csv.writer(csv_file)
count = 0
for e in test:
if count == 0:
header_csv = e.keys()
csv_writer.writerow(header_csv)
count += 1
for e in key:
if e == value:
csv_writer.writerow(e.values())
csv_file.close()
How could I change this function to make it save the filtered results in a CSV?
No matter what changes I try to make, it will only save the keys to the header of the CSV. None of the results I am filtering for will save to the CSV.
def save_csv(key, value):
with open('db.json') as json_file:
info = json.load(json_file)
test = info['data']
with open('test.csv', 'w', newline='') as csv_file:
csv_writer = csv.writer(csv_file)
for n,v in enumerate(test):
if not n:
header_csv = e.keys()
csv_writer.writerow(header_csv)
if key in v and v.get(key)==value:
csv_writer.writerow(e.values())

csv row rearrangement and also preserve value order, python

I have a csv file that looks like this:
Current csv file
Would like make a new csv file that looks like this:
Desired csv file
My first thoughts are to:
r1= []
r2 = []
with open('loadba1.csv', "r") as csv_file:
data = csv.reader(csv_file, delimiter=',')
f or rows in data:
r1.append(rows[0])
r2.append(rows[1])
r1 will give - TRUE
r2 will give - 'L_602356450160818331', 'wan1'
Then loop through again r2 to pull out each value and 'somehow' combine.
I also cannot loose the value relationship e.g. TRUE - wan1 - L_602356450160818331
I am not sure of the approach I should take. Please advise.
What you probably want to do is use a manual while loop rather than for:
with open('loadba1.csv', "r") as csv_file:
data = csv.reader(csv_file, delimiter=',')
while True:
try:
load_bal, interface = next(data)
except StopIteration:
break # end of file
try:
_, the_id = next(data)
except StopIteration:
raise ValueError("No ID row for %s" % interface)
... # write out (load_bal, interface, the_id)
import pandas as pd
df = pd.Dataframe("csv1.csv", index = False)
result = []
Id, LoadBal, Interface = "","",""
for index, row in df.iterrows():
for col, val in row.iteritems():
if col == 0 and val:
LoadBal = val
elif LoadBal:
Interface = val
result.append({"LoadBal": LoadBal, "Interface": Interface,"Id": Id })
Id, LoadBal, Interface = "","",""
elif col!=0:
Id = val
result = pd.DataFrame(result)
result.to_csv("csv2.csv")

Search value in column in CSV file using Python

I need to scan through line by line in time column time in CSV file, and see if there was activity in another column during 5 hours ago, then add column with value 1.
Here is my idea:
import csv
from collections import namedtuple
from contextlib import closing
light3 = pd.read_csv('G:/light3.csv')
light3.Time = pd.to_datetime(light3.Time)
m = light3.Time - DateOffset(hours = 5)
def search():
item = light3[(light3['Time']> m)| (light3['Time']== m)]
raw_data = 'G:/light3.csv'
failure = 'No matching item could be found with that item code. Please try again.'
check = False
with open('G:/project/test.pcap_z/light.csv','r') as csvinput:
with open('G:/light1.csv', 'w') as csvoutput:
writer = csv.writer(csvoutput, lineterminator='\n')
reader = csv.reader(csvinput)
read_data = csv.DictReader(csvinput, delimiter=';')
item_data = namedtuple(read_data['Time'], read_data.Time)
all = [ ]
row = next(reader)
row.append('f')
all.append(row)
while check == False:
for row in reader:
if row.Item == item:
row.append('f')
all.append(row)
row.append(1)
writer.writerows(all)

extract record by csv and filtering by date

I have a csv file where each record is a LinkedIn contact. I have to recreate another csv file where each contact it was reached only after a specific date (ex all the contact that are connected to me after 1/04/2017).
So this is my implementation:
def import_from_csv(file):
key_order = ("FirstName","LastName","EmailAddress","Company","ConnectedOn")
linkedin_contacts = []
with open(file, encoding="utf8") as csvfile:
reader=csv.DictReader(csvfile, delimiter=',')
for row in reader:
single_person = {"FirstName": row["FirstName"], "LastName": row["LastName"],
"EmailAddress": row["EmailAddress"], "Company": row["Company"],
"ConnectedOn": parser.parse(row["ConnectedOn"])}
od = OrderedDict((k, single_person[k]) for k in key_order)
linkedin_contacts.append(od)
return linkedin_contacts
the first script give to me a list of ordered dict, i dont know if the way i used to achive the correct order is good, also seeing some example (like here) i'm not using the od.update method, but i dont think i need it, is it correct?
Now i wrote a second function to filter the list:
def filter_by_date(connections):
filtered_list = []
target_date = parser.parse("01/04/2017")
for row in connections:
if row["ConnectedOn"] > target_date:
filtered_list.append(row)
return filtered_list
Am I doing this correctly?
Is there a way to optimize the code? Thanks
First point: you don't need the OrderedDict at all, just use a csv.DictWriter to write the filtered csv.
fieldnames = ("FirstName","LastName","EmailAddress","Company","ConnectedOn")
with open("/apth/to/final.csv", "wb") as f:
writer = csv.DictWriter(f, fieldnames)
writer.writeheader()
writer.writerows(filtered_contacts)
Second point: you don't need to create a new dict from the one yielded by the csv reader, just update the ConnectedOn key in place :
def import_from_csv(file):
linkedin_contacts = []
with open(file, encoding="utf8") as csvfile:
reader=csv.DictReader(csvfile, delimiter=',')
for row in reader:
row["ConnectedOn"] = parser.parse(row["ConnectedOn"])
linkedin_contacts.append(row)
return linkedin_contacts
And finally, if all you have to do is take the source csv, filter out records on ConnectedOn and write the result, you don't need to load the whole source in memory, create a filtered list (in memory again) and write the filtered list, you can stream the whole operation:
def filter_csv(source_path, dest_path, date):
fieldnames = ("FirstName","LastName","EmailAddress","Company","ConnectedOn")
target = parser.parse(date)
with open(source_path, "rb") as source, open(dest_path, "wb") as dest:
reader = csv.DictReader(source)
writer = csv.DictWriter(dest, fieldnames)
# if you want a header line with the fieldnames - else comment it out
writer.writeheaders()
for row in reader:
row_date = parser.parse(row["ConnectedOn"])
if row_date > target:
writer.writerow(row)
And here you are, plain and simple.
NB : I don't know what "parser.parse()" is but as others answers mention, you'd probably be better using the datetime module instead.
For filtering you could use filter() function:
def filter_by_date(connections):
target_date = datetime.strptime("01/04/2017", '%Y/%m/%d').date()
return list(filter(lambda x: x["ConnectedOn"] > target_date, connections))
And instead of creating simple dict and then fill its values into OrderedDict you could write values directly to the OrderedDict:
for row in reader:
od = OrderedDict()
od["FirstName"] = row["FirstName"]
od["LastName"] = row["LastName"]
od["EmailAddress"] = row["EmailAddress"]
od["Company"] = row["Company"]
od["ConnectedOn"] = datetime.strptime(row["ConnectedOn"], '%Y/%m/%d').date()
linkedin_contacts.append(od)
If you know date format you don't need python_dateutil, you could use built-in datetime.datetime.strptime() with needed format.
Because you don't precise the format string.
Use :
from datetime import datetime
format = '%d/%m/%Y'
date_text = '01/04/2017'
# inverse by datetime.strftime(format)
datetime.strptime(date_text, format)
#....
# with format as global
for row in reader:
od = OrderedDict()
od["FirstName"] = row["FirstName"]
od["LastName"] = row["LastName"]
od["EmailAddress"] = row["EmailAddress"]
od["Company"] = row["Company"]
od["ConnectedOn"] = strptime(row["ConnectedOn"], format)
linkedin_contacts.append(od)
Do:
def filter_by_date(connections, date_text):
target_date = datetime.strptime(date_text, format)
return [x for x in connections if x["ConnectedOn"] > target_dat]

Replacing specific data in a csv file

I'm currently in the process of producing a quiz as a competition between me and my friends, and to learn a bit more about programming which I am relatively new to. My program is intended to keep the last 3 results for each user that uses the quiz and replaces the oldest result with the newest. The current stage I have reached is being able to check if the user has their name in the file, and if not writes to the file as normal.
if team == 'Team 1':
path = 'team1scores.csv'
elif team == 'Team 2':
path = 'team2scores.csv'
elif team == 'Team 3':
path = 'team3scores.csv'
else:
print("--Error Defining File Path--")
with open(path, 'rt') as csvfile:
ver_read = csv.reader(csvfile, delimiter =",")
ver_write = csv.writer(csvfile, delimiter =",")
for row in ver_read:
if user in row:
row_data = list(ver_read)
row_len = len(row_data)
if row_len >= 3:
>>> The Problem is here
else:
with open(path, 'a+', newline='') as csvfile:
csvwriter = csv.writer(csvfile, delimiter=',')
csvwriter.writerows(datacsv)
The problem I have with the program is being able to replace the result, say I had the data below in my csv file with 3 inputs already. These need to be kept in two different columns. As I plan to have a sorting feature included.
Jake,5
Jake,7
Jake,2
Max,9
Lee,8
I have experimented several times with the basis of the code above but I am confused once the program reaches the situation of replacing the information. So far I have been able to overwrite the entire file but not specific pieces of data.
Will the ver_write be neccessary in the next steps?
Edit:
I now have an updated version but still have the same problem, This program is adapted from 2ps's answer to fit into my criteria. It still needs to overwrite and needs to print to two different cells for the name and the score. The basis is there for what I need but it won't work.
from collections import OrderedDict
user_data = OrderedDict()
data_to_write = []
with open(path, 'r+') as csvfile:
ver_read = csv.reader(csvfile, delimiter =";")
for x, row in enumerate(ver_read):
if user == row[0]:
user_data[x] = row
else:
data_to_write.append(row)
if len(user_data) > 2:
keys = user_data.keys()[-2:]
for x in keys:
data_to_write.append(user_data[x])
data_to_write.append(datacsv)
with open(path, 'w') as csvfile:
ver_write = csv.writer(csvfile, delimiter=",")
ver_write.writerows(data_to_write)
else:
with open(path, 'a+', newline='') as csvfile:
csvwriter = csv.writer(csvfile, delimiter=',')
csvwriter.writerows(datacsv)
Am I doing something fundamentally wrong here?
As far as I know, you cannot change one row in a file. So you'll have to rewrite the complete file.
I do not know how you insert new data, but you could do the following:
import csv
# Assuming new score
new_score = ['Jake', '3']
# Open the file containing the scores
with open('scores.csv', 'r') as csvfile:
ver_read = csv.reader(csvfile, delimiter=',')
# Make a dict that will contain the scores per person
names = {}
for row in ver_read:
# Grab the name and the score
name,score = list(row)
# If it's not in names yet, put it in and make it a list
if name not in names:
names[name] = []
# Append the score to the name list
names[name].append(score)
# Add the new score
names[new_score[0]].append(new_score[1])
with open('scores.csv', 'w') as csvfile:
# Loop the names in the names dict
for name in names:
# If the person has more than 3 scores, only take the last 3
if len(names[name]) > 3:
names[name] = names[name][-3:]
# For each score, print it
for score in names[name]:
print('{},{}'.format(name, score))
#ver_write.writerow([name, score])
In:
Jake,5
Jake,7
Jake,2
Max,9
Lee,8
New score:
Jake,3
Out:
Jake,7
Jake,2
Jake,3
Max,9
Lee,8
from collections import OrderedDict
user_data = OrderedDict() # dict to hold all matching rows for the user, keyed off of line number
data_to_write = []
with open(path, 'r') as csvfile: # read-write mode for file
ver_read = csv.reader(csvfile, delimiter =",")
for x, row in enumerate(ver_read):
if user == row[0]:
user_data[x] = row
else:
data_to_write.append(row) # store it for afterwards
if len(user_data) >= 3:
keys = user_data.keys()[-2:] # Grab the last two scores in the file
for x in keys:
data_to_write.append(user_data[x])
# Write the contents of the new score here:
data_to_write.append(. . . . .)
with open(path, 'w') as csvfile:
# finally write the changes to file
ver_write = csv.writer(csvfile, delimiter=",")
ver_write.writerows(data_to_write)
You could try something like this maybe:
data_to_write = []
with open(path, 'r+') as csvfile: # read-write mode for file
ver_read = csv.reader(csvfile, delimiter =",")
row_data = list(ver_read)
for row in row_data:
if user in row:
if row_data.index(row) >= 3: # if we found the user in a row after 3
row = [] # change the information here to whatever suits you
else:
row = [] # or here depending on your logic
data_to_write.append(row) # store it for afterwards
# finally write the changes to file
ver_write = csv.writer(csvfile, delimiter=",")
ver_write.writerows(data_to_write)

Categories