How to extract column and row in csv using python - python

I have this input in a file.csv
"","min","max","rainfall","days_clear"
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"LA",10,20,1000,54
I wanted to write a simple program to find the city with the lowest rainfall which is Missouri in this case. How can I do that using Python csv reader?
I can try to extract the items, but unfortunately the first row of the file has to be there.
I wanted to have something like count[Missouri]=300
count[Amsterdam]=1212 etc.. so that I can do a minimum and reference back to print the city.
Please advise. Thanks.

import csv
def main(path='file.csv'):
    """Print the (rainfall, city) pair with the lowest rainfall.

    path: CSV file to read; defaults to 'file.csv'.  The first row must hold
          the field names, including 'rainfall' and the unnamed ('') column
          that contains the city.

    Raises ValueError if the file contains no data rows.
    """
    # Text mode with newline='' is what the Python 3 csv module expects; it
    # leaves all line-ending handling to the csv reader.
    with open(path, newline='') as inf:
        data = [(int(row['rainfall']), row['']) for row in csv.DictReader(inf)]
    # Tuples compare on rainfall first, so min() selects the same pair that
    # sorting the list and taking element 0 would, without the full sort.
    print(min(data))


if __name__ == "__main__":
    main()
returns
(300, 'Missouri')

One way to do this would be to use the csv module's DictReader class to write a function to extract the column of data. DictReader will take care of handling the first row of field names automatically. The built-in min() function can then be used to determine the item with the smallest value in the column.
import csv
def csv_extract_col(csvinput, colname, key):
    """Extract a named column from a csv stream into a dictionary.

    csvinput: iterable of csv lines (e.g. an open file object) whose first
              line holds the field names
    colname:  name of the column to extract
    key:      name of another column to use as keys in the returned dict

    Values are returned exactly as csv.DictReader yields them (strings).
    If the same key occurs in several rows, the last row wins.  An input
    with no data rows gives an empty dict.  Raises KeyError when a row has
    no field called `colname` or `key`.
    """
    return {row[key]: row[colname] for row in csv.DictReader(csvinput)}
if __name__ == '__main__':
    import io

    # The first line of the data is the field-name row.  It must contain only
    # the names: any trailing text on that line (such as an inline remark)
    # would be read by the csv module as part of the last field name.
    csvdata = """\
"","min","max","rainfall","days_clear"
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"LA",10,20,1000,54
"""
    csvfile = io.StringIO(csvdata)
    rainfall = csv_extract_col(csvfile, 'rainfall', '')
    print(rainfall)
    # {'Missouri': '300', 'Amsterdam': '1212', 'LA': '1000'}
    # The extracted values are strings, so compare them numerically.
    print(min(rainfall.items(), key=lambda r: float(r[1])))
    # ('Missouri', '300')

import StringIO
import csv
example = """"","min","max","rainfall","days_clear"
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"LA",10,20,1000,54
"""
data_in = StringIO.StringIO(example)
#data_in = open('mycsvdata.csv')
def _int_or_text(text):
    """Return int(text) for integer-looking text, otherwise text unchanged."""
    if text.lstrip('-').isdigit():
        try:
            return int(text)
        except ValueError:
            # e.g. '--5': passes the digit check but is not a valid integer
            pass
    return text


def read_data(data_in):
    """Read csv lines into a dict of per-row dicts keyed by the first column.

    data_in: iterable of csv lines (open file, StringIO, ...).  The first
             non-empty row supplies the column names.

    Returns {first_cell: {column_name: value, ...}, ...} where integer-looking
    cells (optionally negative) are converted to int and everything else is
    kept as a string.  Blank lines are skipped; empty input gives {}.
    """
    results = {}
    cols = None
    for row in csv.reader(data_in):
        if not row:
            # csv.reader yields [] for a blank line; indexing it would fail
            continue
        if cols is None:
            cols = row
            continue
        values = [_int_or_text(x) for x in row]
        # The first cell is the key; the rest are paired with the header names.
        results[values[0]] = dict(zip(cols[1:], values[1:]))
    return results
data = read_data(data_in)
min(data.items(),key=lambda x: x[1].get('rainfall'))
Returns
('Missouri', {'max': 10, 'days_clear': 23, 'rainfall': 300, 'min': -2})

To read from a file, you need to remove all code that deals with a string:
reader = csv.reader(open('file.csv', 'rb'))
rainfall = csv_extract_col(reader, 'rainfall', '')
Update: Sorry, it needs a bit more work than that. The first arg of csv_extract_col will be used as the first arg of csv.DictReader so (in this case) it should be an open file object, and should never be a csv.reader instance. See below:
import csv
### def csv_extract_col(csvinput, colname, key):
### exactly as provided by #martineau
if __name__ == '__main__':
import sys
filename, data_col_name, key_col_name = sys.argv[1:4]
input_file_object = open(filename, 'rb')
result_dict = csv_extract_col(input_file_object, data_col_name, key_col_name)
print result_dict
print min(result_dict.iteritems(), key=lambda r: float(r[1]))
Results:
command-prompt>\python27\python joj_csv.py joj.csv rainfall ""
{'Amsterdam': '1212', 'LA': '1000', 'Missouri': '300'}
('Missouri', '300')
command-prompt>\python27\python joj_csv.py joj.csv days_clear ""
{'Amsterdam': '34', 'LA': '54', 'Missouri': '23'}
('Missouri', '23')
Update 2 in response to comment """there must be something i missed out.. i tried.. [what looks like #martineau's function] with the above main function you define. Then in my shell, i define python rainfall "". But it gives me KeyError: 'rainfall'"""
Two possibilities:
(1) You made a mistake patching the pieces of source code together. Check your work.
(2) Your file doesn't have the expected heading row contents. Try some debugging e.g. change #martineau's code so that you can insert a print statement etc. to show what the csv.DictReader thinks about your heading row:
reader = csv.DictReader(csvinput)
print "fieldnames", reader.fieldnames
assert colname in reader.fieldnames
assert key in reader.fieldnames
for row in reader:
If you are still stuck, show us ALL of your code plus the full traceback and error message -- either edit your question or put it up on pastebin or dropbox; DON'T put it into a comment!!

My code for cases in which there are several cities having the same minimum or several cities having the same maximum:
import csv
def minmax_col(filename, key, colname):
    """Find the largest and smallest value of a column and the rows holding them.

    filename: path of the csv file.  It is read with QUOTE_NONNUMERIC, so
              every unquoted field is converted to float and text fields
              (including the header names) must be quoted.
    key:      name of the column whose value identifies a row
    colname:  name of the numeric column to analyse

    Returns (key, (maxi, limax), (mini, limin)): `limax` lists the `key`
    value of every row whose `colname` equals the maximum `maxi`, and
    `limin` does the same for the minimum `mini`, both in file order.
    With no data rows the result is (key, (-inf, []), (inf, [])).
    """
    mini = float('inf')
    maxi = float('-inf')
    # Two independent lists: a chained `limin = limax = []` would make both
    # names refer to one shared list object.
    limin = []
    limax = []
    # newline='' leaves line-ending handling to the csv module (Python 3).
    with open(filename, newline='') as csvfile:
        rid = csv.DictReader(csvfile,
                             fieldnames=None,
                             quoting=csv.QUOTE_NONNUMERIC)
        for row in rid:
            value = row[colname]
            if value == maxi:
                limax.append(row[key])      # another row tying the maximum
            elif value > maxi:
                maxi = value
                limax = [row[key]]          # new maximum: restart the list
            if value == mini:
                limin.append(row[key])      # another row tying the minimum
            elif value < mini:
                mini = value
                limin = [row[key]]          # new minimum: restart the list
    return (key, (maxi, limax), (mini, limin))
# Report the extremes for each analysed column.  The cities are identified by
# the unnamed ('') first column of the file.
for position, key in enumerate(('rainfall', 'min')):
    if position:
        print()  # blank line between two reports
    city, (Ma, liMa), (mi, limi) = minmax_col('filename.csv', '', key)
    print('Cities analysed on ' + repr(key) + ' parameter :')
    print('maximum==', Ma, ' cities :', ', '.join(liMa))
    print('minimum==', mi, ' cities :', ', '.join(limi))
On a file like that:
"","min","max","rainfall","days_clear"
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"Oslo",-2,8,800,12
"LA",10,20,1000,54
"Kologoro",28,45,1212,1
the result is
Cities analysed according the 'rainfall' parameter :
maximum== 1212.0 cities : Amsterdam, Kologoro
minimum== 300.0 cities : Missouri
Cities analysed according the 'min' parameter :
maximum== 28.0 cities : Kologoro
minimum== -3.0 cities : Amsterdam

Related

Python CSV multivalued columns to multiple rows

I am trying to convert nested JSON to CSV using python.
There can be multiple values for some attributes:
like phone1, phone2,phone3 for a single individual.
I wrote a python code for this and its doing the job perfectly.
But I am getting the values of multiple attributes in multiple columns but I want them in multiple rows,
like for example:
my output:
name phone1 phone2 phone3
xyx 98 34 56
required output:
name phone
xyx 98
xyx 34
xyx 56
The code for this is:
import sys
import json
import csv
import io
import pandas as pd
##
# Convert to string keeping encoding in mind...
##
processed_data = []
def to_string(s):
    """Return `s` converted with str(), falling back to UTF-8 encoding.

    The fallback only matters where str() can fail to encode text (Python 2
    unicode objects); any other error raised by str() now propagates instead
    of being hidden by a bare `except:`.
    """
    try:
        return str(s)
    except UnicodeEncodeError:
        # Change the encoding type if needed
        return s.encode('utf-8')
def reduce_item(key, value):
    """Flatten `value` into the module-level `reduced_item` dict.

    key:   prefix for the flattened key(s)
    value: a list, a dict or a leaf value

    Lists contribute `key_<index>` entries and dicts `key_<subkey>` entries,
    recursively; every leaf is stored as
    reduced_item[to_string(key)] = to_string(value).
    The caller must bind `reduced_item` to a dict before calling.
    """
    # `reduced_item` is only mutated, never rebound, so no `global`
    # declaration is needed here.
    if isinstance(value, list):
        # Reduction Condition 1: one entry per list position
        for index, sub_item in enumerate(value):
            reduce_item(key + '_' + to_string(index), sub_item)
    elif isinstance(value, dict):
        # Reduction Condition 2: one entry per dict key
        for sub_key, sub_value in value.items():
            reduce_item(key + '_' + to_string(sub_key), sub_value)
    else:
        # Base Condition: a leaf value
        reduced_item[to_string(key)] = to_string(value)
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("\nUsage: python json_to_csv.py <node> <json_in_file_path> <csv_out_file_path>\n")
    else:
        # Reading arguments
        node = sys.argv[1]
        json_file_path = sys.argv[2]
        csv_file_path = sys.argv[3]

        # 'utf-8-sig' transparently drops a leading byte-order mark, if any.
        with io.open(json_file_path, 'r', encoding='utf-8-sig') as fp:
            raw_data = json.load(fp)

        # Use the named node when the document has one, else the whole
        # document.  Only lookup failures are caught, not every exception.
        try:
            data_to_be_processed = raw_data[node]
        except (KeyError, IndexError, TypeError):
            data_to_be_processed = raw_data

        header = set()
        for item in data_to_be_processed:
            # reduce_item() fills this module-level dict for the current item
            reduced_item = {}
            reduce_item(node, item)
            header.update(reduced_item)
            processed_data.append(reduced_item)
        header = sorted(header)

        # The csv module expects files opened with newline='' (Python 3);
        # without it extra blank lines can appear between rows on Windows.
        with open(csv_file_path, 'w', newline='') as f:
            writer = csv.DictWriter(f, header, quoting=csv.QUOTE_ALL)
            writer.writeheader()
            writer.writerows(processed_data)
        print("Just completed writing csv file with %d columns" % len(header))
I tried changing the code but was not able to achieve the desired result.It would be of great help if anyone can suggest the changes for this code
Thanks in advance

Merging two csv files into list of dictionaries

i have a task to do and i got stuck because whatever i do it does't seem to work.
So I have two CSV files.
First called persons_file and it contains header line: id, name, surname.
And visits_file containing id, person_id, site.
I have to write a function called merge that gets two files as arguments (both of StringIO type) and returns a list of dictionaries with the number of visits for each user:
[ {
"id": (person's id),
"name": (person's name),
"surname": (person's surname),
"visits": (number of visits)
} ]
I came up with this and i don't know where my mistake is.
import io
def merge(persons_file, visits_file):
    """Combine a persons csv and a visits csv into per-person visit counts.

    persons_file: iterable of csv lines with header `id,name,surname`
    visits_file:  iterable of csv lines with header `id,person_id,site`

    Returns a list with one dict per person, in file order, with the keys
    'id', 'name', 'surname' and 'visits'.  All values are strings; 'visits'
    is the number of visit rows whose person_id equals the person's id
    ('0' when there are none).  Header rows and blank lines are skipped.
    """
    import csv
    from collections import Counter

    visit_rows = csv.reader(visits_file)
    next(visit_rows, None)  # skip the header row
    # Count once up front instead of calling list.count() for every person.
    visits_per_person = Counter(row[1] for row in visit_rows if row)

    person_rows = csv.reader(persons_file)
    next(person_rows, None)  # skip the header row
    return_list = []
    for row in person_rows:
        if not row:
            continue  # blank line
        return_list.append({
            'id': row[0],
            'name': row[1],
            # csv.reader already removed the line ending, so nothing is cut
            # off the surname when the last line has no trailing newline.
            'surname': row[2],
            'visits': str(visits_per_person[row[0]]),
        })
    return return_list
# Read both files inside one `with` block so they are closed even if an
# error occurs; merge() then works on in-memory StringIO copies.
with open('persons_file.csv', mode='r') as file1, \
        open('visits_file.csv', mode='r') as file2:
    persons_file_arg = io.StringIO(file1.read())
    visits_file_arg = io.StringIO(file2.read())
list_of_visits = merge(persons_file_arg, visits_file_arg)
for i in list_of_visits:
    print(i)
I will be glad if anyone could help me.
What is the issue? Is it the output that is not what you expected, or are you getting an exception? Your code seems like it should achieve the result you want, but I have a couple suggestions to make that could simplify things.
Look into collections.Counter you could then call count_of_visits_by_person_id = Counter(list_of_person_ids) to get a result of the form:
{person_id: number_of_visits, ...}. You could then use this to simply look up the number of visits in your next for loop. e.g.:
from collections import Counter
...
count_of_visits_by_person_id = Counter(list_of_person_ids)
for row in persons_file:
if line_counter == 0:
line_counter += 1
continue
help_dict = {}
split_row = row.split(',')
help_dict['id'] = split_row[0]
help_dict['name'] = split_row[1]
help_dict['surname'] = split_row[2][:-1]
# [:len(split_row[2]) - 1] is equivalent to [:-1]
# I assume you are stripping whitespace from the right side,
# which can also be accomplished using split_row[2].rstrip()
if split_row[0] in count_of_visits_by_person_id:
visits = count_of_visits_by_person_id[split_row[0]]
else:
visits = 0
help_dict['visits'] = str(visits)
return_list.append(help_dict)
The generally simpler and safer way to open files is using the with statement. Here is an example:
with open('visits_file.csv', mode='r') as visits_file:
    next(visits_file, None)  # Skips the first (header) line
    # Iterating the file stops cleanly at end of file, so no empty string is
    # ever split and indexed.
    for row in visits_file:
        list_of_person_ids.append(row.split(',')[1])

How to find a data in CSV file with python

I have a problem: I want to search for data in a CSV file with Python.
my code like this
#search process area
area_proses = []
sg1 = []
sg2 = []
sg3 = []
avg = []
#input number you want to search
number = raw_input('Masukan id Spesific Goal\n')
#read csv, and split on "," the line
csv_file = csv.reader(open('C:/xampp_2/htdocs/SkripsiV2/fuzzy/download.csv', "rb"), delimiter=",")
#loop through csv list
for row in csv_file:
area_proses.append(row[1])
sg1.append(row[2])
sg2.append(row[3])
sg3.append(row[4])
avg.append(row[5])
#if current rows 1nd value is equal to input, print that row
if number == row[0]:
#masukan data
print(area_proses,sg1,sg2,sg3,avg)
my problem is when i search with id 11 the output is like this:
(['area_proses', 'Service Delivery'], ['sg1', '3.71'], ['sg2', '3.48'], ['sg3',
'3.30'], ['avg', '3.50'])
but when i search id 12 the output is like :
(['area_proses', 'Service Delivery', 'Incident Resolution and Prevention'], ['sg
1', '3.71', '3.83'], ['sg2', '3.48', '3.65'], ['sg3', '3.30', '3.70'], ['avg', '
3.50', '3.73'])
How i can solved this problem?
Download.csv
"id","area_proses","sg1","sg2","sg3","avg","fuzzy",
"11","Service Delivery","3.71","3.48","3.30","3.50","0.00000000000",
"12","Incident Resolution and Prevention","3.83","3.65","3.70","3.73","0.00000000000",
"13","Service System Development","3.93","3.29","3.26","3.49","0.00000000000",
"14","Service System Transition","3.00","3.43","0.00","3.22","0.00000000000",
"15","Strategic Service Management","3.48","3.86","0.00","3.67","0.00000000000",
"16","Configuration Management","3.14","3.57","0.00","3.36","0.00000000000",
"17","Measurement and Analysis","2.93","3.18","0.00","3.06","0.00000000000",
Try using the pandas library. Install it, then do:
import pandas as pd
df = pd.read_csv('csv_file.csv')
df[df['id'] == number]
Just change 'rb' to 'r'
fopn = open(file_loc, "r")
csv_file = csv.reader(fopn)
for row in csv_file:
if number == row[0]:
print(row)

I am trying to create a dictionary in python

It takes a file of 500 complaints, returns the number of the complaint as the key and a tuple with the make of the car, date of complaint, Crash True or False, City and State as the value.
ex) mydict("Complaints.txt")[416]
('CHRYSLER', datetime.date(1995, 1, 9), False, 'ARCADIA', 'FL')
so far I have :
from collections import defaultdict
import datetime
def fieldict(filename):
with open(filename) as f:
x=[line.split('\t')[0].strip() for line in f] #list of complaint numbers
y= line.split('\t') #list of full complaints
d={}
for j in x:
Y= True
N= False
d[j] = tuple(y[2],datetime.date(y[7]), y[6], y[12], y[13]) #dict with number of complaint as key and tuple with index as values
return d
y is the entire complaint broken up into a list with \t characters removed. If someone could point me in the right direction it would be much appreciated
You could also lean on the csv module a bit (untested):
import csv
def fieldict(filename, date_format='%Y%m%d'):
    """Map complaint numbers to (make, date, crash, city, state) tuples.

    filename:    tab-separated complaints file, one complaint per line
    date_format: strptime format of the date in column 7.
                 NOTE(review): the default assumes YYYYMMDD -- confirm
                 against the actual file.

    Returns {complaint_number (int):
             (column 2, datetime.date from column 7, crash flag (bool),
              column 12, column 13)}.
    Raises ValueError if the first column is not an integer or the date
    does not match `date_format`.
    """
    import datetime  # local import: this snippet only imports csv at the top

    fullDict = {}
    # newline='' leaves line-ending handling to the csv module (Python 3).
    with open(filename, newline='') as f:
        reader = csv.reader(f, delimiter='\t')
        for y in reader:
            if not y:
                continue  # blank line
            # datetime.date() needs year/month/day integers, so the text
            # field has to be parsed first.
            complaint_date = datetime.datetime.strptime(
                y[7].strip(), date_format).date()
            # presumably column 6 holds 'Y' or 'N' -- verify against the file
            crashed = y[6].strip().upper() == 'Y'
            fullDict[int(y[0])] = (y[2], complaint_date, crashed, y[12], y[13])
    return fullDict
if __name__ == "__main__":
mydict = fieldict("Complaints.txt")
print mydict[416]
If I am understanding you correctly, I think this is what you are looking for.
import datetime
def fieldict(filename, date_format='%Y%m%d'):
    """Map complaint numbers to (make, date, crash, city, state) tuples.

    filename:    tab-separated complaints file, one complaint per line
    date_format: strptime format of the date in column 7.
                 NOTE(review): the default assumes YYYYMMDD -- confirm
                 against the actual file.

    Returns {complaint_number (int):
             (column 2, datetime.date from column 7, crash flag (bool),
              column 12, column 13)}.
    Raises ValueError if the first column is not an integer or the date
    does not match `date_format`.
    """
    returnDict = {}
    with open(filename) as f:
        for line in f:
            if not line.strip():
                continue  # blank line
            # Drop the line ending so the last column carries no newline.
            lineList = line.rstrip('\n').split('\t')
            index = int(lineList[0])
            # datetime.date() needs year/month/day integers, so the text
            # field has to be parsed first.
            complaint_date = datetime.datetime.strptime(
                lineList[7].strip(), date_format).date()
            # presumably column 6 holds 'Y' or 'N' -- verify against the file
            crashed = lineList[6].strip().upper() == 'Y'
            # A tuple literal: tuple() accepts a single iterable, not five
            # separate arguments.
            complaint = (lineList[2], complaint_date, crashed,
                         lineList[12], lineList[13])
            returnDict[index] = complaint
    return returnDict
if __name__ == "__main__":
mydict = fieldict("Complaints.txt")
print mydict[416]

Group and Check-mark using Python

I have several files, each of which has data like this (filename:data inside separated by newline):
Mike: Plane\nCar
Paula: Plane\nTrain\nBoat\nCar
Bill: Boat\nTrain
Scott: Car
How can I create a csv file using python that groups all the different vehicles and then puts a X on the applicable person, like:
Assuming those line numbers aren't in there (easy enough to fix if they are), and with an input file like following:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
Solution can be found here : https://gist.github.com/999481
import sys
from collections import defaultdict
import csv
# see http://stackoverflow.com/questions/6180609/group-and-check-mark-using-python
def main():
    """Read the files named on the command line and print the check-mark csv.

    Every file is parsed with process_file() into one shared name -> items
    map, which print_csv() then writes to standard output.  With no file
    arguments only the usage message is printed.
    """
    files = sys.argv[1:]
    if not files:
        print("usage: ./python_checkmark.py file1 [file2 ... filen]")
        return  # nothing to process, so do not emit an empty table
    name_map = defaultdict(set)
    for f in files:
        # `with` closes the file even if parsing raises
        with open(f, "r") as file_handle:
            process_file(file_handle, name_map)
    print_csv(sys.stdout, name_map)
def process_file(input_file, name_map):
    """Add the items listed in `input_file` to `name_map`.

    input_file: iterable of lines.  A line containing ':' starts a new name
                (`Name: first item`); every other line is a further item
                of the most recent name.
    name_map:   dict mapping name -> set of items; updated in place.

    Blank lines are ignored.  Items that appear before any name line are
    stored under the empty name "".
    """
    cur_name = ""
    for line in input_file:
        if ":" in line:
            # Split on the first ':' only, so an item that itself contains
            # a colon does not break the two-way split.
            name_part, _, item = line.partition(":")
            cur_name = name_part.strip()
            # Register the name even when no item follows on the same line.
            name_map.setdefault(cur_name, set())
        else:
            item = line
        item = item.strip()
        if item:
            name_map.setdefault(cur_name, set()).add(item)
def print_csv(output_file, name_map):
    """Write a check-mark table of `name_map` to `output_file` as csv.

    output_file: writable text file object
    name_map:    dict mapping name -> set of items

    The header row is an empty cell followed by the names in the dict's
    iteration order.  Each following row is one item (sorted) followed by
    "X" under every name that has the item and an empty cell otherwise.
    """
    # Materialise the keys: a dict view cannot be concatenated to a list.
    names = list(name_map)
    items = set().union(*name_map.values())
    writer = csv.writer(output_file, quoting=csv.QUOTE_MINIMAL)
    writer.writerow([""] + names)
    for item in sorted(items):
        marks = ["X" if item in name_map[name] else "" for name in names]
        writer.writerow([item] + marks)
if __name__ == '__main__':
main()
Output:
,Mike,Bill,Scott,Paula
Boat,,X,,X
Car,X,,X,X
Plane,X,,,X
Train,,X,,X
Only thing this script doesn't do is keep the columns in order that the names are in. Could keep a separate list maintaining the order, since maps/dicts are inherently unordered.
Here is an example of how-to parse these kind of files.
Note that the dictionary is unordered here. You can use an ordered dict from the standard library (available in Python 3.2 / 2.7), find an implementation / backport if you have an older Python version, or just save the order in an additional list :)
data = {}
name = None
with open(file_path) as f:
for line in f:
if ':' in line: # we have a name here
name, first_vehicle = line.split(':')
data[name] = set([first_vehicle, ]) # a set of vehicles per name
else:
if name:
data[name].add(line)
# now a dictionary with names/vehicles is available
# let's convert it to simple csv-formatted string..
# a set of all available vehicles
vehicles = set(v for vlist in data.values()
for v in vlist)
for name in data:
name_vehicles = data[name]
csv_vehicles = ''
for v in vehicles:
if v in name_vehicles:
csv_vehicles += v
csv_vehicles += ','
csv_line = name + ',' + csv_vehicles
Assuming that the input looks like this:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
This python script, places the vehicles in a dictionary, indexed by the person:
#!/usr/bin/python
persons={}
vehicles=set()
with open('input') as fd:
for line in fd:
line = line.strip()
if ':' in line:
tmp = line.split(':')
p = tmp[0].strip()
v = tmp[1].strip()
persons[p]=[v]
vehicles.add(v)
else:
persons[p].append(line)
vehicles.add(line)
for k,v in persons.iteritems():
print k,v
print 'vehicles', vehicles
Result:
Mike ['Plane', 'Car']
Bill ['Boat', 'Train']
Scott ['Car']
Paula ['Plane', 'Train', 'Boat', 'Car']
vehicles set(['Train', 'Car', 'Plane', 'Boat'])
Now, all the data needed are placed in data-structures. The csv-part is left as an exercise for the reader :-)
The most elegant and simple way would be like so:
# Build vehicle -> set of people from a folder in which every file is named
# after a person and lists that person's vehicles, one per line.
vehiclesToPeople = {}
people = []
for root, dirs, files in os.walk('/path/to/folder/with/files'):
    for file in files:
        person = file
        people.append(person)
        path = os.path.join(root, file)
        with open(path) as f:
            for vehicle in f:
                # Strip the line ending so 'Car\n' and 'Car' are one vehicle.
                vehicle = vehicle.strip()
                if vehicle:
                    vehiclesToPeople.setdefault(vehicle, set()).add(person)
people.sort()
# Header row: an empty corner cell followed by the people.
table = [[''] + people]
for vehicle, owners in sorted(vehiclesToPeople.items()):
    # Each row starts with the vehicle name so it lines up with the header.
    table.append([vehicle] + [('X' if p in owners else '') for p in people])
csv = '\n'.join(','.join(row) for row in table)
You can do pprint.pprint(table) as well to look at it.

Categories