I am trying to create a dictionary in python - python

It takes a file of 500 complaints, returns the number of the complaint as the key and a tuple with the make of the car, date of complaint, Crash True or False, City and State as the value.
ex) mydict("Complaints.txt")[416]
('CHRYSLER', datetime.date(1995, 1, 9), False, 'ARCADIA', 'FL')
so far I have :
from collections import defaultdict
import datetime
def fieldict(filename):
with open(filename) as f:
x=[line.split('\t')[0].strip() for line in f] #list of complaint numbers
y= line.split('\t') #list of full complaints
d={}
for j in x:
Y= True
N= False
d[j] = tuple(y[2],datetime.date(y[7]), y[6], y[12], y[13]) #dict with number of complaint as key and tuple with index as values
return d
y is the entire complaint broken up into a list with \t characters removed. If someone could point me in the right direction it would be much appreciated

You could also lean on the csv module a bit (untested):
import csv
def fieldict(filename):
fullDict = {}
with open(filename) as f:
reader = csv.reader(f, delimiter='\t')
for y in reader:
fullDict[y[0].strip()] = (y[2],datetime.date(y[7]), y[6], y[12], y[13])
return fullDict
if __name__ == "__main__":
mydict = fieldict("Complaints.txt")
print mydict[416]

if I am understanding your correctly, I think this is what you are looking for.
import datetime
def fieldict(filename):
returnDict = {}
with open(filename) as f:
for line in f:
lineList = line.split('\t')
index = lineList[0].strip()
complaint = tuple(lineList[2],datetime.date(lineList[7]), lineList[6], lineList[12], lineList[13])
returnDict[index] = complaint
return returnDict
if __name__ == "__main__":
mydict = fieldict("Complaints.txt")
print mydict[416]

Related

Return a dictionary of a function

I want to define a function, that reads a table of a textfile as a dictionary and than use it for returning specific values. The keys are chemical symbols (like "He" for Helium,...). The values return their specific atom masses.
I don't understand, what I have to do...
The first five lines of the textfile read:
H,1.008
He,4.0026
Li,6.94
Be,9.0122
B,10.81
Here are my attempts: (I don't know where to place the parameter key so that I can define it)
def read_masses():
atom_masses = {}
with open["average_mass.csv") as f:
for line in f:
(key, value) = line.split(",")
atom_masses[key] = value
return(value)
m = read_masses("average_mass.csv)
print(m["N"]) #for the mass of nitrogen ```
once return has called, the code below it doesn't execute. What you need to return is the atom_masses not value and you have to place it outside the for loop
def read_masses(file):
atom_masses = {}
with open(file) as f:
for line in f:
(key, value) = line.split(",")
atom_masses[key] = value
return (atom_masses)
m = read_masses("average_mass.csv")
print(m["H"])
>>> 1.008
Try:
def read_masses(name):
data = {}
with open(name, "r") as f_in:
for line in map(str.strip, f_in):
if line == "":
continue
a, b = map(str.strip, line.split(",", maxsplit=1))
data[a] = float(b)
return data
m = read_masses("your_file.txt")
print(m.get("He"))
Prints:
4.0026

How do i find the mean

def get_mean_temperature(filename):
with open(filename) as f:
lst = f.read().splitlines()
lst.pop(0)
result = 0
count = 0
for element in lst:
count += 1
el = int(element[6:])
result += el
print(result)
mn_tem = result / count
return mmn_tem
if __name__ == "__main__":
filename = "temp_log.txt"
with open(filename, "w") as f:
f.write("DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n")
mean_temperature = get_mean_temperature(filename)
print(f"{mean_temperature:.1f}")
This is the code that I am trying to solve. So what I have to do here is to find the mean of temperature that are given in the text file, which are in this case "DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n"
The text is sorted by MM-DD TT.T
Please help me have this code to work
from statistics import mean
data = "DATES T.\n07-01 28.0\n08-01 33.5\n09-01 27.0\n"
temperatures = [float(item.split()[1]) for item in data.split("\n")[1:] if item]
temperatures_mean = mean(temperatures)
print(temperatures)
print(temperatures_mean)
Output:
[28.0, 33.5, 27.0]
29.5
Or, as your original function:
from statistics import mean
def get_mean_temperature(filepath):
with open(filepath, "r") as f:
data = f.read()
temperatures = [float(item.split()[1]) for item in data.split("\n")[1:] if item]
return mean(temperatures)

Searching a file with the contents of another file python

I have a file that has a unique ID number on each line. I am trying to search a different file for the occurrences of these ID numbers and return the line where these id numbers are in the second file, in this case into an output file. I am new to programming and this is what I have so far.
outlist = []
with open('readID.txt', 'r') as readID, \
open('GOlines.txt', 'w') as output, \
open('GO.txt', 'r') as GO:
x = readID.readlines()
print x
for line in GO:
if x[1:-1] in line:
outlist.append(line)
outlist.append('\n')
if x[1:-1] in line:
outlist.append(line)
outlist.append('\n')
print outlist
output.writelines(outlist)
The files look like this: readID.txt
00073810.1
00082422.1
00018647.1
00063072.1
GO.txt
#query GO reference DB reference family
HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
HumanDistalGut_READ_00043244.3 GO:0022625 TIGRFAM TIGR00001
HumanDistalGut_READ_00048644.4 GO:0000315 TIGRFAM TIGR00001
HumanDistalGut_READ_00067264.5 GO:0003735 TIGRFAM TIGR00001
The read ids match up with some but not all of the ids after READ...
#!/usr/bin/env python
# encoding: utf-8
import sys
import re
def extract_id(line):
"""
input: HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
returns: 00048904.2
"""
result = re.search(r'READ_(\d{8}\.\d)', line)
if result != None:
return result.group(1)
else:
return None
def extract_go_num(line):
"""
input: HumanDistalGut_READ_00048904.2 GO:0006412 TIGRFAM TIGR00001
returns: 0006412
"""
result = re.search(r'GO:(\d{7})', line)
if result != None:
return result.group(1)
else:
return None
def main(argv = None):
if argv is None:
argv = sys.argv
with open('readID.txt', 'r') as f:
ids = frozenset(f.readlines())
with open('GO.txt', 'r') as haystack, \
open('GOLines.txt', 'w') as output:
for line in haystack:
if extract_id(line) in ids:
output.write(extract_go_num(line) + '\n')
if __name__ == "__main__":
sys.exit(main())
I'm trading memory overhead for an O(n) solution rather than O(n^2).
I'm using regular expressions to extract the ids and go numbers, but it's brittle if the number of digits change.
Maybe something like this:
with open('readID.txt', 'r') as readID, open('GOlines.txt', 'w') as output, open('GO.txt', 'r') as GO:
for ID in readID:
for line in GO:
if ID in line:
output.write(line)
If your files are small enough to fit in your memory.
with open('/somepath/GO.txt') as f:
pool = f.readlines()
with open('/somepath/readID.txt') as f:
tokens = f.readlines()
# strip spaces/new lines
tokens = [t.strip() for t in tokens]
found = [(t, lno) for t in tokens for (lno, l) in enumerate(pool) if t in l]
You could then print your found list into your outfile.

Group and Check-mark using Python

I have several files, each of which has data like this (filename:data inside separated by newline):
Mike: Plane\nCar
Paula: Plane\nTrain\nBoat\nCar
Bill: Boat\nTrain
Scott: Car
How can I create a csv file using python that groups all the different vehicles and then puts a X on the applicable person, like:
Assuming those line numbers aren't in there (easy enough to fix if they are), and with an input file like following:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
Solution can be found here : https://gist.github.com/999481
import sys
from collections import defaultdict
import csv
# see http://stackoverflow.com/questions/6180609/group-and-check-mark-using-python
def main():
# files = ["group.txt"]
files = sys.argv[1:]
if len(files) < 1:
print "usage: ./python_checkmark.py file1 [file2 ... filen]"
name_map = defaultdict(set)
for f in files:
file_handle = open(f, "r")
process_file(file_handle, name_map)
file_handle.close()
print_csv(sys.stdout, name_map)
def process_file(input_file, name_map):
cur_name = ""
for line in input_file:
if ":" in line:
cur_name, item = [x.strip() for x in line.split(":")]
else:
item = line.strip()
name_map[cur_name].add(item)
def print_csv(output_file, name_map):
names = name_map.keys()
items = set([])
for item_set in name_map.values():
items = items.union(item_set)
writer = csv.writer(output_file, quoting=csv.QUOTE_MINIMAL)
writer.writerow( [""] + names )
for item in sorted(items):
row_contents = map(lambda name:"X" if item in name_map[name] else "", names)
row = [item] + row_contents
writer.writerow( row )
if __name__ == '__main__':
main()
Output:
,Mike,Bill,Scott,Paula
Boat,,X,,X
Car,X,,X,X
Plane,X,,,X
Train,,X,,X
Only thing this script doesn't do is keep the columns in order that the names are in. Could keep a separate list maintaining the order, since maps/dicts are inherently unordered.
Here is an example of how-to parse these kind of files.
Note that the dictionary is unordered here. You can use ordered dict (in case of Python 3.2 / 2.7) from standard library, find any available implmentation / backport in case if you have older Python versions or just save an order in additional list :)
data = {}
name = None
with open(file_path) as f:
for line in f:
if ':' in line: # we have a name here
name, first_vehicle = line.split(':')
data[name] = set([first_vehicle, ]) # a set of vehicles per name
else:
if name:
data[name].add(line)
# now a dictionary with names/vehicles is available
# let's convert it to simple csv-formatted string..
# a set of all available vehicles
vehicles = set(v for vlist in data.values()
for v in vlist)
for name in data:
name_vehicles = data[name]
csv_vehicles = ''
for v in vehicles:
if v in name_vehicles:
csv_vehicles += v
csv_vehicles += ','
csv_line = name + ',' + csv_vehicles
Assuming that the input looks like this:
Mike: Plane
Car
Paula: Plane
Train
Boat
Car
Bill: Boat
Train
Scott: Car
This python script, places the vehicles in a dictionary, indexed by the person:
#!/usr/bin/python
persons={}
vehicles=set()
with open('input') as fd:
for line in fd:
line = line.strip()
if ':' in line:
tmp = line.split(':')
p = tmp[0].strip()
v = tmp[1].strip()
persons[p]=[v]
vehicles.add(v)
else:
persons[p].append(line)
vehicles.add(line)
for k,v in persons.iteritems():
print k,v
print 'vehicles', vehicles
Result:
Mike ['Plane', 'Car']
Bill ['Boat', 'Train']
Scott ['Car']
Paula ['Plane', 'Train', 'Boat', 'Car']
vehicles set(['Train', 'Car', 'Plane', 'Boat'])
Now, all the data needed are placed in data-structures. The csv-part is left as an exercise for the reader :-)
The most elegant and simple way would be like so:
vehiclesToPeople = {}
people = []
for root,dirs,files in os.walk('/path/to/folder/with/files'):
for file in files:
person = file
people += [person]
path = os.path.join(root, file)
with open(path) as f:
for vehicle in f:
vehiclesToPeople.setdefault(vehicle,set()).add(person)
people.sort()
table = [ ['']+people ]
for vehicle,owners in peopleToVehicles.items():
table.append([('X' if p in vehiclesToPeople[vehicle] else '') for p in people])
csv = '\n'.join(','.join(row) for row in table)
You can do pprint.pprint(table) as well to look at it.

How to extract column and row in csv using python

I have this input in a file.csv
"","min","max","rainfall","days_clear"
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"LA",10,20,1000,54
I wanted to write a simple program to find the city with the lowest rainfall which is Missouri in this case. How can I do that using Python csv reader?
I can try extract the items but unfortunately the first row of the file has to be there.
I wanted to have something like count[Missouri]=300
count[Amsterdam]=1212 etc.. so that I can do a minimum and reference back to print the city.
Please advise. Thanks.
import csv
def main():
with open('file.csv', 'rb') as inf:
data = [(int(row['rainfall']), row['']) for row in csv.DictReader(inf)]
data.sort()
print data[0]
if __name__=="__main__":
main()
returns
(300, 'Missouri')
One way to do this would be to use the csv module's DictReader class to write a function to extract the column of data. DictReader will take care of handling the first row of field names automatically. The built-in min() function can then be used to determine the item with the smallest value in the column.
import csv
def csv_extract_col(csvinput, colname, key):
""" extract a named column from a csv stream into a dictionary
colname: name of columm to extract
key: name of another columm to use as keys in returned dict
"""
col = {}
for row in csv.DictReader(csvinput):
col[row[key]] = row[colname]
return col
if __name__=='__main__':
import StringIO
csvdata = """\
"","min","max","rainfall","days_clear" # field name row
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"LA",10,20,1000,54
"""
csvfile = StringIO.StringIO(csvdata)
rainfall = csv_extract_col(csvfile, 'rainfall', '')
print rainfall
# {'Amsterdam': '1212', 'LA': '1000', 'Missouri': '300'}
print min(rainfall.iteritems(), key=lambda r: float(r[1]))
# ('Missouri', '300')
import StringIO
import csv
example = """"","min","max","rainfall","days_clear"
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"LA",10,20,1000,54
"""
data_in = StringIO.StringIO(example)
#data_in = open('mycsvdata.csv')
def read_data(data_in):
reader = csv.reader(data_in)
cols = []
results = {}
for row in reader:
if not cols:
cols = row
continue
row = [ int(x) if x.lstrip('-').isdigit() else x for x in row ]
results[row[0]] = dict(zip(cols[1:],row[1:]))
return results
data = read_data(data_in)
min(data.items(),key=lambda x: x[1].get('rainfall'))
Returns
('Missouri', {'max': 10, 'days_clear': 23, 'rainfall': 300, 'min': -2})
To read from a file, you need to remove all code that deals with a string:
reader = csv.reader(open('file.csv', 'rb'))
rainfall = csv_extract_col(reader, 'rainfall', '')
Update: Sorry, it neads a bit more work than that. The first arg of csv_extract_col will be used as the first arg of csv.DictReader so (in this case) it should be an open file object, and should never be a csv.reader instance. See below:
import csv
### def csv_extract_col(csvinput, colname, key):
### exactly as provided by #martineau
if __name__ == '__main__':
import sys
filename, data_col_name, key_col_name = sys.argv[1:4]
input_file_object = open(filename, 'rb')
result_dict = csv_extract_col(input_file_object, data_col_name, key_col_name)
print result_dict
print min(result_dict.iteritems(), key=lambda r: float(r[1]))
Results:
command-prompt>\python27\python joj_csv.py joj.csv rainfall ""
{'Amsterdam': '1212', 'LA': '1000', 'Missouri': '300'}
('Missouri', '300')
command-prompt>\python27\python joj_csv.py joj.csv days_clear ""
{'Amsterdam': '34', 'LA': '54', 'Missouri': '23'}
('Missouri', '23')
Update 2 in response to comment """there must be something i missed out.. i tried.. [what looks like #martineau's function] with the above main function you define. Then in my shell, i define python rainfall "". But it gives me KeyError: 'rainfall'"""
Two possibilities:
(1) You made a mistake patching the pieces of source code together. Check your work.
(2) Your file doesn't have the expected heading row contents. Try some debugging e.g. change #martineau's code so that you can insert a print statement etc. to show what the csv.DictReader thinks about your heading row:
reader = csv.DictReader(csvinput)
print "fieldnames", reader.fieldnames
assert colname in reader.fieldnames
assert key in reader.fieldnames
for row in reader:
If you are still stuck, show us ALL of your code plus the full traceback and error message -- either edit your question or put it up on pastbin or dropbox; DON'T put it into a comment!!
My code for cases in which there are several cities having the same minimum or several cities having the same maximum:
import csv
def minmax_col(filename,key,colname):
with open(filename,'rb') as csvfile:
rid = csv.DictReader(csvfile,
fieldnames=None,
quoting=csv.QUOTE_NONNUMERIC)
mini = float('inf')
maxi = float('-inf')
limin = limax =[]
for row in rid:
if row[colname] == maxi:
limax.append(row[key])
elif row[colname] > maxi:
maxi = row[colname]
limax = [row[key]]
if row[colname] == mini:
limin.append(row[key])
elif row[colname] < mini:
mini = row[colname]
limin = [row[key]]
return (key,(maxi,limax),(mini,limin))
key = 'rainfall'
city,(Ma,liMa),(mi,limi) = minmax_col('filename.csv','',key)
print 'Cities analysed on ' + repr(key) + ' parameter :'
print 'maximum==',Ma,' cities :',', '.join(liMa)
print 'minimum==',mi,' cities :',', '.join(limi)
print
key = 'min'
city,(Ma,liMa),(mi,limi) = minmax_col('filename.csv','',key)
print 'Cities analysed on ' + repr(key) + ' parameter :'
print 'maximum==',Ma,' cities :',', '.join(liMa)
print 'minimum==',mi,' cities :',', '.join(limi)
On a file like that:
"","min","max","rainfall","days_clear"
"Missouri",-2,10,300,23
"Amsterdam",-3,5,1212,34
"Oslo",-2,8,800,12
"LA",10,20,1000,54
"Kologoro",28,45,1212,1
the result is
Cities analysed according the 'rainfall' parameter :
maximum== 1212.0 cities : Amsterdam, Kologoro
minimum== 300.0 cities : Missouri
Cities analysed according the 'min' parameter :
maximum== 28.0 cities : Kologoro
minimum== -3.0 cities : Amsterdam

Categories