Changing a specific value from a list into an integer - python

I've been trying to have a program print out a sorted list depending on the requested item. When I read the list from the CSV file, I'm not sure how to convert only 2 of the 4 values to integers; as it stands, the numbers are treated as strings and the list doesn't sort properly.
E.g.:
['Jess', 'F', '2009', '6302']
['Kat', 'F', '1999', '6000']
['Alexander', 'M', '1982', '50']
['Bill', 'M', '2006', '2000']
['Jack', 'M', '1998', '1500']
def sortD(choice):
    clear()
    csv1 = csv.reader(open('TestUnsorted.csv', 'r'), delimiter=',')
    sort = sorted(csv1, key=operator.itemgetter(choice))
    for eachline in sort:
        print(eachline)
    open('TestUnsorted.csv', 'r').close()
    # From here up is where I'm having difficulty
    with open('TestSorted.csv', 'w') as csvfile:
        fieldnames = ['Name', 'Gender', 'Year', 'Count']
        csv2 = csv.DictWriter(csvfile, fieldnames=fieldnames,
                              extrasaction='ignore', delimiter=';')
        csv2.writeheader()
        for eachline in sort:
            csv2.writerow({'Name': eachline[0], 'Gender': eachline[1], 'Year': eachline[2], 'Count': eachline[3]})
            List1.insert(0, eachline)
    open('TestSorted.csv', 'w').close
Here's what my TestUnsorted file looks like:
Jack,M,1998,1500
Bill,M,2006,2000
Kat,F,1999,6000
Jess,F,2009,6302
Alexander,M,1982,50

sort = sorted(csv1, key=lambda ch: (ch[0], ch[1], int(ch[2]), int(ch[3])))
That will sort the last two values as integers.
EDIT:
Upon re-reading the question, I realize choice is the index of the column you want to sort on. You could do this instead:
if choice < 2:  # or however you want to determine whether to cast to int
    sort = sorted(csv1, key=operator.itemgetter(choice))
else:
    sort = sorted(csv1, key=lambda ch: int(ch[choice]))
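For completeness, here is a minimal sketch of how that conditional key could slot into sortD, assuming (as in the sample data) that only columns 2 and 3 are numeric; the file names and the semicolon-delimited output simply mirror the question:

import csv
import operator

def sortD(choice):
    # Read all rows once; the with-block closes the file for us.
    with open('TestUnsorted.csv', 'r', newline='') as f:
        rows = list(csv.reader(f, delimiter=','))

    # Columns 0 and 1 are text; columns 2 and 3 hold numbers stored as strings.
    if choice < 2:
        key = operator.itemgetter(choice)
    else:
        key = lambda row: int(row[choice])

    sort = sorted(rows, key=key)
    for eachline in sort:
        print(eachline)

    # Write the sorted rows back out with a header, as in the question.
    with open('TestSorted.csv', 'w', newline='') as csvfile:
        out = csv.writer(csvfile, delimiter=';')
        out.writerow(['Name', 'Gender', 'Year', 'Count'])
        out.writerows(sort)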

Related

Looping through list of dicts and appending to csv file - index error

I'm trying to append values to a csv file by looping through the list and then looping through the dicts of the JSON data, but I get an IndexError:
import csv
data = json_resp.json()
with open('Meteroits.csv', 'w+') as file:
    writer = csv.DictWriter(file, fieldnames=['name', 'id', 'nametype', 'recclass', 'mass', 'fall', 'year', 'reclat', 'reclong', 'geolocation'])
    writer.writeheader()
    for i in range(len(data)):
        for x in data[i]:
            name = x[1]
            i_d = x[2]
            nametype = x[3]
            recclass = x[4]
            mass = x[5]
            fall = x[6]
            year = x[7]
            reclat = x[8]
            reclong = x[9]
            geolocation = x[10]
            writer.writerow({'name': name, 'id': i_d, 'nametype': nametype, 'recclass': recclass, 'mass': mass, 'fall': fall, 'year': year, 'reclat': reclat, 'reclong': reclong, 'geolocation': geolocation})
I'm getting the error at index 4:
---> 12 recclass=x[4]
IndexError: string index out of range
And here's a sample of data:
{
    'name': 'Abee',
    'id': '6',
    'nametype': 'Valid',
    'recclass': 'EH4',
    'mass': '107000',
    'fall': 'Fell',
    'year': '1952-01-01T00:00:00.000',
    'reclat': '54.216670',
    'reclong': '-113.000000',
    'geolocation': {'latitude': '54.21667', 'longitude': '-113.0'}
}
The problem is:
i is one index
data[i] is one item, a dict
x is a key of the data[i] dict, the first one being 'name'
x[0] is the letter 'n', ... x[3] is the letter 'e', so x[4] is out of range
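To see the problem in isolation, here is a tiny demonstration (using just the first two keys of the sample record):

row = {'name': 'Abee', 'id': '6'}
for x in row:          # iterating over a dict yields its keys, not its values
    print(x, x[0])     # prints: name n   then   id i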
Just write the dict as you have it already
with open('Meteroits.csv', 'w+') as file:
    writer = csv.DictWriter(file,
                            fieldnames=['name', 'id', 'nametype', 'recclass', 'mass',
                                        'fall', 'year', 'reclat', 'reclong', 'geolocation'])
    writer.writeheader()
    for item in data:
        writer.writerow(item)
Or just use pandas
import pandas as pd
df = pd.DataFrame(data)
df.to_csv('Meteroits.csv', index=False)
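One caveat for either approach, based on the sample record above: the geolocation value is itself a dict, so both DictWriter and pandas will write its string representation into that single column. If you would rather have a flat value, you could replace it before writing (a sketch that assumes every record uses the same latitude/longitude keys):

for item in data:
    geo = item.get('geolocation', {})
    # Collapse the nested dict into a simple "lat,long" string (an illustrative choice).
    item['geolocation'] = '{},{}'.format(geo.get('latitude', ''), geo.get('longitude', ''))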
The error is that x is the key of the dict, and not the dict itself. It should be:
for i in range(len(data)):
    new_dict = {}
    for x, val in data[i].items():
        new_dict[x] = val
    writer.writerow(new_dict)

Python unique values per column in csv file row

I've been crunching on this for a long time. Is there an easy way, using NumPy or Pandas (or by fixing my code), to get the unique values for each column within a row when the values are separated by "|"?
I.e., the data:
"id","fname","lname","education","gradyear","attributes"
"1","john","smith","mit|harvard|ft|ft|ft","2003|207|212|212|212","qa|admin,co|master|NULL|NULL"
"2","john","doe","htw","2000","dev"
Output should be:
"id","fname","lname","education","gradyear","attributes"
"1","john","smith","mit|harvard|ft","2003|207|212","qa|admin,co|master|NULL"
"2","john","doe","htw","2000","dev"
My broken code:
import csv
import pprint

your_list = csv.reader(open('out.csv'))
your_list = list(your_list)
# pprint.pprint(your_list)
string = "|"
cols_no = 6
for line in your_list:
    i = 0
    for col in line:
        if i == cols_no:
            print "\n"
            i = 0
        if string in col:
            values = col.split("|")
            myset = set(values)
            items = list()
            for item in myset:
                items.append(item)
            print items
        else:
            print col + ",",
        i = i + 1
It outputs:
id, fname, lname, education, gradyear, attributes, 1, john, smith, ['harvard', 'ft', 'mit']
['2003', '212', '207']
['qa', 'admin,co', 'NULL', 'master']
2, john, doe, htw, 2000, dev,
Thanks in advance!
numpy/pandas is a bit overkill for what you can achieve with csv.DictReader and csv.DictWriter plus a collections.OrderedDict, e.g.:
import csv
from collections import OrderedDict

# If using Python 2.x, use open('output.csv', 'wb') instead
with open('input.csv') as fin, open('output.csv', 'w') as fout:
    csvin = csv.DictReader(fin)
    csvout = csv.DictWriter(fout, fieldnames=csvin.fieldnames, quoting=csv.QUOTE_ALL)
    csvout.writeheader()
    for row in csvin:
        for k, v in row.items():
            row[k] = '|'.join(OrderedDict.fromkeys(v.split('|')))
        csvout.writerow(row)
Gives you:
"id","fname","lname","education","gradyear","attributes"
"1","john","smith","mit|harvard|ft","2003|207|212","qa|admin,co|master|NULL"
"2","john","doe","htw","2000","dev"
If you don't care about the order when you have many items separated with |, this will work:
lst = ["id","fname","lname","education","gradyear","attributes",
"1","john","smith","mit|harvard|ft|ft|ft","2003|207|212|212|212","qa|admin,co|master|NULL|NULL",
"2","john","doe","htw","2000","dev"]
def no_duplicate(string):
return "|".join(set(string.split("|")))
result = map(no_duplicate, lst)
print result
result:
['id', 'fname', 'lname', 'education', 'gradyear', 'attributes', '1', 'john', 'smith', 'ft|harvard|mit', '2003|207|212', 'NULL|admin,co|master|qa', '2', 'john', 'doe', 'htw', '2000', 'dev']

How to sort CSV files by alphabet and highest number. Python 3

Here is my code (Sorry, I know it's messy):
import csv

with open('test.csv', 'w', newline='') as fp:
    a = csv.writer(fp, delimiter=',')
    data = [['Bob', '4', '6', '3'],
            ['Dave', '7', '9', '10'],
            ['Barry', '5', '2', '3']]
    a.writerows(data)

print("1 for alphabetical order with each student's highest score\n2 for highest score, highest to lowest\n3 for average score, highest to lowest")
cho_two = int(input())
class_a = open("test.csv")
csv_a = csv.reader(class_a)
a_list = []
for row in csv_a:
    row[1] = int(row[1])
    row[2] = int(row[2])
    row[3] = int(row[3])
    minimum = min(row[1:3])
    row.append(minimum)
    maximum = max(row[1:3])
    row.append(maximum)
    average = sum(row[1:3])//3
    row.append(average)
    a_list.append(row[0:9])
if cho_two == 1:
    alphabetical = [[x[0], x[6]] for x in a_list]
    print("\nCLASS A\nEach student's highest by alphabetical order\n")
    for alpha_order in sorted(alphabetical):
        print(alpha_order)
class_a.close()
Here is what it should output:
['Barry', 5]
['Bob', 6]
['Dave', 10]
I want it to output the highest score that the person got in alphabetical order.
It actually outputs this:
['Barry', 2]
['Bob', 3]
['Dave', 5]
Thanks.
You have made a few mistakes here:
First, you append the maximum at index 5 and the average at index 6, but then read x[6] (the average) when you want the highest score.
Secondly, you don't actually slice the values from the row correctly:
minimum = min(row[1:3])
This only gets two of the three scores; you need all of them. I'd suggest reading from index 1 onwards with row[1:].
Then you need to fix the code that computes the average:
average = sum(row[1:3])//3
This does not do what you think it does: it sums only two of the scores, and // is integer division rather than real division.
Finally, when you append the row, you should probably append all of it with a_list.append(row); if you need a copy, slice all of it.
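Putting those fixes together, one possible version of the reading loop (a sketch that assumes the same test.csv produced at the top of the question):

import csv

a_list = []
with open('test.csv', newline='') as class_a:
    for row in csv.reader(class_a):
        scores = [int(x) for x in row[1:]]    # all three scores, as ints
        minimum = min(scores)
        maximum = max(scores)
        average = sum(scores) / len(scores)   # real division, over every score
        a_list.append(row[:1] + scores + [minimum, maximum, average])

# Highest score per student, in alphabetical order (the maximum sits at index 5).
for name, highest in sorted((x[0], x[5]) for x in a_list):
    print([name, highest])

With the sample data this prints ['Barry', 5], ['Bob', 6], ['Dave', 10], which matches the expected output.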

Handling Dictionary of lists using Python

I have a single dictionary that contains four keys; each key represents a file name and the values are nested lists, as can be seen below:
{'file1': [[['1', '909238', '.', 'G', 'C', '131', '.', 'DP=11;VDB=3.108943e02;RPB=3.171491e-01;AF1=0.5;AC1=1;DP4=4,1,3,3;MQ=50;FQ=104;PV4=0.55,0.29,1,0.17', 'GT:PL:GQ', '0/1:161,0,131:99'], ['1', '909309', '.', 'T', 'C', '79', '.', 'DP=9;VDB=8.191851e-02;RPB=4.748531e-01;AF1=0.5;AC1=1;DP4=5,0,1,3;MQ=50;FQ=81.7;PV4=0.048,0.12,1,1', 'GT:PL:GQ', '0/1:109,0,120:99']......,'008_NTtrfiltered': [[['1', '949608', '.', 'G', 'A',...}
My question is how to check, for each key, only the first two elements of each list (for instance "1" and "909238") to see if they are the same, and then write them to a file. The reason I want to do this is that I want to keep only the values whose first two elements are common to all four files (keys).
Thanks a lot in advance
Best.
You can access the keys of the dictionary dictio and make your comparison using:
f = open('file.txt', 'w')
value_to_check_1 = '1'
value_to_check_2 = '909238'
for k in dictio:
    value_1 = dictio[k][0][0][0]
    value_2 = dictio[k][0][0][1]
    if (value_1 == value_to_check_1) and (value_2 == value_to_check_2):
        f.write('What you want to write\n')
f.close()
If you want to do a check that involves every value of your dictionary dictio, you may want to store couples of values from dictio:
couples = [(dictio[k][0][0][0], dictio[k][0][0][1]) for k in dictio]
Then you can loop over the couples to do your check. Here is an example you can adapt to your needs:
for e in values_to_check:
    for c in couples:
        if float(e[0][0][0]) >= float(c[0]) and float(e[0][0][1]) <= float(c[1]):
            f.write(str(e[0][0][0]) + str(e[0][0][1]) + '\n')
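To actually keep only the positions shared by all four files, one approach (a sketch, assuming the records for each key live in dictio[k][0], matching the indexing used above) is to intersect the pairs of first two elements across the keys:

common = None
for k in dictio:
    # The (first, second) element pair of every record under this key.
    pairs = {(rec[0], rec[1]) for rec in dictio[k][0]}
    common = pairs if common is None else common & pairs

with open('file.txt', 'w') as f:
    for first, second in sorted(common or []):
        f.write(first + '\t' + second + '\n')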

Iterate over columns in a text file in python

I have a text file in the following format:
1,"20130219111529","90UP:34","0000","9999","356708","2"
"-2","20130219105824","0001:11","0000","","162_005",""
I want to compare row 1 and row 2 (In this case 1 and -2) for some purpose. To strip out all the quotes and parse this file I have the following code:
if os.path.exists(FileName):
    with open(FileName) as File:
        for row in csv.reader(File, delimiter=',', skipinitialspace=True):
            print(row)
The following is the output:
['1', '20130219111529', '90UP:34', '0000', '9999', '356708', '2']
['-2', '20130219105824', '0001:11', '0000', '', '162_005', '']
I want to iterate through the columns. For example, iterate through '1' then '-2' and so on.
How do I go about doing this?
Use zip(). It turns two iterables into one iterable of tuples, with elements coming from both lists.
l1 = ['1', '20130219111529', '90UP:34', '0000', '9999', '356708', '2']
l2 = ['-2', '20130219105824', '0001:11', '0000', '', '162_005', '']
for elem1, elem2 in zip(l1, l2):
    print("elem1 is {0} and elem2 is {1}.".format(elem1, elem2))
Perhaps the following.
if os.path.exists(FileName):
    with open(FileName) as File:
        lastRow = []
        # loop over the lines in the file
        for row in csv.reader(File, delimiter=',', skipinitialspace=True):
            # save the first row, for comparison below
            if lastRow == []:
                lastRow = row
                continue
            # loop over the columns, if all rows have the same number
            for colNum in range(len(row)):
                # compare row[colNum] and lastRow[colNum] as you wish
                pass
            # save this row, to compare with the next row in the loop
            lastRow = row
Just print the first element in the row:
for row in csv.reader(File, delimiter=',', skipinitialspace=True):
    print(row[0])
EDIT
rows = list(csv.reader(File, delimiter=',', skipinitialspace=True))
print(len(rows))  # how many rows were read from the file
for row in rows:
    print(row[0])
If (as you said in the question, though I'm not sure if you wanted this) you want to iterate through the columns, you can do the following:
if os.path.exists(file_name):
    with open(file_name) as csv_file:
        for columns in zip(*csv.reader(csv_file, delimiter=',', skipinitialspace=True)):
            print columns
This will output the following:
('1', '-2')
('20130219111529', '20130219105824')
('90UP:34', '0001:11')
('0000', '0000')
('9999', '')
('356708', '162_005')
('2', '')
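One thing to keep in mind: zip() stops at the shortest row, so ragged rows would silently drop trailing cells. If that can happen in your file, itertools.zip_longest (izip_longest on Python 2) pads the missing cells instead, e.g.:

import csv
from itertools import zip_longest

with open(file_name) as csv_file:
    # Same transposition as above, but short rows are padded with '' instead of truncating.
    for columns in zip_longest(*csv.reader(csv_file, delimiter=',', skipinitialspace=True), fillvalue=''):
        print(columns)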
