changing given text to list of lists - python

I have the following text in a given file:
1234,A,7.99,10.3,12.8,101,0.11843,0.27276,0.30101
87635,B,19.69,21.25,130,1203,0.1096,0.1599,0.1974
First, I want to get rid of the 1234 and 87635 in the front, and I also want to change A into the integer "1" and B into the integer "0".
This is my code:
def convert(file):
data = open(file, 'r')
list1 = []
for line in data:
line_data = line.strip().split(',')
if line_data[0] == "B":
line_data[0] = 0
else:
line_data[0] = 1
for i in range(len(line)):
datalist.append(line)
list1 = np.array(list1), float
data.close()
return list1
This is the output I want:
[[1234,A,7.99,10.3,12.8,101,0.11843,0.27276,0.30101], [87635,B,19.69,21.25,130,1203,0.1096,0.1599,0.1974]]
The output I'm currently getting is a list of strings, instead of the list of lists.

You are getting a list of strings because you are appending to the list with:
datalist.append(line)
This appends the original line, not the modified line_data. Also, all the elements in line_data would still be strings (except for the first element), as you are never converting them to numbers.
The easiest approach here is to use the csv module: parse the file as CSV, pop the first element from each row that is returned, and then change the second element (now the first element after the removal) as you want. Example:
def convert(file):
    """Parse a CSV file into a list of float rows, dropping the leading ID.

    Each input row looks like ``id,label,v1,v2,...``.  The ID column is
    discarded, the label becomes ``0.0`` when it is ``'B'`` and ``1.0``
    otherwise, and every remaining field is converted with ``float``.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list containing one list of floats per non-blank input row.

    Raises:
        IndexError: If a non-blank row has no label column.
        ValueError: If a value field cannot be parsed as a float.
    """
    import csv

    lst = []
    # Open the path that was passed in (not a hard-coded placeholder);
    # newline='' is the mode the csv module recommends for its input files.
    with open(file, 'r', newline='') as f:
        for row in csv.reader(f):
            if not row:
                continue  # csv.reader yields [] for blank lines; skip them
            label = row[1]
            lst.append([0.0 if label == 'B' else 1.0, *map(float, row[2:])])
    return lst
Example/Demo -
My a.csv -
1234,A,7.99,10.3,12.8,101,0.11843,0.27276,0.30101
87635,B,19.69,21.25,130,1203,0.1096,0.1599,0.1974
Code and Result -
>>> import csv
>>> with open('a.csv','r') as f:
... reader = csv.reader(f)
... lst = []
... for row in reader:
... row.pop(0)
... if row[0] == 'B':
... row[0] = 0
... else:
... row[0] = 1
... lst.append(list(map(float, row)))
...
'1234'
'87635'
>>> lst
[[1.0, 7.99, 10.3, 12.8, 101.0, 0.11843, 0.27276, 0.30101], [0.0, 19.69, 21.25, 130.0, 1203.0, 0.1096, 0.1599, 0.1974]]

Related

Sum values in column B for lines with matching column A values

Extract from large csv looks like this:
Description,Foo,Excl,GST,Incl
A,foo,$154.52,$15.44,$169.96
A,foo,$45.44,$4.54,$49.98
A,foo,$45.44,$4.54,$49.98
A,foo,$154.52,$15.44,$169.96
A,foo,$0.00,$0.00,$0.00
A,foo,$50.16,$5.02,$55.18
B,foo,$175.33,$15.65,$190.98
C,foo,$204.52,$15.44,$219.96
D,foo,$154.52,$15.44,$169.96
D,foo,$154.52,$15.44,$169.96
D,foo,$45.44,$4.54,$49.98
D,foo,$154.52,$15.44,$169.96
D,foo,$145.44,$14.54,$159.98
I need to strip the dollar sign and for all lines containing matching Description values (A or B or whatever it may be), sum the Excl column values separately, the GST column values separately and Incl column values separately for that Description value.
End result should be a dictionary object containing the Description column as key and the sum totals of the Excl, GST and Incl columns matching the Description, example:
{
"A": [450.08,44.98,495.06],
"B": [175.33,15.65,190.98],
"C": [204.52,15.44,219.96],
"D": [654.44,65.40,719.84]
}
I'm completely stumped on how to perform the sum operation. My code only goes as far as opening the csv and reading in values on each line. Any enlightenment is appreciated.
import csv
def getField(rowdata, index):
try:
val = rowdata[index]
except IndexError:
val = '-1'
return val
with open(csv, 'r') as f:
reader = csv.reader(f)
order_list = list(reader)
# Remove the header row in csv
order_list.pop(0)
for row in order_list:
Desc = getField(row, 0)
Excl = getField(row, 2)
GST = getField(row, 3)
Incl = getField(row, 4)
This might help
import csv
import decimal
path = "Path to CSV_File.csv"


def removeSym(s):
    """Return the amount in *s* as a float, ignoring any ``$`` sign."""
    return float(s.replace("$", ""))


def sum_by_description(csv_path):
    """Sum the numeric columns of a CSV file per value of its first column.

    The first line is treated as a header and skipped.  For every other
    row, column 0 is the grouping key and columns 2 onwards are amounts
    (optionally prefixed with ``$``) that are added column-wise to the
    running totals for that key.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A dict mapping each key to a list of float totals, one per amount
        column.  Values are always real lists, including for keys that
        occur only once.
    """
    totals = {}
    with open(csv_path, 'r') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        for row in reader:
            if not row:
                continue  # ignore blank lines
            key = row[0]
            amounts = [removeSym(cell) for cell in row[2:]]
            if key not in totals:
                totals[key] = amounts
            else:
                # Round each running total to cents so float noise does not
                # accumulate across rows.
                totals[key] = [round(a + b, 2)
                               for a, b in zip(totals[key], amounts)]
    return totals


if __name__ == "__main__":
    d = sum_by_description(path)
    print(d)
Output:
{'A': [450.08, 44.98, 495.06], 'C': [204.52, 15.44, 219.96], 'B': [175.33, 15.65, 190.98], 'D': [654.44, 65.4, 719.84]}

python to list a file

This script is actually not working with the desired input
script:
import csv
file1 = csv.reader(open("1.csv"))
file2 = csv.reader(open("2.csv"))
file3 = open("3.csv", "w")
k, l = list(file1),list(file2)
length_file1 = len(k)
length_file2 = len(l)
n = []
file3.write(",".join(str(i) for i in l[0])+'\n')
for i in xrange(1, length_file1):
arr = k[i][1]
for j in xrange(1, length_file2):
arr2 = l[j][1]
if arr == arr2:
l[j][0] = k[i][0]
print l[j]
n.append(l[j])
file3.write(",".join(str(i) for i in l[j])+'\n')
so i want the code to be replaced
You can create a dictionary with the key:value pairs from 1.csv and compare each value in 2.csv with the keys of that dictionary. This uses Python 3; there is no need to use range or xrange here, since you can iterate over the lists directly.
import csv
# Collect the second column of every row in 2.csv; the first column is the
# blank item at the start of each row and is discarded.
with open("2.csv", 'r') as f:
    file2 = [value for _, value in csv.reader(f)]

# Map each second-column value of 1.csv to its first-column value.
with open("1.csv", 'r') as f:
    file1 = {value: key for key, value in csv.reader(f)}

# Keep the 2.csv values that also occur in 1.csv, paired with the 1.csv key.
toWrite = ["{},{}".format(file1[value], value)
           for value in file2 if value in file1]

with open("bdsp_updated.csv", "w") as f:
    f.write('\n'.join(toWrite))
Content of bdsp_updated.csv:
1,99277050
10,92782013
2,71269815
3,99724582
7,92043333
4,92011116
8,99799635

Minimum of each list in a list of lists

I was wondering if it was possible to get the minimum value of each list in a list of lists.
import csv
data=[]
file=input ("Enter file name: ")
with open(file,"r") as f:
reader = csv.reader(f, delimiter=';')
for row in reader:
data.append(row)
print(data)
Output:
[['13.25', '12.97', '13.12', '13.47', '13.44', '13.09', '12.86', '12.78', '12.91', '12.93', '12.91', '13.11'], ['12.92', '13.42', '13.58', '13.7', '13.62', '13.7', '13.31', '12.86', '12.59', '12.81', '13.46', '12.9'], ['13.39', '13.5', '13.29', '13.26', '13.38', '13.45', '13.46', '11.95', '', '12.57', '13.22', '12.88'], ['12.48', '13.76', '13.7', '13.77', '13.08', '13.48', '13.25', '12.31', '12.56', '12.56', '12.95', '13.38'], ['12.52', '14.07', '14.46', '14.13', '13.98', '14.07', '13.92', '12.7', '13.01', '12.79', '13', '13.13']]
The minimum value of a list of lists of numbers can be achieved with a simple call to map:
numbers = [[1, 3, 6], [6, 7, 2], [0, -4]]
# map() is lazy on Python 3, so wrap it in list() to get a printable list.
minimum_numbers = list(map(min, numbers))
print(minimum_numbers)
# This prints: [1, 2, -4]
This will return a list where min was called for each element in the "outer list" numbers.
Python has a min function that can be called on lists to get the lowest value. You can loop through your data list and call min() on each list contained.
# The loop variable is called `row`, not `list`, so the builtin stays usable.
for row in data:
    print(min(row))
Given your output this would return:
12.78
12.59
12.31
12.52
The third one is blank because the array has an empty value in it.
If you want to remove empty strings you can use filter()
# The loop variable is called `row`, not `list`, so the builtin stays usable.
for row in data:
    non_empty = filter(None, row)  # drops '' (and any other falsy item)
    print(min(non_empty))
This outputs
12.78
12.59
11.95
12.31
12.52
Also I noticed all the values are floats, you can also convert them before checking min value so you're not comparing strings.
# The loop variable is called `row`, not `list`, so the builtin stays usable.
for row in data:
    # Drop empty strings first, then compare numerically instead of as text.
    values = [float(i) for i in filter(None, row)]
    print(min(values))
First you'll need to convert each of the items to floats. Do this when you read the data in:
import csv
data=[]
file=input ("Enter file name: ")
with open(file,"r") as f:
reader = csv.reader(f, delimiter=';')
for row in reader:
data.append([float(x) for x in row if x]) # converts values to floats, ignores empty ones
print(data)
Once you're done with that, just call min() on each list:
>>> [min(x) for x in data]
[12.78, 12.59, 11.95, 12.31, 12.52]
You can shorten your reading loop to this:
>>> with open(file, "r") as f:
... reader = csv.reader(f, delimiter=";")
... data = [[float(x) for x in row] for row in reader]
>>> print([min(x) for x in data])
I think that you should use Emil Vikström's answer, but since you have control over your loading code you can prevent unnecessary storing of data and simply do:
with open(file, "r") as f:
    reader = csv.reader(f, delimiter=';')
    for row in reader:
        # Skip empty cells: float('') raises ValueError.
        data.append(min(float(x) for x in row if x))
I would use the map function...
listOfLists = [[2, 3, 1], [5, 7, 8]]  # ... any number of inner lists
# min can be handed to map directly; list() materialises the lazy map
# object that Python 3 returns.
minValues = list(map(min, listOfLists))
Result:
minValues = [1, 5, ...]

Creating a dictionary

my goal is to create a dictionary in Python. I have a .csv file which contains two columns, first one being 'word', other being 'meaning'. I am trying to read the csv file in the dictionary format and get the 'meaning' when 'word' is given.
Can you please help me by telling me how to get the value of 'word'? this is what I tried:
My codes are,
>>> with open('wordlist.csv', mode = 'r') as infile:
... reader = csv.reader(infile)
... with open('wordlist.csv', mode = 'w') as outfile:
... writer = csv.writer(outfile)
... mydict = {rows[0]:rows[1] for rows in reader}
... print(mydict)
...
The result turns out to be,
{}
the next one I tried was,
>>> reader = csv.reader(open('wordlist.csv', 'r'))
>>> d = {}
>>> for row in reader:
... k, v = row
... d[k] = v
...
But when I wanted to use this, the result was like this-
>>> d['Try']
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
KeyError: 'Try'
The next code I tried was,
>>> reader = csv.DictReader(open('wordlist.csv'))
>>> result = {}
>>> for row in reader:
... key = row.pop('word')
... if key in result:
... pass
... result[key] = row
... print result
...
It didn't give me any answer at all.
>>> for row in reader:
... for column, value in row.iteritems():
... result.setdefault(column, []).append(value)
... print result
...
Neither did this give me a result.
I would use pandas. You could then use zip to create the dictionaries.
import pandas as pd
# Load the word list; the columns are accessed by name as attributes.
df = pd.read_csv('wordlist.csv')
word_column = df.word
words = list(word_column)
# Pair every word with the meaning on the same row.
meaning = {w: m for w, m in zip(word_column, df.meaning)}
If your file doesn't have a header row, that is OK: just print out the columns — each column is still given some name, which can then be referenced.
Alternative:
import pandas as pd
df = pd.read_csv('wordlist.csv')
dictionary = {}
# zip() over two columns yields 2-tuples, so unpack exactly two names.
# NOTE(review): assumes the header names are 'word' and 'meaning', as
# described in the question - confirm against the actual file.
for w, m in zip(df.word, df.meaning):
    dictionary[w] = m
If "final_word.csv" looks like this:
word1, synonym1, meaning1, POS_tag1
word2, synonym2, meaning2, POS_tag2
This will read it in as a dictionary:
with open("final_word.csv", 'r') as f:
    rows = f.readlines()
dictionary = {}
for row in rows:
    row = row.strip()
    if not row:
        continue  # a blank (e.g. trailing) line cannot be unpacked into 4 fields
    word, synonym, meaning, POS_tag = row.split(", ")
    dictionary[word] = [synonym, meaning, POS_tag]
print(dictionary['word1'])
#out>> ['synonym1', 'meaning1', 'POS_tag1']
print(dictionary['word2'][0])
#out>> synonym2
The strip() is used to get rid of the newline "\n" at the end of each CSV row.

How to convert values to float and assign them to a dictionary in Python?

I am trying to read content from a CSV file and store them in a dictionary.
Each row of my CSV file is formatted as:
'Node_001', '0.0067', '0.2456', '0.7896', ......
The first element will be used as key in dictionary, while the rest part are values.
Since these values are generated by equations in excel, I don't think there are anything wrong with the format itself.
Here is my code:
with open(path, "rb") as file:
reader = csv.reader(file)
my_dictionary = dict()
for row in reader:
node_id = row[0]
temp_values = row[1:]
[float(x) for x in temp_values]
my_dictionary[node_id] = temp_values
print isinstance(temp_values[0], float)
I print the first element of the numeric part of my rows to examine whether they are converted to float. However, all I get is False.
So, may I know what is wrong with my code?
Thanks.
The line [float(x) for x in temp_values] does not modify temp_values but creates a new list. You have to reassign it, like this:
with open(path, "rb") as file:
reader = csv.reader(file)
my_dictionary = dict()
for row in reader:
node_id = row[0]
temp_values = row[1:]
temp_values = [float(x) for x in temp_values]
my_dictionary[node_id] = temp_values
print isinstance(temp_values[0], float)
This chunk of code:
for row in reader:
node_id = row[0]
temp_values = row[1:]
[float(x) for x in temp_values]
my_dictionary[node_id] = temp_values
print isinstance(temp_values[0], float)
creates a list of float values with this line:
[float(x) for x in temp_values]
...but since it's not assigned to anything, it goes away immediately.
changing that line to
temp_values = [float(x) for x in temp_values]
creates the converted list and assigns it to temp_values so the rest of your code can use those values.
Try this for a change, assuming you only have unique keys in your file:
with open(path, 'r') as f:
    reader = csv.reader(f)
    # list(...) is needed on Python 3, where map() returns a lazy iterator
    # rather than a list of floats.
    d = {r[0]: list(map(float, r[1:])) for r in reader}
print(d)
You can also stick with a list comprehension with this:
# Build {first column: [remaining columns as floats]} one row at a time.
with open(path, 'r') as f:
    d = {}
    for r in csv.reader(f):
        d[r[0]] = [float(i) for i in r[1:]]
You are not saving the conversion:
temp_values = [float(x) for x in temp_values]
If you replace your list comprehension with this one, your code should work.

Categories