CSV calculation with Python. Get data from list - python

Input txt:
May 2014, 156
May 2013, 556
May 2013, 651
I add this data to input.csv file:
import csv

# Read every "Text, Count" line of text_file.txt into a list of
# [text, count] string pairs.
with open("text_file.txt") as inputFile:
    # Iterate the file exactly once. Wrapping this comprehension in an outer
    # "for line in inputFile" loop consumes the first line before the
    # comprehension runs, so that line would be silently dropped (and vars
    # would never be assigned for an empty file).
    vars = [
        [field.strip() for field in line.split(",")]
        for line in inputFile
        if line.strip()  # ignore blank lines
    ]
print(vars)

# Set to True if you want to export the input as a csv file.
convert_to_csv = True
if convert_to_csv:
    # newline='' leaves line-ending handling to the csv writer.
    with open('input.csv', 'w', newline='') as fp:
        a = csv.writer(fp, delimiter=',')
        data = [['Text', 'Count']] + vars
        a.writerows(data)
How can I get variables from my vars and add 10 (example: 156 + 10) to my 'Count' value?
print vars output:
[['May 2013', 156], ['May 2013', 556]]
Count after calculation = Count + 10 and I want to write output also to csv file (output.csv) with header ['Text', 'Count after calc']

Refactoring a bit:
#!/usr/bin/python
import csv

# Parse each "Text, Count" row of text_file.txt and add 10 to the count.
with open('text_file.txt') as input_file:
    csv_data = [
        [row[0], int(row[1].strip()) + 10]
        for row in csv.reader(input_file, delimiter=',')
        if row  # csv.reader yields an empty list for a blank line
    ]

convert_to_csv = True
if convert_to_csv:
    # The question asks for the result in output.csv under the header
    # ['Text', 'Count after calc'].
    # newline='' leaves line-ending handling to the csv writer.
    with open('output.csv', 'w', newline='') as output_file:
        csv_file = csv.writer(output_file, delimiter=',')
        csv_file.writerow(['Text', 'Count after calc'])
        csv_file.writerows(csv_data)
For what you've described it's working well. Good luck! :)

What you need is to parse the string into an integer, and add 10 to it.
# Fragment: runs inside the "with open(...) as inputFile:" block.
# Split each line into [text, count] strings ...
vars = [line.strip().split(",") for line in inputFile]
# ... then parse the count as an integer and add 10 to it.
vars = [[v[0], int(v[1]) + 10] for v in vars]
#              ^^^^^^^^^^^^^^
print(vars)
Note that this is not complete code; you will have to perform a few more checks. For example, what if the second item is not an integer? And what if splitting on "," does not yield exactly 2 items?

Related

Deleting specific lines in csv files

The csv file looks like below: (with a thousand more lines)
step0:
141
step1:
140
step2:
4
step3:
139
step4:
137
step5:
136
15
step6:
134
13
139
step7:
133
19
I am trying to read each line and remove lines (the ones that includes numbers only) that are, say, greater than 27.
Originally, my csv file is a string file, so all of the lines are considered strings.
What I have done is the following:
first loop through the lines that do not include "step" in them
change them into float
remove all that are greater than 27
Now I want to save (overwrite) my file after deleting these numbers but I am stuck.
Could someone assist?
import csv
# Collect the lines to keep: every "step" label and every number that is
# not greater than 27. The file holds one value per line, so plain line
# iteration is enough.
kept = []
with open('list.csv', 'r') as f:
    for raw_line in f:
        e = raw_line.strip()
        if not e:
            continue  # ignore blank lines
        # "del(d)" would only unbind a local name; to remove a value from
        # the file it has to be left out of what gets written back.
        if 'step' in e or float(e) <= 27:
            kept.append(e)

# Overwrite the original file with the surviving lines.
with open('list.csv', 'w') as f:
    f.writelines(e + '\n' for e in kept)
import csv
# Copy input.csv to output.csv, keeping every "step" line and every integer
# line that is not greater than 27. The input is opened first so a missing
# input file does not leave behind an empty output.csv.
with open('input.csv') as input_file, open('output.csv', 'w') as output_file:  # change your file name here
    for line_index, raw_line in enumerate(input_file, start=1):
        line = raw_line.strip()
        if not line:
            continue  # ignore blank lines
        if 'step' in line:
            output_file.write(line + '\n')
            continue
        try:
            number = int(line)  # you can use float, but then 30 becomes 30.0
        except ValueError:
            # Report the line that is neither a step label nor an integer.
            print("Abnormal data at line %s" % line_index)
            continue
        if number <= 27:
            output_file.write(line + '\n')
I assume that your input file is input.csv. This program writes to new output file. The output is output.csv:
step0:
step1:
step2:
4
step3:
step4:
step5:
15
step6:
13
step7:
19
One solution with re module:
import re


def _drop_if_large(match):
    """Return '' when the matched number is above 27, else the matched text."""
    matched_text = match.group()
    if int(matched_text) > 27:
        return ''
    return matched_text


# Load the whole file into memory.
with open('file.txt', 'r') as f_in:
    data = f_in.read()

# Each match is a run of digits starting at a word boundary plus any
# newlines that directly follow it; the callback decides whether it stays.
data = re.sub(r'\b(\d+)\n*', _drop_if_large, data)

# Store the filtered text in a separate file.
with open('file_out.txt', 'w') as f_out:
    f_out.write(data)
The content of file_out.txt will be:
step0:
step1:
step2:
4
step3:
step4:
step5:
15
step6:
13
step7:
19
26
import csv
# Copy list.csv to new_list.csv, leaving out numeric lines greater than 27.
# The file holds one value per line, so it is read and written as plain
# text lines (passing a string to csv.writer.writerow would split it into
# one field per character).
with open('list.csv', 'r') as source, open('new_list.csv', 'w') as new_list:
    for raw_line in source:
        e = raw_line.strip()
        if not e:
            continue  # ignore blank lines
        # "step" labels are always kept; numbers only when they are <= 27.
        if 'step' in e or float(e) <= 27:
            new_list.write(e + '\n')
Essentially you're going to just copy over the rows you want to your new file. If the line is step, we write it. If the line is less than 27, we write it. If you'd prefer to just overwrite your file when you're done:
import csv
# First pass: remember every line that should survive ("step" labels and
# numbers that are not greater than 27).
rows_to_keep = []
with open('list.csv', 'r') as source:
    for raw_line in source:
        e = raw_line.strip()
        if e and ('step' in e or float(e) <= 27):
            rows_to_keep.append(e)

# Second pass: reopen the same file for writing (this truncates it) and
# write the kept lines back through the newly opened handle.
with open('list.csv', 'w') as new_list:
    new_list.writelines(e + '\n' for e in rows_to_keep)

how to change multiple rows of csv file with corresponding dictionary values

My csv file ("challenges.csv") contains multiple rows as shown below (the number of columns are different, about 8000 rows):
2937 ,58462bc9a559fa7d29819028 ,29 ,57eb63d813fd7c0329bdb01f ,
2938 ,58462bc9a559fa7d29819028 ,30 ,57eb63d713fd7c0329bdafb5 ,57eb63d713fd7c0329bdafb6
And I also have
a dictionary named mydic from "forDic.csv" for example:
{ '58462bc9a559fa7d29819028':'negative chin up', '57eb63d813fd7c0329bdb01f':'knee squeeze squat', '57eb63d713fd7c0329bdafb5':'squat', '57eb63d713fd7c0329bdafb6':'lunge', ... }
I want to change values of "challenges.csv" with values of mydic
if values of "challenges.csv" is equal to keys of mydic.
How can i do? Please help me.
Expected output:
a csv file which contains rows like below
2937 ,'negative chin up' ,29 ,'knee squeeze squat' ,
2938 ,'negative chin up' ,30 ,'squat' ,'lunge'
import csv

# Build the key -> replacement lookup from the first two columns of
# forDic.csv. Whitespace around the fields is removed so the keys can match
# the padded cells of challenges.csv.
with open('./forDic.csv', mode='r', newline='') as infile:
    reader = csv.reader(infile)
    mydic = dict(
        (rows[0].strip(), rows[1].strip()) for rows in reader if len(rows) >= 2
    )
print(mydic)


def replace_all():
    """Write challenges_new.csv as a copy of challenges.csv with mapped cells.

    Every cell whose stripped text is a key of ``mydic`` is replaced by the
    corresponding value; all other cells are written stripped but otherwise
    unchanged.
    """
    with open('./challenges.csv', mode='r', newline='') as infile, \
            open('./challenges_new.csv', mode='w', newline='') as outfile:
        r = csv.reader(infile)
        w = csv.writer(outfile)
        for row in r:
            # Whole-cell dictionary lookup; unknown cells fall back to
            # themselves.
            w.writerow([mydic.get(cell.strip(), cell.strip()) for cell in row])


replace_all()
Assuming
challenges.csv
317, change1, 89, change2, change3
318, change1, 89, change3, change4
fordic.csv
change1, changedto1
change2, changedto2
change3, changedto3
change4, changedto4
The following code just prints the replaced line
import csv
import re

# Load the replacement table. skipinitialspace drops the blank that follows
# each comma in fordic.csv, so the values carry no leading space.
with open('fordic.csv', mode='r', newline='') as infile:
    reader = csv.reader(infile, skipinitialspace=True)
    mydic = dict((rows[0], rows[1]) for rows in reader if len(rows) >= 2)
print(mydic)

# One alternation of all (escaped) keys. Longer keys come first so that a
# key which is a prefix of another one cannot win the match.
pattern = re.compile("|".join(
    re.escape(key) for key in sorted(mydic, key=len, reverse=True)))

with open('./challenges.csv', mode='r') as infile:
    for row in infile:
        # Drop the line ending because print() appends its own. The lookup
        # falls back to the matched text itself if it is not a key.
        print(pattern.sub(lambda m: mydic.get(m.group(0), m.group(0)),
                          row.rstrip('\n')))
output
317, changedto1, 89, changedto2, changedto3
318, changedto1, 89, changedto3, changedto4
to understand the multi string replace follow this SO Answer
It is best not to try and update the values in place, but rather create a new temporary file as output. This script attempts the dictionary substitution on all of your column values and writes each row back to the new temporary file. By using this approach the file can be of any size without needing to be loaded completely into memory:
The following approach should work:
import csv
import os

challenges = 'challenges.csv'
temp = '_temp.csv'

# Lookup table: first column of forDic.csv -> second column.
with open('forDic.csv', newline='') as f_fordic:
    mydic = {
        row[0].strip(): row[1].strip()
        for row in csv.reader(f_fordic)
        if len(row) >= 2  # skip blank or one-column rows
    }

# Stream challenges.csv row by row into the temporary file, substituting
# every cell that is a key of mydic.
with open(challenges, newline='') as f_challenges, open(temp, 'w', newline='') as f_temp:
    csv_temp = csv.writer(f_temp)
    for row in csv.reader(f_challenges):
        csv_temp.writerow([mydic.get(c.strip(), c.strip()) for c in row])

# Move the temp file over challenges (optional). os.replace overwrites the
# destination in one step, so there is no moment where challenges.csv has
# been removed but the new file is not yet in place.
os.replace(temp, challenges)
Giving you an updated challenges.csv file as follows:
2937,negative chin up,29,knee squeeze squat,
2938,negative chin up,30,squat,lunge

Python: How can I sum integers in a CSV file, while only summing the integers of a certain variable?

I'm trying to program some data in a csvfile by using Python. I have a list of countries and results of the Eurovision Songcontest, and it looks like this:
Country,Points,Year
Belgium;181;2016
Netherlands;153;2016
Australia;511;2016
Belgium;217;2015
Australia;196;2015
Et cetera.
In summary, I want to sum the total of points that any country received throughout the years, so the output should look something like this:
'Belgium: 398','Netherlands: 153','Australia: 707' and so on.
This is what my code looks like:
import csv

pointsallyears = []
countriesallyears = []
with open('euro20042016.csv', 'r', newline='') as csvfile:
    readFILE = csv.reader(csvfile, delimiter=';')
    for row in readFILE:
        # The header line is comma-separated, so with delimiter=';' it
        # arrives as a single field; skip it and any other short row.
        if len(row) < 2:
            continue
        countriesallyears.append(row[0])
        pointsallyears.append(row[1])
# No explicit close(): leaving the with block already closed the file.

# Convert the point strings to integers and pair them with the countries.
results = [int(result) for result in pointsallyears]
scorebord = zip(countriesallyears, results)
So I already made sure that the results / points are actual integers and I filtered out the third row (Year), but I have no idea how to proceed from here. Thanks a lot in advance!
Just putting @Mikk's comment into an actual answer. Two lines, not counting the import:
import pandas as pd

# The data rows are ';'-separated but the file's first line uses ','.
# header=0 consumes that first line and names=... supplies the column
# labels, so the file does not need to be edited by hand.
df = pd.read_csv('euro20042016.csv', sep=';', header=0,
                 names=['Country', 'Points', 'Year'])
# Total points per country.
print(df.groupby('Country')['Points'].sum())
The only extra thing you need to do is to change the first line of your file to be delimited by ; instead of ,.
I slightly changed your code to use a dictionary with country names as keys. The resulting dictionary d has the country names as keys and the total points as values.
import csv

# d maps each country name to the sum of its points over all rows.
d = dict()
with open('euro20042016.csv', 'r', newline='') as csvfile:
    readFILE = csv.reader(csvfile, delimiter=';')
    for row in readFILE:
        # Rows with fewer than two ';'-separated fields (such as the
        # comma-separated header line) carry no points; skip them.
        if len(row) < 2:
            continue
        # dict.get supplies 0 the first time a country is seen, so no
        # separate list of known countries is needed.
        d[row[0]] = d.get(row[0], 0) + int(row[1])
print(d)
I decided to play around a bit with your code, and this is what I came up with. Here, row[0] contains the country names, and row[1] contains the values we need. We check if the country already exists in the dictionary we use to maintain the aggregates, and if it doesn't we create it.
import csv

# score_dict maps each country name to its accumulated points.
score_dict = {}
with open('euro20042016.csv', 'r', newline='') as csvfile:
    readFILE = csv.reader(csvfile, delimiter=';')
    for row in readFILE:
        # Only rows with 3 elements have the data we need
        if len(row) != 3:
            continue
        score_dict[row[0]] = score_dict.get(row[0], 0) + int(row[1])
# The with block has already closed the file at this point.
print(score_dict)
What I get as output is this
{'Belgium': 398, 'Australia': 707, 'Netherlands': 153}
which I believe is what you were aiming for.
Let me know in the comments if you face a problem understanding anything.
I have a solution for that, but make sure your euro20042016.csv file looks like this (without the header line):
Belgium;181;2016
Netherlands;153;2016
Australia;511;2016
Belgium;217;2015
Australia;196;2015
and this code produces its output as a list, like
[('Belgium', 398), ('Australia', 707), ('Netherlands', 153)]
Code is here
# Sum the points per country and print them as a list of
# (country, total) tuples.
try:
    totals = {}
    # The with block closes the file even when a row fails to parse.
    with open('euro20042016.csv', 'r') as f:
        for line in f:
            fields = line.strip().split(';')
            if len(fields) < 2:
                continue  # blank line, e.g. after a trailing newline
            totals[fields[0]] = totals.get(fields[0], 0) + int(fields[1])
    finalTotal = list(totals.items())
    print(finalTotal)
except (IOError, ValueError) as ex:
    # IOError: the file could not be opened or read.
    # ValueError: a points field is not an integer.
    print(ex)
I hope this will help you.

search column, return row from excel using python

I have a csv file with column A that has dates. I want to search the dates and return the corresponding row in array (VERY IMPT) format. How would I do that using python? Thank you.
excel file:
A B C
1 12202014 403 302
2 12212014 312 674
3 12222014 193 310
input:
Search csv file for 12212014
output:
[12212014,312,674]
attempt code:
import csv  # used by csv.reader below

date = '12212014'
# Scan dates.csv and report each row whose first column equals the date.
with open('dates.csv', 'rt', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        # row is [] for a blank line, so test it before indexing.
        if row and date == row[0]:
            print("found the date")
How do I return the row instead of just a print statement?
The basic idea is to create a dictionary/mapping date->line and get the value by the key:
import csv

# Map the first column of each row (the date) to the whole row. If a date
# occurs more than once, the last row with that date wins.
data = {}
with open('test.csv', 'r', newline='') as f:
    for line in csv.reader(f):
        if line:  # csv.reader yields [] for blank lines
            data[line[0]] = line

date_to_find = '12212014'
print(data.get(date_to_find, 'Date not found'))
prints:
['12212014', '312', '674']
This helps if you need the mapping afterwards to find the dates in it.
Another approach is to use generator expression to iterate over lines in a file until we find the date:
import csv

date_to_find = '12212014'
with open('test.csv', 'r', newline='') as f:
    # The generator stops reading at the first matching row; next() returns
    # the fallback string when no row matches. "line and ..." skips the
    # empty lists csv.reader yields for blank lines.
    matches = (line for line in csv.reader(f)
               if line and line[0] == date_to_find)
    print(next(matches, 'Date not found'))
prints:
['12212014', '312', '674']
Hope that helps.

Error while reading csv file using python

I am trying to read a specific comma-separated value from a csv file, but I am getting the full row. How can I get just that one value?
My csv looks like this
Index,Time,Energy
1,1.0,45.034
I need to get the value of Energy in each row.
import csv

# Load every record as a dict keyed by the names in the header line.
with open('somefile.csv') as f:
    rows = list(csv.DictReader(f, delimiter=','))

# Print the Energy field of each record, one per line.
for row in rows:
    print(row['Energy'])
# Print the third comma-separated field of every data line in file.txt.
# The with block closes the file even if a line cannot be processed.
with open('file.txt') as f:
    next(f, None)  # To skip header (no error if the file is empty)
    for line in f:
        fields = line.rstrip('\n').split(',')
        # Strip the line ending first so it is not printed with the last
        # column; skip lines that have fewer than three fields.
        if len(fields) > 2:
            print(fields[2])
If you want it working even if the position of column Energy changes, you can do this:
with open('your_file.csv') as f:
    # Get header
    header = f.readline()
    # Split the header into column names before searching: str.index on
    # the raw line would return a character offset, not a column number.
    energy_index = header.strip().split(',').index('Energy')
    # Get Energy value
    for line in f:
        fields = line.strip().split(',')
        if len(fields) <= energy_index:
            continue  # blank or truncated line
        energy = fields[energy_index]
        # do whatever you want to do with Energy
Check the below code. Hoping this is what you are looking for.
import csv

# NOTE(review): file_name is not defined in this snippet; it is expected to
# hold the path of the csv file before this code runs.
try:
    fobj = open(file_name, 'r')
except IOError:
    # The file could not be opened, so there is nothing to close or read.
    file_content = False
else:
    # The with block closes fobj when the loop finishes or raises.
    with fobj:
        file_content = csv.reader(fobj, delimiter=',', quotechar='|')
        for row_data in file_content:
            try:
                # This will return the 3rd columns value i.e 'energy'
                row_data[2] = row_data[2].strip()
            except IndexError:
                continue  # row has fewer than three columns
            print(row_data[2])

Categories