Create new file for each row in csv (trouble iterating) - python

Hi I am trying to iterate over each row in a csv file with python and create new csv files for each row. So my thought process is open the file, and loop through each row and for each row create a file named n_file.csv (where 'n' is the iteration), so here is my code:
import csv
csvfile = open('sample.csv','rb')
csvFileArray = []
for row in csv.reader(csvfile, delimiter = '.'):
csvFileArray.append(row)
print(row)
n = 0
n += 1
file = open(str(n) + "_file.csv", 'w+')
file.write(str(row))
print(n) # returns 1 every time
Unfortunately this is not iterating properly (it only creates a file named 1_file.csv and overwrites it each time). How can I fix this?

for row in csv.reader(csvfile, delimiter='.'):
csvFileArray.append(row)
print(row)
n = 0 # << you do n=0 each loop!!
n += 1
So it would be better written as:
# enumerate() supplies the running index, so no hand-maintained counter is
# needed (and nothing can reset it inside the loop).
for idx, row in enumerate(csv.reader(csvfile, delimiter='.')):
    csvFileArray.append(row)
    print(row)
    # One output file per input row. The with-block closes each file as soon
    # as its row has been written, so handles do not pile up on large inputs.
    with open(str(idx) + "_file.csv", 'w+') as out_file:
        out_file.write(str(row))

You set n to 0 each time, because you declared it inside the loop. Declare it before the for statement.

Try this:
import csv
# The csv module works on text: open the input in text mode with newline=''
# so the reader handles line endings itself. (Binary mode makes csv.reader
# raise "iterator should return strings, not bytes" on Python 3.)
with open('sample.csv', newline='') as csvfile:
    for i, row in enumerate(csv.reader(csvfile, delimiter='.')):
        # One output file per input row, named after the row's 0-based index.
        with open("{}_file.csv".format(i), "w", newline='') as out_file:
            # Write the row as delimited fields, using the same delimiter as
            # the input, instead of the Python list repr str(row) would give.
            csv.writer(out_file, delimiter='.').writerow(row)

Related

Extract two columns sorted from CSV

I have a large csv file, containing multiple values, in the form
Date,Dslam_Name,Card,Port,Ani,DownStream,UpStream,Status
2020-01-03 07:10:01,aart-m1-m1,204,57,302xxxxxxxxx,0,0,down
I want to extract the Dslam_Name and Ani values, sort them by Dslam_name and write them to a new csv in two different columns.
So far my code is as follows:
import csv
import operator
with open('bad_voice_ports.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
sortedlist = sorted(readCSV, key=operator.itemgetter(1))
for row in sortedlist:
bad_port = row[1][:4],row[4][2::]
print(bad_port)
f = open("bad_voice_portsnew20200103SORTED.csv","a+")
f.write(row[1][:4] + " " + row[4][2::] + '\n')
f.close()
But my Dslam_Name and Ani values are kept in the same column.
As a next step I would like to count how many times the same value appears in the 1st column.
You are forcing them to be a single column. Joining the two into a single string means Python no longer regards them as separate.
But try this instead:
import csv
import operator
# Read every row, sort on the second column (Dslam_Name) and write a
# two-column CSV. Both files are opened with newline='' as the csv module
# documentation requires, so no blank lines appear between rows on Windows.
with open('bad_voice_ports.csv', newline='') as readfile, \
        open('bad_voice_portsnew20200103SORTED.csv', 'w', newline='') as writefile:
    readCSV = csv.reader(readfile)
    writeCSV = csv.writer(writefile)
    for row in sorted(readCSV, key=operator.itemgetter(1)):
        # First four characters of column 1, and column 4 without its first
        # two characters, kept as a tuple so they stay separate fields.
        bad_port = row[1][:4], row[4][2:]
        print(bad_port)
        writeCSV.writerow(bad_port)
If you want to include the number of times each key occurred, you can easily include that in the program, too. I would refactor slightly to separate the reading and the writing.
import csv
import operator
from collections import Counter
# Pass 1: read the input, keep only the two derived columns and count how
# often each key (first four characters of column 1) occurs.
with open('bad_voice_ports.csv', newline='') as readfile:
    readCSV = csv.reader(readfile)
    rows = []
    counts = Counter()
    for row in readCSV:
        key = row[1][:4]
        rows.append([key, row[4][2:]])
        counts[key] += 1

# Pass 2: write the rows in sorted order, each prefixed with the number of
# times its key occurred. newline='' lets the csv writer control line endings.
with open('bad_voice_portsnew20200103SORTED.csv', 'w', newline='') as writefile:
    writeCSV = csv.writer(writefile)
    for row in sorted(rows):
        print(row)
        writeCSV.writerow([counts[row[0]]] + row)
I would recommend to remove the header line from the CSV file entirely; throwing away (or separating out and prepending back) the first line should be an easy change if you want to keep it.
(Also, hard-coding input and output file names is problematic; maybe have the program read them from sys.argv[1:] instead.)
So my suggestion is fairly simple. As I stated in a previous comment, there is good documentation on reading and writing CSV files in Python here: https://realpython.com/python-csv/
As per an example, to read from a csv the columns you need you can simply do this:
>>> file = open('some.csv', mode='r')
>>> csv_reader = csv.DictReader(file)
>>> for line in csv_reader:
... print(line["Dslam_Name"] + " " + line["Ani"])
...
This would return:
aart-m1-m1 302xxxxxxxxx
Now you can just as easily create a variable and store the column values there to write them to a file later, or open a new file while reading the lines and write the column values into it. I hope this helps you.
After the help from #tripleee and #marxmacher my final code is
import csv
import operator
from collections import Counter
with open('bad_voice_ports.csv', newline='') as csv_file:
    readCSV = csv.reader(csv_file, delimiter=',')
    # Take the header off *before* sorting. Treating "the first sorted row"
    # as the header only works while the header text happens to sort ahead
    # of every data value.
    header = next(readCSV, None)
    sortedlist = sorted(readCSV, key=operator.itemgetter(1))

if header is not None:
    print(header[1], header[4])

rows = []
counts = Counter()
for row in sortedlist:
    Dslam = row[1][:4]   # first four characters of column 1
    Ani = row[4][2:]     # column 4 without its first two characters
    rows.append([Dslam, Ani])
    counts[Dslam] += 1
    print(Dslam, Ani)

# Open the output once instead of once per row. Append mode is kept, so
# running the script again adds to an existing file.
with open("bad_voice_portsnew202001061917.xls", "a+") as f:
    for row in sorted(rows):
        # tab-separated: key, value, number of rows sharing that key
        f.write(row[0] + '\t' + row[1] + '\t' + str(counts[row[0]]) + '\n')

print('Total of Bad ports =', str(len(rows)))
This way the desired columns are extracted from the initial csv file, a new xls file is generated with those values stored in separate columns, and the number of entries per key is counted along with the total number of entries.
Thanks for all the help, please feel free for any improvement suggestions!
You can use sorted:
import csv
# Read the whole input: the first row is the header, the rest is data.
# The with-block closes the input file once it has been consumed.
with open('filename.csv', newline='') as src:
    _h, *data = csv.reader(src)

with open('new_csv.csv', 'w', newline='') as f:
    write = csv.writer(f)
    # Header reduced to the same two columns as the data rows below.
    write.writerow((_h[1], _h[4]))
    # writerows() is a method of the writer object, not of the csv module.
    write.writerows(sorted(((i[1], i[4]) for i in data), key=lambda x: x[0]))

Unsure how to write text into specific column in csv file.

Hey I'm working on this project where I take this text and translate it and store it back into the same CSV file. The next open column is at index 10 or Column K. I've been trying to write the data but I just can't get it.
Reading works fine. I tried to do all this into single while loop but I couldn't get it to work. Sorry for any formatting errors!
from googletrans import Translator
import csv
translater = Translator()
f = open("#ElNuevoDia.csv", "r+")
csv_f = csv.reader(f)
csv_wf = csv.writer(f)
tmp = {}
x = 0
for row in csv_f:
tmp[x] = translater.translate(row[4], dest="en")
#print(tmp[x].text)
#print("\n")
#print(tmp[x].text)
x = x + 1
x = 0
f.close()
csv_wf = csv.writer(f)
for row in csv_wf:
csv_wf[10].writerow(tmp[x].text)
f.close()
You should update each row as you read it and then write the rows back (as you mentioned in the comment, the writer is not iterable). Something like this (based on part of your code):
# Collect the updated rows in a list so they can be written back in order.
tmp = []
for row in csv_f:
    translated = translater.translate(row[4], dest="en").text
    # Pad short rows so that index 10 (column K) exists before assigning it.
    row.extend([""] * (11 - len(row)))
    row[10] = translated
    tmp.append(row)

# The file is open in "r+" mode: rewind and truncate it so the updated rows
# replace the old content, and only close it after the write has happened.
f.seek(0)
f.truncate()
csv_wf = csv.writer(f)
csv_wf.writerows(tmp)
f.close()
Edit 1:
To store the translated text itself rather than the translation result object, you can do this:
row[10] = translater.translate(row[4], dest="en").text
and you can write it back in one step:
csv_wf.writerows(tmp)

how to add a column to a csv file that is the sum of the rows and a row that is the sum of the columns

I have data in a csv file e.g
1,2,3,4
4,5,6,7
what I want is to create an extra column that sums the first rows so that the result will look like.
1,2,3,4,10
4,5,6,7,22
And an extra row that sums the columns.
1,2,3,4,10
4,5,6,7,22
5,7,9,11,32
This is probably really basic but I could do with the help please?
#!/usr/bin/python
import sys
from itertools import imap, repeat
from operator import add
total = repeat(0) # See how to handle initialization without knowing the number of columns ?
for line in sys.stdin:
l = map(int, line.split(','))
l.append(sum(l))
print ','.join(map(str,l))
total = imap(add, total, l)
print ','.join(map(str, total))
I know, I'm treating Python like Haskell these days.
import csv
thefile = ["1,2,3,4", "4,5,6,7"]
reader = csv.reader(thefile)

# Read the csv into a list of integer rows.
temp = [[int(item) for item in row] for row in reader]

# Column totals, computed from the data rows only. They are taken before the
# row totals are appended, and the totals row is never fed back into itself
# (doing so would double every column total).
final = [sum(column) for column in zip(*temp)]

# Append each row's total as an extra column.
for row in temp:
    row.append(sum(row))

# Bottom row: the column totals followed by the grand total.
final.append(sum(final))
temp.append(final)
print(temp)
import sys
import csv
def is_number(s):
    """Return True if *s* can be converted with float(), otherwise False.

    Anything float() accepts counts as a number, which includes strings
    such as "nan" and "inf". Values float() rejects with a TypeError
    (for example None) are reported as not-a-number instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
# Copy the CSV named by argv[1] to argv[2], appending to every row the sum of
# its numeric cells (cells for which is_number() is false are left out of the
# sum). The input is opened first so a bad input path cannot truncate the
# output, and both files use text mode with newline='' as the csv module
# expects on Python 3.
with open(sys.argv[1], newline='') as readfile, \
        open(sys.argv[2], 'w', newline='') as writefile:
    reader = csv.reader(readfile, delimiter=',', quotechar='"')
    writer = csv.writer(writefile, delimiter=',', quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)
    for row in reader:
        writer.writerow(row + [sum(float(r) for r in row if is_number(r))])
How about some pythonic list comprehension:
import csv
in_file = ["1,2,3,4", "4,5,6,7"]
in_reader = list(csv.reader(in_file))

# Convert the parsed cells to integers once and reuse them below.
int_rows = [[int(cell) for cell in row] for row in in_reader]
row_sum = [sum(row) for row in int_rows]
# Transpose the *parsed* rows to get the columns. Transposing the raw text
# character by character only works while every value is a single digit.
col_sum = [sum(column) for column in zip(*int_rows)]

# Each data row followed by its total, then the column totals followed by the
# grand total, all as ", "-separated lines.
for row, total in zip(int_rows, row_sum):
    print(", ".join(map(str, row + [total])))
print(", ".join(map(str, col_sum + [sum(col_sum)])))
Let me know if you need anything else.

Error while editing many CSV files

I asked this question a few days ago because I needed help with editing some CSV files: Fix numbering on CSV files that have deleted lines.
The people of Stack Overflow helped me out a great deal however I keep getting an error saying AttributeError: 'int' object has no attribute 'strip'. The problem is that all the information in my CSV files are not integers. Being the python newbie that I am, a few days of trying to fix it only made things worse. Here is what I have from my previous question that gives me the error:
import csv
import glob
import os
import re
numbered = re.compile(r'N\d+').match
for fn in fns:
# open for counting
reader = csv.reader(open(fn,"rb"))
count = sum(1 for row in reader if row and not any(r.strip() == 'DIF' for r in row) and numbered(row[0]))
# reopen for filtering
reader = csv.reader(open(fn,"rb"))
with open (os.path.join('out', fn), 'wb') as f:
counter = 0
w = csv.writer(f)
for row in reader:
if row and 'Count' in row[0].strip():
row = ['Count', count]
if row and not any(r.strip() == 'DIF' for r in row): #remove DIF
if numbered(row[0]):
counter += 1
row[0] = 'N%d' % counter
w.writerow(row)
The code is essentially supposed to run through a bunch of CSV files and delete all the lines that have 'DIF' in them and fix the numbering due to deleted lines. Does anyone have any suggestions?
Easiest might be to wrap r in str(). But at the same time, why don't you read the file in just once, makes it easier:
import csv
import glob
import os
import re
numbered = re.compile(r'N\d+').match
for fn in fns:
    # Read each file once; the with-block closes it as soon as the rows are
    # in memory.
    with open(fn, newline='') as src:
        # filter out 'DIF' rows here; str() guards against non-string cells
        rows = [row for row in csv.reader(src)
                if not any(str(r).strip() == 'DIF' for r in row)]
    # count numbered rows (first cell matches N<digits>)
    count = sum(1 for row in rows if row and numbered(row[0]))
    with open(os.path.join('out', fn), 'w', newline='') as f:
        counter = 0
        w = csv.writer(f)
        for row in rows:
            # replace the 'Count' row with the recomputed total
            if row and 'Count' in row[0].strip():
                row = ['Count', count]
            # renumber the surviving N-rows consecutively from 1
            if row and numbered(row[0]):
                counter += 1
                row[0] = 'N%d' % counter
            w.writerow(row)

How can I get a specific field of a csv file?

I need a way to get a specific item(field) of a CSV. Say I have a CSV with 100 rows and 2 columns (comma separated). First column emails, second column passwords. For example I want to get the password of the email in row 38. So I need only the item from 2nd column row 38...
Say I have a csv file:
aaaaa#aaa.com,bbbbb
ccccc#ccc.com,ddddd
How can I get only 'ddddd' for example?
I'm new to the language and tried some stuff with the csv module, but I don't get it...
import csv
mycsv = csv.reader(open(myfilepath))
for row in mycsv:
text = row[1]
Following the comments to the SO question here, a better, more robust version would be:
import csv
with open(myfilepath, 'rb') as f:
mycsv = csv.reader(f)
for row in mycsv:
text = row[1]
............
Update: If what the OP actually wants is the last string in the last row of the csv file, there are several approaches that do not necessarily need csv. For example,
# Read the whole file as text (so it can be split on a str separator) and
# close it again straight away.
with open(myfilepath) as f:
    fulltxt = f.read()
# Everything after the final comma in the file.
laststring = fulltxt.split(',')[-1]
This is not good for very big files because you load the complete text in memory but could be ok for small files. Note that laststring could include a newline character so strip it before use.
And finally if what the OP wants is the second string in line n (for n=2):
Update 2: This is now the same code as the one in the answer from J.F.Sebastian. (The credit goes to him):
import csv
line_number = 2
with open(myfilepath, 'rb') as f:
mycsv = csv.reader(f)
mycsv = list(mycsv)
text = mycsv[line_number][1]
............
#!/usr/bin/env python
"""Print a field specified by row, column numbers from given csv file.
USAGE:
%prog csv_filename row_number column_number
"""
import csv
import sys
filename = sys.argv[1]
# Row and column numbers on the command line are 1-based; convert them to the
# 0-based indexes used for list access.
row_number, column_number = [int(arg, 10) - 1 for arg in sys.argv[2:]]
# Text mode with newline='' is what the csv module expects on Python 3.
with open(filename, newline='') as f:
    rows = list(csv.reader(f))
print(rows[row_number][column_number])
Example
$ python print-csv-field.py input.csv 2 2
ddddd
Note: list(csv.reader(f)) loads the whole file in memory. To avoid that you could use itertools:
import itertools
# ...
with open(filename, 'rb') as f:
row = next(itertools.islice(csv.reader(f), row_number, row_number+1))
print row[column_number]
import csv
def read_cell(x, y, filename='file.csv'):
    """Return the cell in column *x* of row *y* of a CSV file.

    Both indexes are 0-based. Reading stops as soon as row *y* is reached,
    so the rest of the file is never parsed.

    Args:
        x: column index within the row.
        y: row index within the file.
        filename: path of the CSV file; defaults to 'file.csv'.

    Returns:
        The cell as a string, or None when the file has no row *y*.

    Raises:
        IndexError: if row *y* exists but has no column *x*.
    """
    with open(filename, 'r', newline='') as f:
        for row_index, row in enumerate(csv.reader(f)):
            if row_index == y:
                return row[x]
    return None
print (read_cell(4, 8))
This example prints cell 4, 8 in Python 3.
There is an interesting point you need to catch about csv.reader() object. The csv.reader object is not list type, and not subscriptable.
This works:
for r in csv.reader(file_obj): # file not closed
print r
This does not:
r = csv.reader(file_obj)
print r[0]
So, you first have to convert to list type in order to make the above code work.
r = list( csv.reader(file_obj) )
print r[0]
Finally I got it!
import csv
def select_index(index, filename='oscar_age_female.csv'):
    """Print the name field of every row whose 'Index' field equals *index*.

    The file's first line is used as the header. The name column is looked
    up under the key ' "Name"' (leading space and double quotes included),
    exactly as that header spells it.

    Args:
        index: value to match against the 'Index' column, as a string.
        filename: path of the CSV file; defaults to 'oscar_age_female.csv'.
    """
    # The with-block closes the file once all rows have been scanned.
    with open(filename, 'r', newline='') as csv_file:
        for line in csv.DictReader(csv_file):
            if line['Index'] == index:
                print(line[' "Name"'])
select_index('11')
"Bette Davis"
The following may be what you are looking for:
import pandas as pd
df = pd.read_csv("table.csv")
print(df["Password"][row_number])
#where row_number is 38 maybe
import csv
inf = csv.reader(open('yourfile.csv','r'))
for row in inf:
print row[1]

Categories