Python: add value and write output

I need to get information from a list and add a 'year' column taken from the file name. I'm still not sure how to add the 'year' field to each record. Can I use append?
As for the output file, do I just need to use outputcsv.writerow(records)?
This is the part of the code where I'm stuck:
filenames = ('babyQld2010.csv',
             'babyQld2011.csv',
             'babyQld2012.csv',
             'babyQld2012.csv',
             'babyQld2014.csv')

outFile = open('babyQldAll.csv', 'w')
csvFile_out = csv.writer(outFile, delimiter=',')

for filename in filenames:
    name, ext = filename.split('.')
    year = name[-4:]  # extract year from file names
    records = extract_names(filename)
    # Get (name, count, gender) from list "records",
    # and add value of "year" and write into output file (using "for" loop)
The output file should look like:
2010,Lola,69,Girl
As input, I have 5 files (babyQld2010.csv, babyQld2011.csv, babyQld2012.csv, babyQld2012.csv, babyQld2014.csv), each containing rows like:
Mia,425,William,493
I have to reshape each row into the format below, which I have already done and saved in the list 'records':
Lola,69,Girl
Now I need to add a 'year' field to each record and export the result to a CSV file.
This is my full code:
import csv

def extract_names(filename):
    ''' Extract babyname, count, gender from a csv file,
        and return the data in a list.
    '''
    inFile = open(filename, 'rU')
    csvFile = csv.reader(inFile, delimiter=',')
    # Initialization
    records = []
    rowNum = 0
    for row in csvFile:
        if rowNum != 0:
            # +++++ You code here ++++
            # Read each row of csv file and save information in list 'records'
            # as (name, count, gender)
            records.append([row[0], row[1], "Female"])
            records.append([row[2], row[3], "Male"])
            print('Process each row...')
        rowNum += 1
    inFile.close()
    return(records)
#### Start main program #####
filenames = ('babyQld2010.csv',
             'babyQld2011.csv',
             'babyQld2012.csv',
             'babyQld2012.csv',
             'babyQld2014.csv')

with open('babyQldAll.csv', 'w') as outFile:
    csvFile_out = csv.writer(outFile, delimiter=',')
    for filename in filenames:
        name, ext = filename.split('.')
        year = name.split('.')[0][-4:]  # extract year from file names
        records = extract_names(filename)
        for record in records:
            csvFile_out.write([year] + record)
            print("Write in csv file...")
outFile.close()

To get the year from the file name you can simply split the string at '.' and then take the last four characters of the first part. Example -
>>> s = 'babyQld2010.csv'
>>> s.split('.')[0][-4:]
'2010'
Then simply iterate over your list of records, which you say is correct; for each list within it, use list concatenation to create a new list with the year at the start, and write that to the csv file with writerow.
I would also suggest using a with statement for opening the file you write to (and also in the function where you read the other csv files; a sketch of that follows the example below). Example -
filenames = ('babyQld2010.csv',
             'babyQld2011.csv',
             'babyQld2012.csv',
             'babyQld2012.csv',
             'babyQld2014.csv')

with open('babyQldAll.csv', 'w') as outFile:
    csvFile_out = csv.writer(outFile, delimiter=',')
    for filename in filenames:
        name, ext = filename.split('.')
        year = name.split('.')[0][-4:]  # extract year from file names
        records = extract_names(filename)
        for record in records:
            csvFile_out.writerow([year] + record)
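For reference, here is a minimal sketch of what extract_names might look like using with as well, following the same row layout the question describes (this is an illustration, not the assignment's exact code):

import csv

def extract_names(filename):
    '''Extract (name, count, gender) records from one yearly csv file.'''
    records = []
    with open(filename, 'r') as inFile:
        csvFile = csv.reader(inFile, delimiter=',')
        next(csvFile, None)  # skip the header row
        for row in csvFile:
            records.append([row[0], row[1], "Female"])
            records.append([row[2], row[3], "Male"])
    return records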

Yes, you can just append the year column to each row as you read it in from your source files. You can read in & write out each row as a dictionary so that you can use your existing column headers to address the data if you need to massage it on the way through.
With csv.DictWriter you specify your headers (fieldnames) when you set it up, and you can then write them out with the writeheader() method.
import csv

file_list = ['babyQld2010.csv',
             'babyQld2011.csv',
             'babyQld2012.csv',
             'babyQld2012.csv',
             'babyQld2014.csv']

outFile = open('babyQldAll.csv', 'wb')
csv_writer = csv.DictWriter(outFile,
                            fieldnames=['name', 'count', 'gender', 'year'])
csv_writer.writeheader()

for a_file in file_list:
    name, ext = a_file.split('.')
    year = name[-4:]
    with open(a_file, 'rb') as inFile:
        csv_read_in = csv.DictReader(inFile)
        for row in csv_read_in:
            row['year'] = year
            csv_writer.writerow(row)

outFile.close()
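Note that the 'wb'/'rb' modes above are the Python 2 idiom for the csv module; if you are on Python 3, you would instead open the files in text mode with newline='', for example:

outFile = open('babyQldAll.csv', 'w', newline='')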
Hope this helps.

Related

Getting unique values from csv file, output to new file

I am trying to get the unique values from a csv file. Here's an example of the file:
12,life,car,good,exellent
10,gift,truck,great,great
11,time,car,great,perfect
The desired output in the new file is this:
12,10,11
life,gift,time
car,truck
good,great
excellent,great,perfect
Here is my code:
def attribute_values(in_file, out_file):
    fname = open(in_file)
    fout = open(out_file, 'w')
    # get the header line
    header = fname.readline()
    # get the attribute names
    attrs = header.strip().split(',')
    # get the distinct values for each attribute
    values = []
    for i in range(len(attrs)):
        values.append(set())
    # read the data
    for line in fname:
        cols = line.strip().split(',')
        for i in range(len(attrs)):
            values[i].add(cols[i])
    # write the distinct values to the file
    for i in range(len(attrs)):
        fout.write(attrs[i] + ',' + ','.join(list(values[i])) + '\n')
    fout.close()
    fname.close()
The code currently outputs this:
12,10
life,gift
car,truck
good,great
exellent,great
12,10,11
life,gift,time
car,car,truck
good,great
exellent,great,perfect
How can I fix this?
You could try to use zip to iterate over the columns of the input file, and then eliminate the duplicates:
import csv

def attribute_values(in_file, out_file):
    with open(in_file, "r") as fin, open(out_file, "w") as fout:
        for column in zip(*csv.reader(fin)):
            items, row = set(), []
            for item in column:
                if item not in items:
                    items.add(item)
                    row.append(item)
            fout.write(",".join(row) + "\n")
Result for the example file:
12,10,11
life,gift,time
car,truck
good,great
exellent,great,perfect
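For clarity, zip(*rows) is what transposes the parsed rows into columns; a tiny illustration with made-up rows:

rows = [['12', 'life'], ['10', 'gift'], ['11', 'time']]
print(list(zip(*rows)))
# [('12', '10', '11'), ('life', 'gift', 'time')]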

How to remove specifc row from csv file using python

I am working on a program and trying to achieve the following functionality:
Add a new student
Remove a student based on id
Here is my code:
from csv import writer
import csv

def add(file_name, list_of_elem):
    # Open file in append mode
    with open(file_name, 'a+', newline='') as write_obj:
        # Create a writer object from csv module
        csv_writer = writer(write_obj)
        # Add contents of list as last row in the csv file
        csv_writer.writerow(list_of_elem)

def remove():
    id = input("Enter ID : ")
    with open('students.csv', 'rb') as inp, open('students.csv', 'wb') as out:
        writer = csv.writer(out)
        for row in csv.reader(inp):
            if row[0] != id:
                writer.writerow(row)

# List of strings
row_contents = [11, 'mayur', 'Java', 'Tokyo', 'Morning']
# Append a list as new line to an old csv file
add('students.csv', row_contents)
remove()
The add function works properly, but when I try the remove function it removes all existing entries. Could anyone please help me?
First I will show the code, and below I will leave some comments about the changes.
from csv import writer
import csv

def add(file_name, list_of_elem):
    # Open file in append mode
    with open(file_name, 'a+', newline='') as write_obj:
        # Create a writer object from csv module
        csv_writer = writer(write_obj)
        # Add contents of list as last row in the csv file
        csv_writer.writerow(list_of_elem)

def remove():
    idt = input("Enter ID : ")
    with open('students.csv', 'r') as inp:
        newrows = []
        data = csv.reader(inp)
        for row in data:
            if row[0] != idt:
                newrows.append(row)
    with open('students.csv', 'w') as out:
        csv_writer = writer(out)
        for row in newrows:
            csv_writer.writerow(row)

def display():
    with open('students.csv', 'r') as f:
        data = csv.reader(f)
        for row in data:
            print(row)

# List of strings
row_contents = [10, 'mayur', 'Java', 'Tokyo', 'Morning']
add('students.csv', row_contents)
row_contents = [11, 'mayur', 'Java', 'Tokyo', 'Morning']
add('students.csv', row_contents)
row_contents = [12, 'mayur', 'Java', 'Tokyo', 'Morning']
add('students.csv', row_contents)

# Append a list as new line to an old csv file
display()
remove()
If your file is a CSV, you should open it as a text file, not a binary one.
I changed the name of the variable id to idt because id is a built-in that returns the identity of an object, and it's not good practice to shadow built-in names.
To remove only the rows with a specific idt you should read the whole file, store it in a variable (a list), drop the rows you want to delete, and only after that save the result.
You should use a temporary file instead of opening and writing to the same file simultaneously. Check out this answer: https://stackoverflow.com/a/17646958/14039323
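For reference, a minimal sketch of the temporary-file approach (Python 3; the function signature here is just an illustration, not the linked answer's exact code):

import csv
import os
from tempfile import NamedTemporaryFile

def remove(idt, filename='students.csv'):
    # Write the rows we keep to a temporary file, then replace the original.
    with open(filename, 'r', newline='') as inp, \
         NamedTemporaryFile('w', newline='', delete=False, dir='.') as tmp:
        writer = csv.writer(tmp)
        for row in csv.reader(inp):
            if row and row[0] != idt:
                writer.writerow(row)
    os.replace(tmp.name, filename)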

How can I create a dictionary within a function out of an argument from that function?

Here's what I've tried:
def CreateDict(BDF):
    with open('%sdata\\%s.csv' % (mainDir, BDF), mode='r') as infile:
        reader = csv.reader(infile)
        for row in reader:
            key = row[0]
            print("%s%s = %s" % (BDF, [key], row[1:]))

CreateDict(FileName)
The print gives me the correct answer. However, I want to put the data inside a dictionary instead of just printing it out, and I want that dictionary to have the name of FileName.
To create the dictionary:
def CreateDict(BDF):
    saved = {}
    with open('%sdata\\%s.csv' % (mainDir, BDF), mode='r') as infile:
        reader = csv.reader(infile)
        for row in reader:
            key = row[0]
            saved[key] = row[1:]
    return saved
I don't know how to give the dict a specific name; if you need to save more than one CSV file, you can use a dictionary where the keys are the file names:
fileNames = ['test0', 'test1', 'test2']
all_files = {}
for fileName in fileNames:
    all_files[fileName] = CreateDict(fileName)
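As a small, hypothetical usage example (assuming mainDir is set and those files exist; 'some_key' is a made-up key), you could then look up a row by its first-column value:

print(all_files['test0']['some_key'])  # the remaining columns of the row whose first column is 'some_key'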

How to read list which contains comma from CSV file as a column?

I want to read a CSV file which contains the following data:
Input.csv-
10,[40000,1][50000,5][60000,14]
20,[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
30,[60000,4]
40,[40000,5][50000,14]
I want to parse this CSV file row by row. But these lists contain commas ',' so I'm not getting the correct result.
Program-Code-
if __name__ == "__main__":
    with open(inputfile, "r") as f:
        reader = csv.reader(f, skipinitialspace=True)
        next(reader, None)
        for read in reader:
            no = read[0]
            splitted_record = read[1]
            print splitted_record
Output-
[40000
[40000
[60000
[40000
I understand that the csv reader reads up to each comma for each column. But how can I read each whole list as one column?
Expected Output-
[40000,1][50000,5][60000,14]
[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
[60000,4]
[40000,5][50000,14]
Writing stuff to other file-
name_list = ['no', 'splitted_record']
file_name = 'temp/' + no + '.csv'
if not os.path.exists(file_name):
    f = open(file_name, 'a')
    writer = csv.DictWriter(f, delimiter=',', fieldnames=name_list)
    writer.writeheader()
else:
    f = open(file_name, 'a')
    writer = csv.DictWriter(f, delimiter=',', fieldnames=name_list)
writer.writerow({'no': no, 'splitted_record': splitted_record})
How can I write this splitted_record without quotes ("")?
You can join those items back together, since you know the reader split them at the commas:
if __name__ == "__main__":
    with open(inputfile, "r") as f:
        reader = csv.reader(f, skipinitialspace=True)
        next(reader, None)
        for read in reader:
            no = read[0]
            splitted_record = ','.join(read[1:])
            print splitted_record
output
[40000,1][50000,5][60000,14]
[40000,5][50000,2][60000,1][70000,1][80000,1][90000,1]
[60000,4]
[40000,5][50000,14]
---update---
data is the above output
with open(filepath, 'wb') as f:
    w = csv.writer(f)
    for line in data:
        w.writerow([line])
You can also define your own dialect and register it to read the file the way you need.
https://docs.python.org/2/library/csv.html
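As an illustration of that API (the dialect name and options below are made up; a dialect alone will not merge the bracketed groups back together, it only controls how the reader splits and quotes fields):

import csv

# Hypothetical dialect: same comma delimiter, but tolerant of spaces after it.
csv.register_dialect('bracketed', delimiter=',', quotechar='"',
                     skipinitialspace=True)

with open('Input.csv', 'r') as f:
    for row in csv.reader(f, dialect='bracketed'):
        print(row)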

How to split text file by id in python

I have a bunch of text files containing tab separated tables. The second column contains an id number, and each file is already sorted by that id number. I want to separate each file into multiple files by the id number in column 2. Here's what I have.
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
    with open(readpath+filename, 'r') as fh:
        lines = fh.readlines()
        lastid = 0
        f = open(writepath+'checkme.txt', 'w')
        f.write(filename)
        for line in lines:
            thisid = line.split("\t")[1]
            if int(thisid) <> lastid:
                f.close()
                f = open(writepath+thisid+'-'+filename, 'w')
                lastid = int(thisid)
            f.write(line)
        f.close()
What I get is simply a copy of all the read files with the first id number from each file in front of the new filenames. It is as if
thisid = line.split("\t")[1]
is only done once in the loop. Any clue to what is going on?
EDIT
The problem was that my files used \r rather than \r\n to terminate lines. Corrected code (simply adding 'rU' when opening the read file and swapping <> for !=):
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
    with open(readpath+filename, 'rU') as fh:
        lines = fh.readlines()
        lastid = 0
        f = open(writepath+'checkme.txt', 'w')
        f.write(filename)
        for line in lines:
            thisid = line.split("\t")[1]
            if int(thisid) != lastid:
                f.close()
                f = open(writepath+thisid+'-'+filename, 'w')
                lastid = int(thisid)
            f.write(line)
        f.close()
If you're dealing with tab delimited files, then you can use the csv module, and take advantage of the fact that itertools.groupby will do the previous/current tracking of the id for you. Also utilise os.path.join to make sure your filenames end up joining correctly.
Untested:
import os
import csv
from itertools import groupby

readpath = 'path-to-read-file'
writepath = 'path-to-write-file'

for filename in os.listdir(readpath):
    with open(os.path.join(readpath, filename)) as fin:
        tabin = csv.reader(fin, delimiter='\t')
        for file_id, rows in groupby(tabin, lambda L: L[1]):
            with open(os.path.join(writepath, file_id + '-' + filename), 'w') as fout:
                tabout = csv.writer(fout, delimiter='\t')
                tabout.writerows(rows)
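To see how groupby does the previous/current tracking of the id for you, here is a tiny standalone illustration with made-up rows, grouping on the second column as above:

from itertools import groupby

rows = [['Alice', '1'], ['Bob', '1'], ['Carol', '2']]
for file_id, grp in groupby(rows, lambda r: r[1]):
    print(file_id, list(grp))
# 1 [['Alice', '1'], ['Bob', '1']]
# 2 [['Carol', '2']]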