Add a column in a csv file using python

Add a column in a csv file using python - python

I am trying to insert a column into an existing CSV file named test.csv, at column E.
If column E is already occupied, I need to shift that column to the right and insert the new one.
The column header should be Day, and the column should be filled with the current date.
Current data:
Name Age location school
Adam 12 abc xyz
eve 14 abc xyz
Joy 12 abc xyz
Needed output:
Name Age location school Day
Adam 12 abc xyz =today()
eve 14 abc xyz =today()
Joy 12 abc xyz =today()
I will normally have 2000 rows.
I tried the following code, but it didn't work for me:
import csv
Path = 'C:\\Users\\saquib.khan\\Desktop\\Profile_All\\Demo\\New'
infilename = Path + '\\Test.csv'
outfilename = Path + '\\Out1.csv'
with open(infilename, 'rb') as fp_in, open(outfilename, 'wb') as fp_out:
reader = csv.reader(fp_in, delimiter=",")
writer = csv.writer(fp_out, delimiter=",")
headers = next(reader) # read title row
headers[E:E] = ['New Label']
writer.writerow(headers)
for row in reader:
row[E:E] = [0]
writer.writerow(row)

You are opening the 2 files in b mode (bytes instead of text mode).
Is there a reason why you are doing it this way?
Have you tried to open them in text mode?
with open(infilename, 'r') as fp_in, open(outfilename, 'w') as fp_out:
Does it work in this case?

You can try this:
import csv
Path = 'C:\\Users\\saquib.khan\\Desktop\\Profile_All\\Demo\\New'
infilename = Path + '\\Test.csv'
outfilename = Path + '\\Out1.csv'
with open(infilename, 'rb') as fp_in, open(outfilename, 'wb') as fp_out:
reader = csv.reader(fp_in, delimiter=",")
writer = csv.writer(fp_out, delimiter=",")
headers = next(reader) # read title row
idx = ord('E') - ord('A')
headers[idx:idx] = ['New Label']
writer.writerow(headers)
for row in reader:
row[idx:idx] = [0]
writer.writerow(row)
Convert the column name to the column index to operate with the Python list.

Add Date
now = datetime.datetime.now()
now -= datetime.timedelta(days=1)
dt = now.strftime("%m/%d/20%y")
F = 'in.csv'
df = pd.read_csv(Path1 + F)
df["Day"]=df.shape[0]*[dt]
df.to_csv(Path +"out.csv")

Related

read and save a specific column from csv file

Need help! Using previous topics, I found out how to read data from a csv file, and I have no problem with that, but I cannot save a specific column (e.g. column 4 from file.csv) as a new file, new.csv. My script prints column 4 correctly, but it does not save it.
import csv
with open('file.csv') as csvfile:
file1 = csv.reader(csvfile, delimiter=',')
fourth_col = []
for cols in file1:
fourth_col = cols[3]
print (fourth_col)
new_file = open('new.csv', 'w')
writer = csv.writer(new_file)
writer.writerows(fourth_col)
new_file.close()

I tried the following code and it is working fine
import csv
new_file = open('new.csv', 'w')
writer = csv.writer(new_file)
with open('file.csv') as csvfile:
file1 = csv.reader(csvfile, delimiter=',')
fourth_col = []
for cols in file1:
fourth_col = cols[3]
print(fourth_col)
writer.writerows(fourth_col)
new_file.flush()
new_file.close()
sample files:
file.csv
a,b,c,d,e
f,g,h,i,j
k,l,m,,n
,,,o,
new.csv
d
i
o

Python csv - replace any columns with specified value

I have the following input file with a header row:
test_in.csv
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID,HASH
-72.5708234,41.4155142,39,HICKORY LA,,,,,,,8a0df668e0d49b02
-72.5647745,41.4160301,1213,KILLINGWORTH RD,,,,,,,b3ecaab86e476f46
I need to replace the data in any given column with a specified string.
For example, the CITY column's data should be replaced from "" to "MyCity".
My code only outputs the header and the first row:
python test_forcefld.py test_in.csv MyCity CITY out_test.csv
import csv
import sys
in_file_name = sys.argv[1]
force_data = sys.argv[2]
force_fld = sys.argv[3]
out_file_name = sys.argv[4]
# First read top row/header from input file
fieldnames = []
for filename in [in_file_name]:
with open(filename, "rb") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
fieldnames.append(h)
#print headers to output file
with open(out_file_name, 'w') as fou:
dw = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw.writeheader()
f_in2 = open(in_file_name, "rb")
reader2 = csv.DictReader(f_in2) # Uses the field names in this file
datarow = next(reader2)
datarow[force_fld] = force_data
with open(out_file_name, 'wa') as fou:
dw2 = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw2.writeheader()
dw2.writerow(data row)
Output shows
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID,HASH
-72.5708234,41.4155142,39,HICKORY LA,,MyCity,,,,,8a0df668e0d49b02

Your code is a little difficult to read, but assuming datarow is a dictionary containing your records:
In your last row, change
dw2.writerow(datarow)
Into
dw2.writerows(datarow)
While you're at it, you should also consider using datarow.keys() for your fieldnames, for conciseness.

This should do it, you just need pandas:
import pandas as pd
df = pd.read_csv(in_file_name, sep=',')
df['CITY'].fillna('MyCity', inplace=True)
And to save it:
df.to_csv(out_file_name)

You can try something like this to get your desired file.
I'm assuming your input file is called f_input.txt and your output file is called f_output.txt:
data = list(k.rstrip().split(',') for k in open("f_input.txt", 'r'))
with open("f_output.txt", 'a+') as f:
f.write(",".join(data[0]) + '\n')
for k in data[1:]:
# Modify the positions of k[:n] + your data + k[n+1]
# if you need to handle another position
f.write(",".join(k[:6]) + "MyCity" + ",".join(k[7:]) + "\n")

This worked in the end:
import csv
import sys
in_file_name = sys.argv[1]
force_data = sys.argv[2]
force_fld = sys.argv[3]
out_file_name = sys.argv[4]
# First read top row/header from input file
fieldnames = []
for filename in [in_file_name]:
with open(filename, "rb") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
fieldnames.append(h)
f_in2 = open(in_file_name, "r")
#print headers to output file
fou = open(out_file_name, 'wa')
dw = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw.writeheader()
reader2 = csv.DictReader(f_in2) # Uses the field names in this file
for row in reader2:
row[force_fld] = force_data
dw2 = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw2.writerow(row)

How to not just add a new first column to csv but alter the header names

I would like to do the following:
read a csv file, add a new first column, rename some of the columns,
and then load the records from the csv file.
Ultimately, I would like the first column to be populated with the file
name.
I'm fairly new to Python and I've kind of worked out how to change the fieldnames; however, loading the data is a problem, as it's looking for the original fieldnames, which no longer match.
Code snippet
import csv
import os
inputFileName = "manifest1.csv"
outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"
with open(inputFileName, 'rb') as inFile, open(outputFileName, 'wb') as outfile:
r = csv.DictReader(inFile)
fieldnames = ['MapSvcName','ClientHostName', 'Databasetype', 'ID_A', 'KeepExistingData', 'KeepExistingMapCache', 'Name', 'OnPremisePath', 'Resourcestype']
w = csv.DictWriter(outfile,fieldnames)
w.writeheader()
*** Here is where I start to go wrong
# copy the rest
for node, row in enumerate(r,1):
w.writerow(dict(row))
Error
File "D:\Apps\Python27\ArcGIS10.3\lib\csv.py", line 148, in _dict_to_list
+ ", ".join([repr(x) for x in wrong_fields]))
ValueError: dict contains fields not in fieldnames: 'Databases [xsi:type]', 'Resources [xsi:type]', 'ID'
I would like some assistance, not just to learn but to truly understand what I need to do.
Cheers and thanks
Peter
Update..
I think I've worked it out
import csv
import os
inputFileName = "manifest1.csv"
outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"
with open(inputFileName, 'rb') as inFile, open(outputFileName, 'wb') as outfile:
r = csv.reader(inFile)
w = csv.writer(outfile)
header = next(r)
header.insert(0, 'MapSvcName')
#w.writerow(header)
next(r, None) # skip the first row from the reader, the old header
# write new header
w.writerow(['MapSvcName','ClientHostName', 'Databasetype', 'ID_A', 'KeepExistingData', 'KeepExistingMapCache', 'Name', 'OnPremisePath', 'Resourcestype'])
prevRow = next(r)
prevRow.insert(0, '0')
w.writerow(prevRow)
for row in r:
if prevRow[-1] == row[-1]:
val = '0'
else:
val = prevRow[-1]
row.insert(0,val)
prevRow = row
w.writerow(row)

How to split up data from a column in a csv file into two separate output csv files?

I have a .csv file, e.g.:
ID NAME CATEGORIES
1, x, AB
2, xx, AA
3, xxx, BA
How would I get this to form two output .csv files based on the category e.g.:
File 1:
ID NAME CATEGORY
1, x, A
2, xx, A
3, xxx, B
File 2:
ID NAME CATEGORY
1, x, B
2, xx, A
3, xxx, A
I have the input and output set up, but just an empty for loop where I'm stumped:
records = [line for line in csv.reader(open('test_input.csv', 'rt'), delimiter=',')]
outfile = open('test_output1.csv', 'wt')
outfileWriter = csv.writer(outfile, delimiter=',')
for record in records:
#something!
outfileWriter.writerow(record)
outfile.close()
I'd appreciate any help!

import csv
records = [line for line in csv.reader(open('test_input.csv', 'rt'), delimiter=',')]
outfile1 = open('test_output1.csv', 'wt')
outfile2 = open('test_output2.csv', 'wt')
outfileWriter1 = csv.writer(outfile1, delimiter=',')
outfileWriter2 = csv.writer(outfile2, delimiter=',')
# headers always the same
outfileWriter1.writerow(records[0])
outfileWriter2.writerow(records[0])
for record in records[1:]:
cat = record[-1].strip() # get category in form "AB"
new_record = record
new_record[-1] = "\t%s" % cat[0] # set category for file 1 with tab as a prefix
outfileWriter1.writerow(new_record)
new_record[-1] = "\t%s" % cat[1] # set category for file 2 with tab as a prefix
outfileWriter2.writerow(new_record)
outfile1.close()
outfile2.close()

import csv
with open('input.csv') as f, open('file1.csv', 'w') as f1, open('file2.csv', 'w') as f2:
header = next(f) #read header
reader = csv.reader(f, delimiter=',', skipinitialspace=True)
f1.write(header) #write header
f2.write(header) #write header
writ1 = csv.writer(f1, delimiter=',')
writ2 = csv.writer(f2, delimiter=',')
for row in reader:
c1, c2 = row[-1] #split the category into c1 and c2
writ1.writerow(row[:-1] + [c1]) #write c1 to file1
writ2.writerow(row[:-1] + [c2]) #write c2 to file2

How to do nested loop to search the string in a file

I have 2 Excel files, each containing thousands of rows. I want to take each row from file1, search the whole of file2 for it, and write the matching rows to file3.
file 1 file2 file3
abc.bcg#gmail.com abc.bcg_12253 abc.bcg_12253
bcg.abc#gmail.com efx.rfz_12345 def.xyz_08345
def.xyz#gmail.com wqr.qtf_34567
zxc.mnb_98764
def.xyz_08345
FileReader = csv.DictReader(f)
for row in FileReader:
emailLegalFile = row['email']
name_emailFile = emailFile[:emailLegalFile.find('#')]
print name_emailLegalFile
#with open(inputfile, 'rb') as d:
inputFileReader = csv.DictReader(d)
for r in inputFileReader:
if name_emailFile in r['google_email']:
date = r['date']
time = r['time']
t_format = r['format']
file_size = r['file_size']
google_email = r['google_email']
#writer = csv.writer(w)
#dic = {'date': date, 'time':time,'format':t_format,'file_size':file_size, 'google_email':google_email}
#writer.writerow(dic)
list = [date,time,t_format,file_size,google_email]
with open('result.csv','a') as e:
writer_1 = csv.writer(e,delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
writer_1.writerow(list)
File2 has 5 columns, but I want to match against the 5th column only.
The output I am getting is just the 1st value, i.e. abc.bcg_12253.
Please help me in solving this.
Thank you

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Add a column in a csv file using python - python

You are opening the 2 files in b mode (bytes instead of text mode). Is there a reason why you are doing it this way? Have you tried to open them in text mode? with open(infilename, 'r') as fp_in, open(outfilename, 'w') as fp_out: Does it work in this case?

Add Date now = datetime.datetime.now() now -= datetime.timedelta(days=1) dt = now.strftime("%m/%d/20%y") F = 'in.csv' df = pd.read_csv(Path1 + F) df["Day"]=df.shape[0]*[dt] df.to_csv(Path +"out.csv")

Related

read and save a specific column from csv file

Python csv - replace any columns with specified value

How to not just add a new first column to csv but alter the header names

How to split up data from a column in a csv file into two separate output csv files?

How to do nested loop to search the string in a file

Categories

Resources