How to do nested loop to search the string in a file

How to do nested loop to search the string in a file - python

I have 2 excel file having thousands of data.I want to take the row from file1 and search the entire file2 and output the repeated to file3.
file 1 file2 file3
abc.bcg#gmail.com abc.bcg_12253 abc.bcg_12253
bcg.abc#gmail.com efx.rfz_12345 def.xyz_08345
def.xyz#gmail.com wqr.qtf_34567
zxc.mnb_98764
def.xyz_08345
FileReader = csv.DictReader(f)
for row in FileReader:
emailLegalFile = row['email']
name_emailFile = emailFile[:emailLegalFile.find('#')]
print name_emailLegalFile
#with open(inputfile, 'rb') as d:
inputFileReader = csv.DictReader(d)
for r in inputFileReader:
if name_emailFile in r['google_email']:
date = r['date']
time = r['time']
t_format = r['format']
file_size = r['file_size']
google_email = r['google_email']
#writer = csv.writer(w)
#dic = {'date': date, 'time':time,'format':t_format,'file_size':file_size, 'google_email':google_email}
#writer.writerow(dic)
list = [date,time,t_format,file_size,google_email]
with open('result.csv','a') as e:
writer_1 = csv.writer(e,delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
writer_1.writerow(list)
File2 has 5 columns, but wanted to match with the 5th column only.
The output I am getting is just the 1st value i.e. abc.bcg_12253.
Please help me in solving this.
Thank you

Related

CSV data matching: 'dict_keys' object is not subscriptable

I have put the together code to compare and match data between two CSVs and collate that data into a new CSV. The CSVs have similar data but the column names and their positions are different.
When I Run and Debug, it throws the following error at line 42:
Exception has occurred: TypeError
'dict_keys' object is not subscriptable
File "D:\Documents\Python\iris\accountmanagement2.py", line 42, in <module>
file1_columns[0]: file2_columns[1],
TypeError: 'dict_keys' object is not subscriptable
This is the code I have put together for this task. I tried using the type(data1[0].keys()) but that just says 'type' object is not subscriptable.
Really need some advice or ideas as I am completely stumped :)
import csv
# specify the file paths for the two CSV files
file1 = "D:\Documents\Python\iris\esr.csv"
file2 = "D:\Documents\Python\iris\iris.csv"
def define_columns(file_path):
with open(file_path, "r") as f:
reader = csv.reader(f)
# get the first row (column headers)
columns = next(reader)
return columns
# Define the keys of the two files
file1_keys = define_columns(file1)
file2_keys = define_columns(file2)
# read the data from the first CSV file
data1 = []
with open(file1, "r") as f:
reader = csv.DictReader(f)
for row in reader:
# Convert data to ASCII
for key in file1_keys:
row[key] = row[key].encode("ascii", "ignore").decode()
data1.append(row)
# read the data from the second CSV file
data2 = []
with open(file2, "r") as f:
reader = csv.DictReader(f)
for row in reader:
# Convert data to ASCII
for key in file2_keys:
row[key] = row[key].encode("ascii", "ignore").decode()
data2.append(row)
# Define the columns name of the two files
file1_columns = data1[0].keys()
file2_columns = data2[0].keys()
col_map = {
file1_columns[0]: file2_columns[1],
file1_columns[2]: file2_columns[5],
file1_columns[1]: file2_columns[6],
file1_columns[4]: file2_columns[7]
}
# match the data from the two files based on a specific column (e.g. "ID")
matching_data = []
non_matching_data1 = []
non_matching_data2 = []
for row1 in data1:
matched = False
for row2 in data2:
if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
matching_data.append({**row1, **row2})
matched = True
break
if not matched:
non_matching_data1.append(row1)
for row2 in data2:
matched = False
for row1 in data1:
if row1[col_map[file1_columns[0]]] == row2[col_map[file2_columns[1]]]:
matched = True
break
if not matched:
non_matching_data2.append(row2)
# create a new CSV file with the matched data
with open("matched_data.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(col_map.keys())+list(col_map.values()))
writer.writeheader()
for row in matching_data:
writer.writerow(row)
# create a new CSV file with the non-matching data from file1
with open("non_matching_data1.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(data1[0].keys()))
writer.writeheader()
for row in non_matching_data1:
writer.writerow(row)
# create a new CSV file with the non-matching data from file2
with open("non_matching_data2.csv", "w") as f:
writer = csv.DictWriter(f, fieldnames=list(data2[0].keys()))
writer.writeheader()
for row in non_matching_data2:
writer.writerow(row)

Compare 2 different csv files and output all the changes into a new csv

I have 2 CSVs which are New.csv and Old.csv shown below:
Old.csv
longName,shortName,eventType,number,severity
ACTAGENT201,ACAT201,RES,1,INFO
ACTAGENT202,ACAT202,RES,2,ALERT
ACODE801,AC801,ADMIN,1,MINOR
ACODE802,AC802,ADMIN,2,MINOR
ACODE102,AC102,COMM,2,CRITICAL
ACODE103,AC103,COMM,3,CRITICAL
ACODE104,AC104,COMM,4,CRITICAL
ACODE105,AC105,COMM,5,CRITICAL
ACODE106,AC106,COMM,6,CRITICAL
New.csv
longName,shortName,eventType,number,severity
ACTAGENT201,ACAT201,RES,1,INFO
ACTAGENT202,ACAT202,RES,2,ALERT
ACODE801,AC801,ADMIN,1,MINOR
ACODE802,AC802,ThisHasBeenChanged,2,MINOR
ACODE102,AC102,COMM,2,CRITICAL
ACODE103,AC103,COMM,3,CRITICAL
ACODE104,AC104,COMM,4,THISHASBEENCHANGED
ACODE105,AC105,COMM,5,CRITICAL
ACODE106,AC106,COMM,6,CRITICAL
If there is data in one of the columns in the row that has been modified/changed between the old.csv and the new.csv then that whole row should be appended to the changes.csv like this with each column from old.csv and new.csv beside each other:
I know how to find new and deleted items in the csv, but could not figure out how to get the modified items. Code below:
import csv
def DeletedItems(old_csv, new_csv, changes_csv):
with open(new_csv, newline="", encoding="utf8") as new_fp:
csv_reader = csv.reader(new_fp)
csv_headings = next(csv_reader)
new_long_names = {row[0] for row in csv.reader(new_fp)}
with open(old_csv, newline="", encoding="utf8") as old_fp:
with open(changes_csv, "a", newline="", encoding="utf8") as changes_fp:
writer = csv.writer(changes_fp)
writer.writerow("")
for row in csv.reader(old_fp):
if row[0] not in new_long_names:
writer.writerow(row)
def NewItems(old_csv, new_csv, changes_csv):
with open(old_csv, newline="", encoding="utf8") as old_fp:
csv_reader = csv.reader(old_fp)
csv_headings = next(csv_reader)
old_long_names = {row[0] for row in csv.reader(old_fp)}
with open(new_csv, newline="", encoding="utf8") as new_fp:
with open(changes_csv, "w", newline="", encoding="utf8") as changes_fp:
writer = csv.writer(changes_fp)
for row in csv.reader(new_fp):
if row[0] not in old_long_names:
writer.writerow(row)
NewItems("old.csv", "new.csv", "changes.csv")
DeletedItems("old.csv", "new.csv", "changes.csv")

First, read both CSV files into a dictionary, using the longName values as keys.
import csv
with open(old_csv_file, "r") as fh:
reader = csv.reader(fh)
old_csv = {row[0]: row for row in reader}
with open(new_csv_file, "r") as fh:
reader = csv.reader(fh)
new_csv = {row[0]: row for row in reader}
Then, it's easy to find newly added and deleted keys using set operations.
old_longNames = set(old_csv.keys())
new_longNames = set(new_csv.keys())
# common: set intersection
common_longNames = old_longNames.intersection(new_longNames)
# removed: whatever's in old but not in new
removed_longNames = old_longNames - new_longNames
# added: whatever's in new but not in old
added_longNames = new_longNames - old_longNames
Finally, iterate over the common set to find where there are changes:
changed_longNames = []
for key in common_longNames:
old_row = old_csv[key]
new_row = new_csv[key]
# if any(o != n for o, n in zip(old_row, new_row)):
if old_row != new_row:
# this row has at least one column changed. Do whatever
print(f"LongName {key} has changes")
changed_longNames.append(key)
Or, as a list comprehension:
changed_longNames = [key for key in common_longNames if old_csv[key] != new_csv[key]]
Writing everything to a new csv file is also fairly trivial. Note that the sets don't preserve the order, so you might not get the result in the same order.
with open("deleted.csv", "w") as fh:
writer = csv.writer(fh)
for key in removed_longNames:
writer.writerow(old_csv[key])
with open("inserted.csv", "w") as fh:
writer = csv.writer(fh)
for key in added_longNames:
writer.writerow(new_csv[key])
with open("changed.csv", "w") as fh:
writer = csv.writer(fh)
for key in changed_longNames:
old_row = old_csv[key]
new_row = new_csv[key]
merged_row = []
for oi, ni in zip(old_row, new_row):
merged_row.append(oi)
merged_row.append(ni)
writer.writerow(merged_row)

Add a column in a csv file using python

I am Trying to insert one column on an existing CSV file name as test.csv, on column number E.
If the column E is already occupied, i Need to shift that column to right and insert new.
Column Head should be Day, columns should be filled with current date
Cunnernt Data
Name Age location school
Adam 12 abc xyz
eve 14 abc xyz
Joy 12 abc xyz
Need Out
Name Age location school Day
Adam 12 abc xyz =today()
eve 14 abc xyz =today()
Joy 12 abc xyz =today()
I will have normally 2000 rows
I tried the following code that didn't work for me
import csv
Path = 'C:\\Users\\saquib.khan\\Desktop\\Profile_All\\Demo\\New'
infilename = Path + '\\Test.csv'
outfilename = Path + '\\Out1.csv'
with open(infilename, 'rb') as fp_in, open(outfilename, 'wb') as fp_out:
reader = csv.reader(fp_in, delimiter=",")
writer = csv.writer(fp_out, delimiter=",")
headers = next(reader) # read title row
headers[E:E] = ['New Label']
writer.writerow(headers)
for row in reader:
row[E:E] = [0]
writer.writerow(row)

You are opening the 2 files in b mode (bytes instead of text mode).
Is there a reason why you are doing it this way?
Have you tried to open them in text mode?
with open(infilename, 'r') as fp_in, open(outfilename, 'w') as fp_out:
Does it work in this case?

You can try this:
import csv
Path = 'C:\\Users\\saquib.khan\\Desktop\\Profile_All\\Demo\\New'
infilename = Path + '\\Test.csv'
outfilename = Path + '\\Out1.csv'
with open(infilename, 'rb') as fp_in, open(outfilename, 'wb') as fp_out:
reader = csv.reader(fp_in, delimiter=",")
writer = csv.writer(fp_out, delimiter=",")
headers = next(reader) # read title row
idx = ord('E') - ord('A')
headers[idx:idx] = ['New Label']
writer.writerow(headers)
for row in reader:
row[idx:idx] = [0]
writer.writerow(row)
Convert the column name to the column index to operate with the Python list.

Add Date
now = datetime.datetime.now()
now -= datetime.timedelta(days=1)
dt = now.strftime("%m/%d/20%y")
F = 'in.csv'
df = pd.read_csv(Path1 + F)
df["Day"]=df.shape[0]*[dt]
df.to_csv(Path +"out.csv")

Python csv - replace any columns with specified value

I have the following input file with a header row:
test_in.csv
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID,HASH
-72.5708234,41.4155142,39,HICKORY LA,,,,,,,8a0df668e0d49b02
-72.5647745,41.4160301,1213,KILLINGWORTH RD,,,,,,,b3ecaab86e476f46
I need to replace any of the columns with a specified string
for example CITY column's data should be replaced from "" to "MyCity"
My code only outputs the header and first row
python test_forcefld.py test_in.csv MyCity CITY out_test.csv
import csv
import sys
in_file_name = sys.argv[1]
force_data = sys.argv[2]
force_fld = sys.argv[3]
out_file_name = sys.argv[4]
# First read top row/header from input file
fieldnames = []
for filename in [in_file_name]:
with open(filename, "rb") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
fieldnames.append(h)
#print headers to output file
with open(out_file_name, 'w') as fou:
dw = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw.writeheader()
f_in2 = open(in_file_name, "rb")
reader2 = csv.DictReader(f_in2) # Uses the field names in this file
datarow = next(reader2)
datarow[force_fld] = force_data
with open(out_file_name, 'wa') as fou:
dw2 = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw2.writeheader()
dw2.writerow(data row)
Output shows
LON,LAT,NUMBER,STREET,UNIT,CITY,DISTRICT,REGION,POSTCODE,ID,HASH
-72.5708234,41.4155142,39,HICKORY LA,,MyCity,,,,,8a0df668e0d49b02

Your code is a little difficult to read, but assuming datarow is a dictionary containing your records:
In your last row, change
dw2.writerow(datarow)
Into
dw2.writerows(datarow)
While you're at it, you should also consider using datarow.keys() for your fieldnames, for conciseness.

This should do it, you just need pandas:
import pandas as pd
df = pd.read_csv(in_file_name, sep=',')
df['CITY'].fillna('MyCity', inplace=True)
And to save it:
df.to_csv(out_file_name)

You can try somthing like this in order to have your desired file:
I'm assuming your input file is called f_input.txt and your output file is called f_output.txt:
data = list(k.rstrip().split(',') for k in open("f_input.txt", 'r'))
with open("f_output.txt", 'a+') as f:
f.write(",".join(data[0]) + '\n')
for k in data[1:]:
# Modify the positions of k[:n] + your data + k[n+1]
# if you need to handle another position
f.write(",".join(k[:6]) + "MyCity" + ",".join(k[7:]) + "\n")

This worked in the end:
import csv
import sys
in_file_name = sys.argv[1]
force_data = sys.argv[2]
force_fld = sys.argv[3]
out_file_name = sys.argv[4]
# First read top row/header from input file
fieldnames = []
for filename in [in_file_name]:
with open(filename, "rb") as f_in:
reader = csv.reader(f_in)
headers = next(reader)
for h in headers:
fieldnames.append(h)
f_in2 = open(in_file_name, "r")
#print headers to output file
fou = open(out_file_name, 'wa')
dw = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw.writeheader()
reader2 = csv.DictReader(f_in2) # Uses the field names in this file
for row in reader2:
row[force_fld] = force_data
dw2 = csv.DictWriter(fou, delimiter=',', fieldnames=fieldnames)
dw2.writerow(row)

Python: add value and write output

I need to get information from a list and add a column year from name. I still not sure how to add one field 'year' in record. Can I use append?
And about output file, I just need use outputcsv.writerow(records) isn't it?
This is a part of code that I stuck:
filenames = ('babyQld2010.csv',
'babyQld2011.csv',
'babyQld2012.csv',
'babyQld2012.csv',
'babyQld2014.csv')
outFile = open('babyQldAll.csv','w')
csvFile_out = csv.writer(outFile, delimiter=',')
for filename in filenames:
name, ext = filename.split('.')
year = name[-4:] #extract year from file names
records = extract_names(filename)
# Get (name, count, gender) from list "records",
# and add value of "year" and write into output file (using "for" loop )
Output file look like:
2010,Lola,69,Girl
And input, I have 5 file babyQld2010.csv, babyQld2011.csv, babyQld2012.csv, babyQld2012.csv, babyQld2014.csv which contains:
Mia,425,William,493
and I have to sort it in format and I already done it and save in list 'records'
Lola,69,Girl
now I need to add one field 'year' on 'record' list and export csv file.
This is my full code:
import csv
def extract_names(filename):
''' Extract babyname, count, gender from a csv file,
and return the data in a list.
'''
inFile = open(filename, 'rU')
csvFile = csv.reader(inFile, delimiter=',')
# Initialization
records = []
rowNum = 0
for row in csvFile:
if rowNum != 0:
# +++++ You code here ++++
# Read each row of csv file and save information in list 'records'
# as (name, count, gender)
records.append([row[0], row[1], "Female"])
records.append([row[2], row[3], "Male"])
print('Process each row...')
rowNum += 1
inFile.close()
return(records)
#### Start main program #####
filenames = ('babyQld2010.csv',
'babyQld2011.csv',
'babyQld2012.csv',
'babyQld2012.csv',
'babyQld2014.csv')
with open('babyQldAll.csv','w') as outFile:
csvFile_out = csv.writer(outFile, delimiter=',')
for filename in filenames:
name, ext = filename.split('.')
year = name.split('.')[0][-4:] #extract year from file names
records = extract_names(filename)
for record in records:
csvFile_out.write([year] + record)
print("Write in csv file...")
outFile.close()

To get the year from the csv file you can simply split the string at '.' and then take the last four characters from the first part of the split. Example -
>>> s = 'babyQld2010.csv'
>>> s.split('.')[0][-4:]
'2010'
Then just simply iterate over your list of records, which you say is correct, for each list within in, use list contatenation to create a new list with year at the start and write that to csv file.
I would also suggest that you use with statement for opening the file to write to (and even in the function where you are reading from the other csv files). Example -
filenames = ('babyQld2010.csv',
'babyQld2011.csv',
'babyQld2012.csv',
'babyQld2012.csv',
'babyQld2014.csv')
with open('babyQldAll.csv','w') as outFile:
csvFile_out = csv.writer(outFile, delimiter=',')
for filename in filenames:
name, ext = filename.split('.')
year = name.split('.')[0][-4:] #extract year from file names
records = extract_names(filename)
for record in records:
csvFile_out.writerow([year] + record)

Yes, you can just append the year column to each row as you read it in from your source files. You can read in & write out each row as a dictionary so that you can use your existing column headers to address the data if you need to massage it on the way through.
Using the csv.DictWriter() method you specify your headers (fieldnames) when you set it up. You can then write them out with the writeheader() method.
import csv
file_list = ['babyQld2010.csv',
'babyQld2011.csv',
'babyQld2012.csv',
'babyQld2012.csv',
'babyQld2014.csv']
outFile = open('babyQldAll.csv', 'wb')
csv_writer = csv.DictWriter(outFile,
fieldnames=['name','count','gender','year'])
csv_write_out.writeheader()
for a_file in file_list:
name,ext = a_file.split('.')
year = name[-4:]
with open(a_file, 'rb') as inFile:
csv_read_in = csv.DictReader(inFile)
for row in csv_read_in:
row['year'] = year
csv_writer.writerow(row)
outfile.close()
Hope this helps.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to do nested loop to search the string in a file - python

Related

CSV data matching: 'dict_keys' object is not subscriptable

Compare 2 different csv files and output all the changes into a new csv

Add a column in a csv file using python

Python csv - replace any columns with specified value

Python: add value and write output

Categories

Resources