Redundant instructions in two conditions - python

I want to optimize the following code:
# Question code as posted (its indentation was lost). For each input file the
# branch taken depends on CLIENT; the header/usecases loop appears both under
# "C1" and under the final else, which is the duplication being asked about.
for myFile in myFiles:
# NOTE(review): this opens `filename`, not the loop variable `myFile` - confirm intent.
file = open(filename, 'rt')
try:
if CLIENT == "C1":
head = rows[:7]
tail = rows[7:]
for row in rows:
# Only rows whose joined cells are not the empty string are processed.
if "".join(row)!= "":
if not u_pass:
# While u_pass is falsy: build the header from the cells after the third,
# stripped and with spaces replaced by underscores.
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
else:
self.usecases(row, data, index)
elif CLIENT == 'C2':
# C2 only reads the first two lines of the file through csv.reader.
reader = csv.reader(file)
firstline = next(reader)
secondline = next(reader)
else:
for row in rows:
if "".join(row)!= "":
if not u_pass:
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
# Data retrieval
else:
self.usecases(row, data, index)
The code below appears twice in the previous snippet, i.e. these two branches share the same instructions:
# The duplicated fragment: while u_pass is falsy a non-empty row supplies the
# header; presumably every later non-empty row goes to self.usecases
# (the if/else pairing is inferred - indentation was lost).
for row in rows:
if "".join(row)!= "":
if not u_pass:
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
else:
self.usecases(row, data, index)

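The following should do the trick: test for "C2" first, so that "C1" and the default case end up in the same branch and can share a single copy of the loop: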
# Restructured version: "C2" is tested first, so "C1" and every other client
# fall into the same else branch and share one copy of the loop.
for myFile in myFiles:
file = open(filename, 'rt')
try:
if CLIENT == "C2":
reader = csv.reader(file)
firstline = next(reader)
secondline = next(reader)
else:
# Only the two slices are specific to "C1"; the loop that follows is presumably
# at the level of this inner `if`, so it runs for "C1" and the default case alike
# (indentation was lost - confirm).
if CLIENT == "C1":
head = rows[:7]
tail = rows[7:]
for row in rows:
if "".join(row)!= "":
if not u_pass:
header = [ row.strip().replace(" ", "_") for row in row[3:] ]
u_pass = True
else:
self.usecases(row, data, index)

Related

Deleting all rows with the same email in csv file

I'd like to ask how to delete all rows with the same email in a CSV file. The idea is to pick random rows, and once a row has been picked and printed, delete all rows with its email from the file. Currently the code deletes only the rows that are picked and printed; if there are more rows with the same email, it does not delete them. How can I fix this?
Full code: https://pastebin.com/qzHm4NSA
Data structure: https://ibb.co/wWXfL6X
def generate():
# Draw noOfWinners distinct entry numbers, print the matching rows and
# rewrite the CSV file without those rows.
global winningRows
filename = enterFile()
noOfWinners = 5
winningNumbers = []
# Keep drawing until noOfWinners distinct numbers between 1 and totalEntries are held.
while len(winningNumbers) < noOfWinners:
luckyNumber = random.randint(1, totalEntries)
if luckyNumber not in winningNumbers:
winningNumbers.append(luckyNumber)
with open(filename, newline='\n') as entriesCSV:
entriesDict = csv.DictReader(entriesCSV,dialect="excel")
allRows = [row for row in entriesDict]
winningRows = [row for row in allRows if int(row["#"]) in winningNumbers]
# NOTE(review): rows are kept or dropped by their "#" value only, so other rows
# that share a winner's email remain in nonWinningRows - this is the behaviour
# the question is about.
nonWinningRows = [row for row in allRows if int(row["#"]) not in winningNumbers]
for row in winningRows:
winnerName = row["Name"]
winnerID = row["ID"]
winnerEmail = row["Email"]
print(f"The winner is {winnerName}, ID {winnerID}, email {winnerEmail}")
# Overwrite the same file with the header and the remaining rows.
with open(filename, "w", newline='\n') as entriesCSV:
writer = csv.DictWriter(entriesCSV, fieldnames=["#", "Name", "ID", "Email"])
writer.writeheader()
writer.writerows(nonWinningRows)
Maintain a list of the emails of the picked winners and then use it to filter the non-winning rows.
To do that, modify the code as follows:
def generate():
    """Draw the winners, print them, and remove their entries from the CSV.

    Picks up to ``noOfWinners`` distinct entry numbers between 1 and
    ``totalEntries``, prints each winning row once per distinct email, and
    rewrites the file without any row whose email belongs to a winner.
    The winning rows are also stored in the module-level ``winningRows``.
    """
    global winningRows
    filename = enterFile()
    noOfWinners = 5

    # random.sample draws distinct numbers directly; capping the sample size
    # avoids the endless retry loop when there are fewer entries than winners.
    winningNumbers = set(
        random.sample(range(1, totalEntries + 1), min(noOfWinners, totalEntries))
    )

    # newline='' is what the csv module asks for on files it reads or writes.
    with open(filename, newline='') as entriesCSV:
        allRows = list(csv.DictReader(entriesCSV, dialect="excel"))

    winningRows = [row for row in allRows if int(row["#"]) in winningNumbers]

    winnerEmails = set()
    for row in winningRows:
        winnerEmail = row["Email"]
        if winnerEmail in winnerEmails:
            # The same person was drawn twice: announce them only once.
            continue
        winnerEmails.add(winnerEmail)
        print(f"The winner is {row['Name']}, ID {row['ID']}, email {winnerEmail}")

    # Every winning row's email is in winnerEmails, so filtering on the email
    # alone removes the winning rows and all other entries of the same people.
    nonWinningRows = [row for row in allRows if row["Email"] not in winnerEmails]

    with open(filename, "w", newline='') as entriesCSV:
        writer = csv.DictWriter(entriesCSV, fieldnames=["#", "Name", "ID", "Email"])
        writer.writeheader()
        writer.writerows(nonWinningRows)

Sort and Order columns Python

I have code that creates a CSV file with information from another CSV file. In my new CSV file, I would like to save only the 20 rows with the highest values in the 'impressions' column, sorted from highest to lowest.
I read something about pandas, but I can't find anything about how to do it.
To be more clear, I shared some images:
before:
enter image description here
after:
enter image description here
Code:
import csv
# Copy selected columns of the report into a new CSV, relabelling the
# domain / ddomain values and adding a DATA column.
# (Indentation was lost in this listing, so the nesting below is not shown.)
input_file = 'report_2017_12_11_12_31_19UTC.csv'
output_file= "All_Data_Tags.csv"
with open(input_file) as csvfile, open(output_file, "w") as output:
reader = csv.DictReader(csvfile)
cols = ("domain","ddomain","opportunities", "impressions", "fillRate", "DATA")
# extrasaction='ignore' drops any input column that is not listed in cols.
writer = csv.DictWriter(output, fieldnames=cols, extrasaction='ignore')
writer.writeheader()
for row in reader:
# Rewrite fillRate as the value times 100, formatted with two decimals.
row['fillRate'] = '{:.2f}'.format(float(row['fillRate']) * 100)
# Replace empty domain / ddomain values with descriptive labels.
if row['ddomain'] == "":
if row['domain'] == "":
row['ddomain'] = "App"
row['domain'] = " "
if row['domain'] == row['ddomain']:
row['domain'] = "Real Site"
if row['domain'] == "":
row['domain'] = "Detected Only"
if row['ddomain'] == "":
row['ddomain'] = "Vast Media"
# Set DATA to "FAKE" or "OK" from the relabelled values.
# NOTE(review): the exact if/else pairing cannot be recovered from this listing.
if row['ddomain'] != row['domain']:
if row['ddomain'] != "Vast Media":
if row['domain'] != "Real Site":
if row['domain'] != "Detected Only":
if row['ddomain'] != "App":
row['DATA'] = "FAKE"
else:
row['DATA'] = "OK"
else:
row['DATA'] = "OK"
else:
row['DATA'] = "OK"
else:
row['DATA'] = "OK"
writer.writerow(row)
Here is the Answer:
code:
import pandas as pd
# Keep only the 20 rows with the highest 'impressions', highest first.
movies = pd.read_csv('Top20_Media_Yesterday.csv')
top20 = movies.sort_values('impressions', ascending=False).head(20)
# index=False keeps the DataFrame index out of the file, so no extra
# "Unnamed: 0" column appears when the CSV is read back later.
top20.to_csv('Top20_Media_Yesterday.csv', index=False)
Use the DataFrame.sort_values function of pandas, passing the column name(s) you wish to sort by to the `by` argument and setting `ascending=False` to sort from highest to lowest.
You can find similar examples here.

My function to extract totals is exhausting my input file for future reading

The client includes 3 rows at the bottom that contain totals for me to reconcile against in my program. Only problem is that my program is exhausting the input file with readlines() before it can do anything else. Is there a way to keep the file from being exhausted during my get_recon_total function call?
#!/usr/bin/env python
# pre_process.py
import csv
import sys
def main():
# Entry point: read the CSV named by sys.argv[1], group its rows, and write
# the grouped output to the file named by sys.argv[2].
infile = sys.argv[1]
outfile = sys.argv[2]
with open(infile, 'rbU') as in_obj:
# Create reader object, get fieldnames for later on
reader, fieldnames = open_reader(in_obj)
# get_recon_totals() calls in_obj.readlines() on the same file object that
# `reader` wraps, so the rest of the file is consumed here and the
# sorted(reader, ...) call below has no rows left to iterate.
nav_tot_cnt, nav_rec_cnt, nav_erec_cnt = get_recon_totals(in_obj)
print nav_tot_cnt, nav_rec_cnt, nav_erec_cnt
# This switches the dictionary to a sorted list... necessary??
reader_list = sorted(reader, key=lambda key: (key['PEOPLE_ID'],
key['DON_DATE']))
# Create a list to contain section header information
header_list = create_header_list(reader_list)
# Create dictionary that contains header list as the key,
# then all rows that match as a list of dictionaries.
master_dict = map_data(header_list, reader_list)
# Write data to processed file, create recon counts to compare
# to footer record
tot_cnt, rec_cnt, erec_cnt = write_data(master_dict, outfile, fieldnames)
print tot_cnt, rec_cnt, erec_cnt
def open_reader(file_obj):
    '''
    Wrap *file_obj* in a comma-delimited csv.DictReader.

    No explicit fieldnames are passed, so the reader takes them from the
    first line of the file. Returns a ``(reader, fieldnames)`` pair; the
    fieldnames are handed back separately so the caller can reuse them
    later (for example when writing with DictWriter).
    '''
    dict_reader = csv.DictReader(file_obj, delimiter=',')
    column_names = dict_reader.fieldnames
    return dict_reader, column_names
def create_header_list(in_obj):
    '''
    Return the distinct (PEOPLE_ID, DON_DATE) pairs found in *in_obj*.

    *in_obj* is an iterable of row mappings that have 'PEOPLE_ID' and
    'DON_DATE' keys. The pairs are returned as a list of tuples in the
    order in which each pair is first seen.
    '''
    # A set makes the "already seen?" test constant-time; the list keeps
    # the first-seen order. Assumes both values are hashable (they are
    # strings when the rows come from csv.DictReader).
    seen = set()
    p_id_list = []
    for row in in_obj:
        key = (row['PEOPLE_ID'], row['DON_DATE'])
        if key not in seen:
            seen.add(key)
            p_id_list.append(key)
    return p_id_list
def map_data(header_list, data_obj):
    '''
    Group the rows of *data_obj* under an extended header key.

    For every (PEOPLE_ID, DON_DATE) pair in *header_list* the matching rows
    are collected in their original order. The dictionary key is the pair
    extended with DEDUCT_AMT, ND_AMT, DEDUCT_YTD and NONDEDUCT_YTD of the
    first matching row and, when both YTD values parse as floats, their sum.

    Returns a dict mapping each key tuple to its list of rows.
    Raises IndexError when a header has no matching row.
    '''
    # Bucket the rows in one pass instead of rescanning data_obj for every
    # header; this also lets data_obj be a one-shot iterator.
    rows_by_header = {}
    for row in data_obj:
        pair = (row['PEOPLE_ID'], row['DON_DATE'])
        rows_by_header.setdefault(pair, []).append(row)

    master_dict = {}
    for element in header_list:
        client_section_list = rows_by_header.get(tuple(element), [])
        first = client_section_list[0]  # IndexError if nothing matched
        key = list(element)
        key.extend([first['DEDUCT_AMT'],
                    first['ND_AMT'],
                    first['DEDUCT_YTD'],
                    first['NONDEDUCT_YTD']])
        try:
            key.append(float(first['DEDUCT_YTD']) +
                       float(first['NONDEDUCT_YTD']))
        except ValueError:
            # Non-numeric YTD values: the key simply has no total element.
            pass
        master_dict[tuple(key)] = client_section_list
    return master_dict
def write_data(in_obj, outfile, in_fieldnames):
# Write every key of in_obj as a ' -01- ' line followed by its rows as
# ' -02- ' lines, and return the counts (tot_cnt, rec_cnt, email_cnt).
with open(outfile, 'wb') as writer_outfile:
# Two writers share the same file handle: a plain csv.writer for the key
# tuples and a DictWriter for the data rows (extrasaction='ignore' drops
# columns that are not in in_fieldnames).
writer = csv.writer(writer_outfile, delimiter=',')
dict_writer = csv.DictWriter(writer_outfile,
fieldnames=in_fieldnames,
extrasaction='ignore')
tot_cnt = 0
rec_cnt = 0
email_cnt = 0
for k, v in in_obj.iteritems():
# The prefix is written directly to the file, then the key as a CSV row.
writer_outfile.write(' -01- ')
writer.writerow(k)
rec_cnt += 1
for i, e in enumerate(v):
# v[i] is the same row as e; a row counts towards email_cnt when either
# INT_CODE column is non-empty.
if v[i]['INT_CODE_EX0006'] != '' or v[i]['INT_CODE_EX0028'] != '':
email_cnt += 1
# Presumably written for every row, not only the counted ones
# (indentation was lost - confirm).
writer_outfile.write(' -02- ')
dict_writer.writerow(e)
tot_cnt += 1
return tot_cnt, rec_cnt, email_cnt
def get_recon_totals(in_obj):
# Scan the lines of in_obj for footer rows whose first field is 'T' and
# return the third field of the 'Total Amount', 'Receipt Count' and
# 'Email Receipt Count' rows (0 for any row that is not found).
print in_obj
client_tot_cnt = 0
client_rec_cnt = 0
client_erec_cnt = 0
# readlines() reads everything that is left in the file, so nothing remains
# for any other reader of in_obj afterwards.
for line in in_obj.readlines():
# Plain comma split, not the csv module.
line = line.split(',')
if line[0] == 'T' and line[1] == 'Total Amount':
print 'Total Amount found.'
# Values are kept as the raw text of the third field.
# NOTE(review): if that is the last field of the line it presumably still
# carries the line ending.
client_tot_cnt = line[2]
elif line[0] == 'T' and line[1] == 'Receipt Count':
print 'Receipt Count found.'
client_rec_cnt = line[2]
elif line[0] == 'T' and line[1] == 'Email Receipt Count':
print 'E-Receipt Count Found.'
client_erec_cnt = line[2]
return client_tot_cnt, client_rec_cnt, client_erec_cnt
if __name__ == '__main__':
main()
If your file is not very large, you can convert the reader (an iterator) to a list of dictionaries by calling list() on it, and then use that list in your code instead of trying to read from the file directly.
Example -
# Excerpt only: the lines consisting of "." mark code elided by the answer, and the
# <fieldname ...> placeholders must be replaced with the real column names.
def main():
infile = sys.argv[1]
outfile = sys.argv[2]
with open(infile, 'rbU') as in_obj:
# Create reader object, get fieldnames for later on
reader, fieldnames = open_reader(in_obj)
# Read every row into a list once; both the totals lookup and the sort below
# then work on this list instead of on the file object.
reader_list = list(reader)
nav_tot_cnt, nav_rec_cnt, nav_erec_cnt = get_recon_totals(reader_list)
print nav_tot_cnt, nav_rec_cnt, nav_erec_cnt
# This switches the dictionary to a sorted list... necessary??
reader_list = sorted(reader_list, key=lambda key: (key['PEOPLE_ID'],
key['DON_DATE']))
.
.
def get_recon_totals(reader_list):
# NOTE(review): in_obj is not defined in this version of the function -
# presumably reader_list was meant.
print in_obj
client_tot_cnt = 0
client_rec_cnt = 0
client_erec_cnt = 0
for line in reader_list: #line here is a dict
if line[<fieldname for first column>] == 'T' and line[<fieldname for secondcolumn>] == 'Total Amount':
print 'Total Amount found.'
client_tot_cnt = line[<fieldname for third column>]
.
. #continued like above
.
return client_tot_cnt, client_rec_cnt, client_erec_cnt

Not writing into an Excel file

I'm reading data from a file named SPD_file and matching it against another file named Custom. All the records that match in both files should be written to a third file.
But something seems to be wrong: the code matches the records and prints them on the console, but when I write to the new file, nothing ends up in it other than the header.
# Step 1: read the "CD" sheet of the SPD workbook into mapping_records.
# (Indentation was lost in this listing, so loop nesting is inferred.)
workbook = xlrd.open_workbook(SPD_file)
worksheets = workbook.sheet_names()
mapping_records = {}
for worksheet_name in worksheets:
worksheet = workbook.sheet_by_name(worksheet_name)
mapping_record = MappingRecord()
if worksheet_name == "CD":
for curr_row in range(0,worksheet.nrows):
# mapping_record is rebound to the current worksheet row here.
mapping_record = worksheet.row(curr_row)
print worksheet_name
print mapping_record[0].value
# NOTE(review): this iterates over the items of mapping_record, yet every pass
# stores a fresh record under the same key, mapping_record[8] - and that key is
# the row item itself, not its .value as used in the print above. Confirm the
# intended key.
for curr_row in mapping_record:
#print "In Loop...."
spd_record = MappingRecord()
spd_record.id = "00002269"
spd_record.erocode = None
spd_record.scno = None
mapping_records[mapping_record[8]] = spd_record
print "Read SPD File....."
# Step 2: fill erocode / scno from the Custom CSV.
custom_file_name = "Custom_" + today.strftime('%Y-%m-%d') + ".csv"
custom_file = ops_home + path + "\\" + custom_file_name
custom = open(custom_file, 'rb')
reader = csv.reader(custom, delimiter=',', quotechar='"')
for line in reader:
# NOTE(review): the lookup key is mapping_record[8], which does not depend on
# `line`, so every CSV line addresses the same record.
if mapping_records.has_key(mapping_record[8]):
spd_record = mapping_records[mapping_record[8]]
if line[7] == "ERO Code":
spd_record.erocode = line[8]
elif line[7] == "Service Number":
spd_record.scno = line[8]
# Step 3: write the report workbook.
#create a new file.
New_file = ops_home + '\\Reports\\SPD_new_' + today.strftime('%d%m%Y') + '.xlsx'
workbook = xlsxwriter.Workbook(New_file)
# Add a bold format to use to highlight cells.
bold = workbook.add_format({'bold': 1})
money = workbook.add_format({'num_format': '#,##0.00'})
worksheetCd = workbook.add_worksheet("CD")
cdHeader = ("Merchant ID", "EroCode", "Service Number")
cd_row = 0
cd_col = 0
# Header row, written in bold.
for columnHeader in cdHeader:
worksheetCd.write(cd_row, cd_col, columnHeader,bold)
cd_col += 1
for ctx in mapping_records:
spd_record = mapping_records[ctx]
# NOTE(review): payment_mode is never assigned in the visible code; unless
# MappingRecord itself sets it to "CRD", this test is never true and only the
# header row is written - which matches the reported symptom.
if spd_record.payment_mode == "CRD":
cd_row += 1
cd_col = 0
cdRow = (spd_record.id, spd_record.erocode, spd_record.scno)
for columnData in cdRow:
# cdRow holds three values, so presumably cd_col only takes 0..2 here and the
# write_number branch (columns 5, 19, 20, 21) is not reached.
if cd_col == 5 or cd_col == 19 or cd_col ==20 or cd_col ==21:
worksheetCd.write_number(cd_row, cd_col, columnData, money)
else:
worksheetCd.write(cd_row, cd_col, columnData)
cd_col += 1
workbook.close()

Comparing two CSV files and print the difference

I'm trying to compare two CSV files, CSVFileOne and CSVFileTwo.
My desired output is to print CSVFileOne, but only the rows that do not exist in CSVFileTwo.
My code:
# Question code as posted: load both CSV files into lists of dicts, then
# compare their 'bstring' columns. (Indentation was lost in this listing.)
input_file = CSVFileOne
ABGSOne = []
with open(input_file, encoding='UTF-8') as fone:
rowsOne = csv.reader(fone,delimiter=",",lineterminator="\n")
# Skip the header line.
next(rowsOne, None)
for rowOne in rowsOne:
abbgone = {}
# NOTE(review): `row` is not defined in this listing; the loop variable is rowOne.
abbgone['astringOne'] = row[0]
abbgone['bstringOne'] = row[1]
abbgone['cstringOne'] = row[2]
abbgone['dstringOne'] = row[3]
ABGSOne.append(abbgone)
input_fileTwo = CSVFileTwo
ABGSTwo = []
with open(input_fileTwo, encoding='UTF-8') as ftwo:
rowsTwo = csv.reader(ftwo,delimiter=",",lineterminator="\n")
next(rowsTwo, None)
for rowTwo in rowsTwo:
abbgtwo = {}
# NOTE(review): same here - the loop variable is rowTwo, not `row`.
abbgtwo['astringTwo'] = row[0]
abbgtwo['bstringTwo'] = row[1]
abbgtwo['cstringTwo'] = row[2]
abbgtwo['dstringTwo'] = row[3]
# NOTE(review): appends `abbgTwo` (capital T, not defined here) to ABGSOne;
# ABGSTwo is never filled.
ABGSOne.append(abbgTwo)
for abbgone in ABGSOne:
# NOTE(review): abbgtwo is whatever dict the previous loop built last, so this
# compares against a single row rather than against all of file two.
if abbgone['bstringOne'] == abbgtwo['bstringTwo']:
# NOTE(review): the quotes and parentheses on the next line are unbalanced.
print('abbgone['bstringOne']
Try this out:
# Write to Desired.csv every line of CSVFileOne.csv that does not appear in
# CSVFileTwo.csv.
with open('CSVFileTwo.csv', 'r') as t2:
    # A set gives constant-time membership tests. Line endings are stripped
    # so that a final line without a trailing newline still compares equal.
    filetwo = {line.rstrip('\r\n') for line in t2}

with open('CSVFileOne.csv', 'r') as t1, open('Desired.csv', 'w') as outFile:
    for line in t1:
        if line.rstrip('\r\n') not in filetwo:
            outFile.write(line)

Categories