Python edit multiple worksheets - python

I would like to edit multiple worksheets present in the same Excel File and then save them with the adjustments made. These worksheets have the same columns headers and are called Credit and Debit. The code that I have created is the following:
import pandas as pd
import numpy as np
class blah:
def __init__(self, path, file_in, file_out):
self.path = path
self.file_inviato = file_in
self.file_out = file_out
def process_file(self):
df = pd.read_excel(self.path + self.file_in, sheet_name=None, skiprows=4)
****Here is where I am struggling in amending both worksheets at the same time****
# df = df.columns.str.strip()
# df['Col1'] = np.where((df['Col2'] == 'KO') | (df['Col2'] == 'OK'), 0, df['Col1'])
writer = pd.ExcelWriter(self.path + self.file_out, engine='xlsxwriter')
for sheet_name in df.keys():
df[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)
writer.save()
b = blah('path....',
'file in....xlsx',
'file out.xlsx')
b.process_file()

found a workaround:
for sheet_name in df.keys():
df[sheet_name] = df[sheet_name].rename(columns=lambda x: x.strip())
df[sheet_name]['Col1'] = np.where((df[sheet_name]['Col2'] == 'KO') |
(df[sheet_name]['Col2'] == 'OK'), 0, df[sheet_name]['Col1'])

Related

Python - writerow and dataframe to csv

Need some help on python insert to csv.
Would like insert info and data frame to to csv.
After insert info with writerow, when insert data frame into csv, there was missing some header from data frame.
Correct header without : writer.writerow(info)
Wrong data frame header with : writer.writerow(info)
Missing 'No' to 'Billno' from data frame.
df = pd.read_sql(query, cnxn)
info = ['Date From:','',fromdate,'','To',todate]
with open('C:/my_csv/'+reportname+'.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(info)
folder_path = r'C:/my_csv'
file_type = r'/*csv'
files = glob.glob(folder_path + file_type)
max_file = max(files, key=os.path.getctime)
df.to_csv(max_file, index=True, index_label="No", header=True)
Using this answer to a similar question as a template you could try something like:
import pandas as pd
data = {"Index" : [0,1], "A": [1, 1], "B": [2, 2], "C": [3,3]}
df = pd.DataFrame(data)
df.set_index("Index", inplace=True)
date_a = 19022023
date_b = 20022023
f = open('foo', 'a')
f.write(f"Info_1, {date_a}, Info_2, {date_b}\n")
df.to_csv(f, sep=",", header=True)
f.close()
>>> more foo
Info_1, 19022023, Info_2, 20022023
Index,A,B,C
0,1,2,3
1,1,2,3

How to merge excel file without losing its format

So here is a sample of my excel layout:
But after merging it has two header and loses the layout.
Here is my code:
import pandas as pd
import glob
path = r"C:/Users//"
fname = glob.glob(path + "/*.xlsx")
result_DFs1 = pd.DataFrame()
result_DFs2 = pd.DataFrame()
for i in fname:
try:
df1 = pd.read_excel(i,sheet_name = "Test1")
result_DFs1 = pd.concat([result_DFs1, df1])
except:
pass
for i in fname:
try:
df2 = pd.read_excel(i,sheet_name = "Test2")
result_DFs2 = pd.concat([result_DFs2, df2])
except:
pass
with pd.ExcelWriter('pandas_to_excel.xlsx') as writer:
result_DFs1.to_excel (writer, sheet_name='Test1')
result_DFs2.to_excel (writer, sheet_name='Test2')
Is there a way I can just have one header and without losing the excel layout format?
You can keep track of your sheets and only include headers for the first one. Something like:
first = True
for i in fname:
try:
if first:
df1 = pd.read_excel(i,sheet_name = "Test1", skiprows=0, header=0)
first = False
else:
df1 = pd.read_excel(i,sheet_name = "Test1", skiprows=1, header=None)
result_DFs1 = pd.concat([result_DFs1, df1])
except:
pass

Change Colume Name in dataframe and melt it

I am have a code to merge few excel together using Python, but i cant realy rename any thing in that dataframe using df.rename(). could someone explain why? Thanks!
import os
import xlrd
import pandas as pd
def file_name(file_dir):
list=[]
for file in os.listdir(file_dir):
if os.path.splitext(file)[1] == '.xlsx':
list.append(file)
return list
path = r'E:\Sync\External\Test'
wks = file_name(path)
data = []
for i in range(len(wks)):
read_xlsx = xlrd.open_workbook(path + '\\' + wks[i])
sheet1 = read_xlsx.sheets()[1]
nrow = sheet1.nrows
title = sheet1.row_values(0)
location = os.path.splitext(wks[i])[0]
for j in range(6,nrow):
a = sheet1.row_values(j)
a.insert(0,location)
print(a)
data.append(a)
content= pd.DataFrame(data)
content.rename({'0': 'X', '1': 'Y'}, axis=1, inplace=True)
#content.to_csv(path+'\\test.xlsx', sep=',', header=True, index=False)
content.to_excel(path+'\\test.xlsx', header=True, index=False)
Code as above, no error shows,but it's just doesn't work (rename part)

i want to write looping dataframe to excel

1.I am new to python.this task for mainly read the excel files in directory and filter the data in excel. After filtering write into excel.When iam trying to write to excel its storing only last iteration values.Please give advise to write all data to excel . I want to write df_filter and df_filter1 to excel which is for loop .Please help me i need to write these dataframe to excell
import os
import xlrd
import pandas as pd
import xlwt
from openpyxl import load_workbook
import xlsxwriter
from pyexcelerate import Workbook
import numpy as np
from pandas import ExcelWriter
from tempfile import TemporaryFile
ALL_SHEETS = []
sheet_list = ""
file_path = os.path.join(input("enter Dir path"))
config_path = os.path.join(input("enter your config file path here"))
output_path = os.path.join(input("Dude where you want store outputfile"))
output1 = pd.ExcelWriter(output_path, engine='xlsxwriter')
ALL_SHEETS = [os.path.join(file_path, f) for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
i = 0
data1 = []
data = []
Packet_size = []
Trail_numbers = []
Though_put = []
Latency = []
Jitter = []
df_filter = pd.DataFrame(columns=['packetsize', 'throughput', 'latency (us)', 'jitter (us)'])
df_filter1 = pd.DataFrame(columns=['packetsize', 'throughput', 'latency (us)', 'jitter (us)'])
#df_sheet = pd.DataFrame(columns=['zsheet'])
merged_inner=pd.DataFrame([])
def sheets(val):
s = wb.worksheets[val]
df_sheet = pd.DataFrame( data=['%s' % str(s) + '\n'])
#Name_sheet(s)
HeaderList = pd.read_csv(config_path)
column_list = []
for col in HeaderList:
col = col.lstrip("'")
col = col.rstrip("'")
column_list.append(col)
df1 = xl.parse(sheet_list[val], skiprows=i)
df1 = df1.filter(column_list)
df2 = df1[(df1['Result'] != 'Failed') & (df1['Frame Size Type'] == 'iMIX')]
if df2.empty:
pass
else:
final3= df2.groupby(['Trial Number', 'iMIX Distribution'], sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
#df_filter['sheetaname']=df_sheet(lambda a:'%s' % a['sheetvise'],axis=1)
final = final3.groupby(['iMIX Distribution'], sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
df_filter['packetsize'] = final.apply(lambda z: '%s' % (z['iMIX Distribution']), axis=1)
df_filter['throughput'] = final.apply(lambda z: '%s' % (z['Throughput (%)']), axis=1)
df_filter['latency (us)'] = final.apply(lambda x: '%s/%s/%s' % (x['Minimum Latency (us)'], x['Maximum Latency (us)'], x['Average Latency (us)']),axis=1)
df_filter['jitter (us)'] = final.apply(lambda y: '%s/%s/%s' % (y['Minimum Jitter (us)'], y['Maximum Jitter (us)'], y['Average Jitter (us)']),axis=1)
df_filter.to_excel(output1,sheet_name='mani')
output1.save()
df_filter.to_excel(output1, startrow=len(df_filter1)+len(df_filter)+2,sheet_name='mani')
output1.save()
df3 = df1[(df1['Result'] != 'Failed') & (df1['Frame Size Type'] == 'Fixed')]
if df3.empty:
pass
else:
final2 = df3.groupby(['Trial Number', 'Configured Frame Size'], sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
final1=final2.groupby(['Configured Frame Size'],sort=False).apply(lambda x: x.loc[x['Throughput (%)'].idxmax()])
df_filter1['packetsize'] = final1.apply(lambda z: '%s' % (z['Configured Frame Size']), axis=1)
df_filter1['throughput'] = final1.apply(lambda z: '%s' % (z['Throughput (%)']), axis=1)
df_filter1['latency (us)'] = final1.apply(lambda x: '%s/%s/%s' % (x['Minimum Latency (us)'], x['Maximum Latency (us)'], x['Average Latency (us)']),axis=1)
df_filter1['jitter (us)'] = final1.apply(lambda y: '%s/%s/%s' % (y['Minimum Jitter (us)'], y['Maximum Jitter (us)'], y['Average Jitter (us)']),axis=1)
df_filter1.to_excel(output1, sheet_name='mani')
df_filter1.to_excel(output1, startrow=len(df_filter1)+len(df_filter) + 2, sheet_name='mani')
output1.save()
def sheet_every():
for sheet in range(0, sheet_list_lenght):
sheets(sheet)
for file in (ALL_SHEETS):
df_file = pd.DataFrame(data=[file])
workbook = xlrd.open_workbook(file)
wb = load_workbook(file)
xl = pd.ExcelFile(file)
i = 0
sheet_list = workbook.sheet_names()
sheet_list_lenght = (len(sheet_list))
for sheet in sheet_list:
worksheet = workbook.sheet_by_name(sheet)
for i in range(0, worksheet.nrows):
row = worksheet.row_values(i)
if 'Trial Number' in row:``
break
sheet_every()
Not sure if this answers your question or not, but if you want to read from a dataframe and add rows to a new dataframe thorugh a loop you can refer the code below:
dummyData = pd.read_csv("someexcelfile.csv")
#You can merge mutiple dataframes into dummyData and make it a big dataframe
dummyInsertTable = pd.DataFrame(columns=["Col1","Col2","Col3"])
for i in range(len(dummyData)):
dummyInsertTable.loc[i,"Col1"] = dummyData["Col1"][i]
dummyInsertTable.loc[i, "Col2"] = dummyData["Col2"][i]
dummyInsertTable.loc[i, "Col3"] = dummyData["Col3"][i]
dummyInsertTable.to_csv("writeCSVFile.csv")
And next time be precise where you are facing the problem.
EDIT
Try loading the first dataframe and then loop through the other files and append the files in the first dataframe. Refer the code:
import pandas as pd
#Make a list of all the file you have
filesList = ["/home/bhushan/firstFile.csv","/home/bhushan/secondFile.csv","/home/bhushan/thirdFile.csv","/home/bhushan/fourthFile.csv"]
#Read the first csv file using pandas.read_csv
firstFile = pd.read_csv(filesList[0])
#Loop through the rest of the files and append the files in the first DataFrame
for i in range(1,len(filesList)):
fileToBeAdded = pd.read_csv(filesList[i])
firstFile = firstFile.append(fileToBeAdded)
#Write the final file
finalFile = firstFile
finalFile.to_csv("finalFile.csv")
If I get your question correctly, you have two data frames which you want to write to one excel file but you are only getting the last one.
You should write them to two different sheets instead, then you can retrieve them as per requirement, either individually or combined.
Follow the below links for more details and implementation :
https://xlsxwriter.readthedocs.io/example_pandas_multiple.html
https://campus.datacamp.com/courses/importing-managing-financial-data-in-python/importing-stock-listing-data-from-excel?ex=11
Also, you can instead write to a csv file, that is also excel compatible and easier to handle. Also I have observed that it is faster and more space efficient compared to writing to .xlsx file.
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html

TypeError: invalid file: None when exporting dataframes with xlsxWriter

I've been using XLSXWriter on my Python/Flask app to export dataframes to Excel. Until today, I managed to export my dataframes on one worksheet; now I'm trying to export multiple dataframes on different worksheets inside one single file.
When I try locally (see first code attached), everything works perfectly. But when I write to an IOStream, I receive the following error:
TypeError: invalid file: None
Here is the functional local code:
path_user = expanduser("~").split("\\")[-1]
xlsx_path = r"C:\Users\{}\Desktop\ExportAllGraphs2.xlsx".format(path_user)
writer = pd.ExcelWriter(xlsx_path, engine='xlsxwriter')
count = 0
for df in df_list:
sheet_title = df["title"].split(' ', 1)[0]
sheet_name = "{}_{}".format(count, sheet_title)
df_header.to_excel(writer,startrow = 0, merge_cells = False, sheet_name = sheet_name)
df["df"].to_excel(writer,startrow = len(df_header) + 5, merge_cells = False, sheet_name = sheet_name)
workbook = writer.book
worksheet = writer.sheets[sheet_name]
format = workbook.add_format()
format.set_bg_color('#eeeeee')
worksheet.set_column(0,50,20)
worksheet.write(len(df_header) + 4, 1, df["title"])
count += 1
writer.close()
os.system('start excel.exe "{}"'.format(xlsx_path))
and here is the non-functioning live code:
output = BytesIO()
writer = pd.ExcelWriter(output, engine='xlsxwriter')
count = 0
for df in df_list:
sheet_title = df["title"].split(' ', 1)[0]
sheet_name = "{}_{}".format(count, sheet_title)
df_header.to_excel(writer,startrow = 0, merge_cells = False, sheet_name = sheet_name)
df["df"].to_excel(writer,startrow = len(df_header) + 5, merge_cells = False, sheet_name = sheet_name)
workbook = writer.book
worksheet = writer.sheets[sheet_name]
format = workbook.add_format()
format.set_bg_color('#eeeeee')
worksheet.set_column(0,50,20)
worksheet.write(len(df_header) + 4, 1, df["title"])
count += 1
writer.close()
output.seek(0)
return send_file(output,
attachment_filename="test_export_{}.xlsx".format(current_date),
as_attachment=True)
Am I missing anything?

Categories