Add worksheet to existing Excel file with pandas - python

# Set the working folder to the same folder as the script
os.chdir(os.path.dirname(os.path.abspath(__file__)))
test = send_request().content
df = pd.read_csv(io.StringIO(test.decode('utf-8')))
writer = pd.ExcelWriter('NHL_STATS_JSB_final.xlsx', \
engine = 'xlsxwriter')
df.to_excel(writer, 'Player statistics', index=False)
writer.save()
I don't understand why, but I am trying to add the worksheet Player statistics to my current NHL_STATS_JSB_final.xlsx file, but it is not working. Instead of adding the worksheet to the file, my code use the current file and erase all previous worksheet to add the new one.
How could I add Player statistics to my current Excel file with erasing all other worksheets?

Here is a snippet of code from one of my projects. This should do exactly what you want. You need to use openpyxl rather than xlsxwriter to allow you to update an existing file.
writer = pd.ExcelWriter(file_name, engine='openpyxl')
if os.path.exists(file_name):
book = openpyxl.load_workbook(file_name)
writer.book = book
df.to_excel(writer, sheet_name=key)
writer.save()
writer.close()

As the OP mentioned, xlsxwriter will overwrite your existing workbook. Xlsxwriter is for writing original .xlsx files. Openpyxl, on the other hand, can modify existing .xlsx files.
#Brad Campbell answer using openpyxl is the best way to do this. Since the OP was using the xlsxwriter engine, I wanted to demonstrate that it is possible to read in your existing .xlsx file and then create a new workbook (of the same name) containing that data from the original sheets and the new sheet that you'd like to add on.
import pandas as pd
import os
xl = pd.ExcelFile('NHL_STATS_JSB_final.xlsx')
sheet_names = xl.sheet_names # a list of existing sheet names
#the next three lines are OPs original code
os.chdir(os.path.dirname(os.path.abspath(__file__)))
test = send_request().content
df = pd.read_csv(io.StringIO(test.decode('utf-8')))
#beginning the process of creating new workbook with the same name
writer = pd.ExcelWriter('NHL_STATS_JSB_final.xlsx', engine = 'xlsxwriter')
d = {} #creating an empty dictionary
for i in range (0, len(sheet_names)):
current_sheet_name = sheet_names[i]
d[current_sheet_name] = pd.read_excel('NHL_STATS_JSB_final.xlsx', sheetname = i)
d[current_sheet_name].to_excel(writer, '%s' % (current_sheet_name), index=False)
# adding in the new worksheet
df.to_excel(writer, 'Player statistics', index=False)
writer.save()

# I needed to append tabs to a workbook only if data existed
# OP wants to append sheets to a workbook.
# using mode 'a' appends if the file exists
# mode 'w' creates a new file if failed to append.
# ended up with this:
def create_POC_file_tab(df, sheetname):
# within function before the 'if' code below, prep data.
# Like extracting df_SA values from df,
# building POC_file name using df_SA+date, etc.
#
# might not have data after filtering so check length.
if len(df_SA) > 0: # extracted dataframe contains data
# Have data so finalize workbook path/name
POC_file = PATH + POC_file # build file path
try:
# mode='a' tries to append a new tab if the
# workbook exists already
writer_SA = pd.ExcelWriter(POC_file + ' ' +
process_date + '.xlsx', engine='openpyxl', mode='a')
print(POC, 'File exists. Appending to POC',POC,sheetname)
except:
# mode='w' creates a new workbook if one does not exist
writer_SA = pd.ExcelWriter(POC_file + ' ' +
process_date + '.xlsx', engine='openpyxl', mode='w')
print(POC, ' !!! Creating !!! ', sheetname)
try:
df_SA.to_excel(writer_SA, sheet_name=sheetname,
index=False)
writer_SA.save()
except:
print ("error on writing sheetname: ", sheetname,
"for: ",POC)
return
# when I exit the file seems to be closed properly.
# In brief, to append a new tab to a workbook use:
writer=pd.ExcelWriter('filename.xlsx',engine='openpyxl', mode='a')
df.to_excel(writer, sheet_name='my_sheet_name', index=False)
writer_SA.save()

Related

Appending sheets to Excel in Pandas

I am trying to add a new excel sheet for every day.
I have some parts of this working os path checks if the file exists and if it doesn't it makes a new one.
Else it should just append data to a new sheet labeled with the date. It works to actually make the sheet but it overwrites any previous data.
It also sometimes throws this error when it gets to the for loop: zipfile.BadZipFile(File is not a zip file)
As a side issue I believe it is also overwriting data on the sheet when I need to append it but I think its the same problem.
import os
import datetime as dt
import pandas as pd
import xlsxwriter
from openpyxl import load_workbook
filename = "exceltest.xlsx"
current = dt.datetime.now().strftime("%m-%d-%y")
def makeXL():
df = pd.DataFrame({"fname":["First Name"], "lname":["Last Name"], "id":["ID"], "time":["Time"]})
print(df)
lastSheet = ""
if os.path.exists(filename) == False:
writer = pd.ExcelWriter(filename, engine='openpyxl')
df.to_excel(writer, sheet_name=current, index=False)
writer.close()
else:
for sheet in pd.read_excel(filename, engine='openpyxl').sheet_names:
print("sheet",sheet)
lastSheet = sheet
if lastSheet != current:
writer = pd.ExcelWriter(filename, engine='openpyxl')
df.to_excel(writer, sheet_name=current, index=False, mode="a")
writer.close()
makeXL()

Loop update existing excel template

Trying to write a script where I currently have an excel VBA sheet that has two tabs with 1st being a graph and second being a backend file. Backend is updated by a master file. In the master file there is a city column where I want to loop through all the unique city rows write those rows in to the VBA file and save the VBA file with the city's name.
master_backend = pd.read_excel(path)
city = master_backend[(master_backend["City"]=="NY")]
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
from openpyxl import load_workbook
import pandas as pd
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl')
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
writer.book = load_workbook(filename, keep_vba = True)
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
if truncate_sheet and sheet_name in writer.book.sheetnames:
idx = writer.book.sheetnames.index(sheet_name)
writer.book.remove(writer.book.worksheets[idx])
writer.book.create_sheet(sheet_name, idx)
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
pass
if startrow is None:
startrow = 0
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
writer.save()
Essentially what I want is 5 files since there are 5 cities all named with their city name
as I don't know VBA and you posted this under the python tag I'll provide my take on this.
assuming your datasheet is called file you could try something like this :
import shutil
for city in master_backend.City.unique():
df = master_backend.loc[master_backend.City == city]
shutil.copy(file,f"{city}.xlsx")
append_df_excel(f"{city}.xlsx", df,sheet_name='Backend')
cracking function btw, I would use put some doc strings in it for easy of use : )
I think you can simplify this script significantly by understanding that pandas will create a dataframe for you when you read the excel file. Then it's just a simple matter of collecting the info you want from the dataframe and re-writting it to a file. It's unclear what you want in your new file, but suppose you just want to filter the second sheet and keep everything in the first sheet it might look like this.
# Open the file,
# NOTE: when you open the file, if there are multiple sheets
# then the result is a dictionary of dataframes keyed on the sheet name
master_data = pd.read_excel(file_path, ....)
# Assuming second sheet name is 'City'
city_df=master_data['City']
# Replace 'columnName' with the name of the column (if includes headers) or column number
for city in pd.unique(city_df['columnName']):
with pd.ExcelWriter(city + '.xlsx') as writer:
master_data['Sheet1'].to_excel(writer, sheet_name='Sheet1')
city_df[city_df['columnName']==city].to_excel(writer, sheet_name='City')

Open existing workbook with ExcelWriter [duplicate]

I use pandas to write to excel file in the following fashion:
import pandas
writer = pandas.ExcelWriter('Masterfile.xlsx')
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
writer.save()
Masterfile.xlsx already consists of number of different tabs. However, it does not yet contain "Main".
Pandas correctly writes to "Main" sheet, unfortunately it also deletes all other tabs.
Pandas docs says it uses openpyxl for xlsx files. Quick look through the code in ExcelWriter gives a clue that something like this might work out:
import pandas
from openpyxl import load_workbook
book = load_workbook('Masterfile.xlsx')
writer = pandas.ExcelWriter('Masterfile.xlsx', engine='openpyxl')
writer.book = book
## ExcelWriter for some reason uses writer.sheets to access the sheet.
## If you leave it empty it will not know that sheet Main is already there
## and will create a new sheet.
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
writer.save()
UPDATE: Starting from Pandas 1.3.0 the following function will not work properly, because functions DataFrame.to_excel() and pd.ExcelWriter() have been changed - a new if_sheet_exists parameter has been introduced, which has invalidated the function below.
Here you can find an updated version of the append_df_to_excel(), which is working for Pandas 1.3.0+.
Here is a helper function:
import os
from openpyxl import load_workbook
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
truncate_sheet=False,
**to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
#param filename: File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
#param df: DataFrame to save to workbook
#param sheet_name: Name of sheet which will contain DataFrame.
(default: 'Sheet1')
#param startrow: upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
#param truncate_sheet: truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
#param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
[can be a dictionary]
#return: None
Usage examples:
>>> append_df_to_excel('d:/temp/test.xlsx', df)
>>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False)
>>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
index=False, startrow=25)
(c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
"""
# Excel file doesn't exist - saving and exiting
if not os.path.isfile(filename):
df.to_excel(
filename,
sheet_name=sheet_name,
startrow=startrow if startrow is not None else 0,
**to_excel_kwargs)
return
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
if startrow is None:
startrow = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)
# save the workbook
writer.save()
Tested with the following versions:
Pandas 1.2.3
Openpyxl 3.0.5
With openpyxlversion 2.4.0 and pandasversion 0.19.2, the process #ski came up with gets a bit simpler:
import pandas
from openpyxl import load_workbook
with pandas.ExcelWriter('Masterfile.xlsx', engine='openpyxl') as writer:
writer.book = load_workbook('Masterfile.xlsx')
data_filtered.to_excel(writer, "Main", cols=['Diff1', 'Diff2'])
#That's it!
Starting in pandas 0.24 you can simplify this with the mode keyword argument of ExcelWriter:
import pandas as pd
with pd.ExcelWriter('the_file.xlsx', engine='openpyxl', mode='a') as writer:
data_filtered.to_excel(writer)
I know this is an older thread, but this is the first item you find when searching, and the above solutions don't work if you need to retain charts in a workbook that you already have created. In that case, xlwings is a better option - it allows you to write to the excel book and keeps the charts/chart data.
simple example:
import xlwings as xw
import pandas as pd
#create DF
months = ['2017-01','2017-02','2017-03','2017-04','2017-05','2017-06','2017-07','2017-08','2017-09','2017-10','2017-11','2017-12']
value1 = [x * 5+5 for x in range(len(months))]
df = pd.DataFrame(value1, index = months, columns = ['value1'])
df['value2'] = df['value1']+5
df['value3'] = df['value2']+5
#load workbook that has a chart in it
wb = xw.Book('C:\\data\\bookwithChart.xlsx')
ws = wb.sheets['chartData']
ws.range('A1').options(index=False).value = df
wb = xw.Book('C:\\data\\bookwithChart_updated.xlsx')
xw.apps[0].quit()
Old question, but I am guessing some people still search for this - so...
I find this method nice because all worksheets are loaded into a dictionary of sheet name and dataframe pairs, created by pandas with the sheetname=None option. It is simple to add, delete or modify worksheets between reading the spreadsheet into the dict format and writing it back from the dict. For me the xlsxwriter works better than openpyxl for this particular task in terms of speed and format.
Note: future versions of pandas (0.21.0+) will change the "sheetname" parameter to "sheet_name".
# read a single or multi-sheet excel file
# (returns dict of sheetname(s), dataframe(s))
ws_dict = pd.read_excel(excel_file_path,
sheetname=None)
# all worksheets are accessible as dataframes.
# easy to change a worksheet as a dataframe:
mod_df = ws_dict['existing_worksheet']
# do work on mod_df...then reassign
ws_dict['existing_worksheet'] = mod_df
# add a dataframe to the workbook as a new worksheet with
# ws name, df as dict key, value:
ws_dict['new_worksheet'] = some_other_dataframe
# when done, write dictionary back to excel...
# xlsxwriter honors datetime and date formats
# (only included as example)...
with pd.ExcelWriter(excel_file_path,
engine='xlsxwriter',
datetime_format='yyyy-mm-dd',
date_format='yyyy-mm-dd') as writer:
for ws_name, df_sheet in ws_dict.items():
df_sheet.to_excel(writer, sheet_name=ws_name)
For the example in the 2013 question:
ws_dict = pd.read_excel('Masterfile.xlsx',
sheetname=None)
ws_dict['Main'] = data_filtered[['Diff1', 'Diff2']]
with pd.ExcelWriter('Masterfile.xlsx',
engine='xlsxwriter') as writer:
for ws_name, df_sheet in ws_dict.items():
df_sheet.to_excel(writer, sheet_name=ws_name)
There is a better solution in pandas 0.24:
with pd.ExcelWriter(path, mode='a') as writer:
s.to_excel(writer, sheet_name='another sheet', index=False)
before:
after:
so upgrade your pandas now:
pip install --upgrade pandas
The solution of #MaxU is not working for the updated version of python and related packages. It raises the error:
"zipfile.BadZipFile: File is not a zip file"
I generated a new version of the function that works fine with the updated version of python and related packages and tested with python: 3.9 | openpyxl: 3.0.6 | pandas: 1.2.3
In addition I added more features to the helper function:
Now It resize all columns based on cell content width AND all variables will be visible (SEE "resizeColumns")
You can handle NaN, if you want that NaN are displayed as NaN or as empty cells (SEE "na_rep")
Added "startcol", you can decide to start to write from specific column, oterwise will start from col = 0
Here the function:
import pandas as pd
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None, startcol=None,
truncate_sheet=False, resizeColumns=True, na_rep = 'NA', **to_excel_kwargs):
"""
Append a DataFrame [df] to existing Excel file [filename]
into [sheet_name] Sheet.
If [filename] doesn't exist, then this function will create it.
Parameters:
filename : File path or existing ExcelWriter
(Example: '/path/to/file.xlsx')
df : dataframe to save to workbook
sheet_name : Name of sheet which will contain DataFrame.
(default: 'Sheet1')
startrow : upper left cell row to dump data frame.
Per default (startrow=None) calculate the last row
in the existing DF and write to the next row...
truncate_sheet : truncate (remove and recreate) [sheet_name]
before writing DataFrame to Excel file
resizeColumns: default = True . It resize all columns based on cell content width
to_excel_kwargs : arguments which will be passed to `DataFrame.to_excel()`
[can be dictionary]
na_rep: default = 'NA'. If, instead of NaN, you want blank cells, just edit as follows: na_rep=''
Returns: None
*******************
CONTRIBUTION:
Current helper function generated by [Baggio]: https://stackoverflow.com/users/14302009/baggio?tab=profile
Contributions to the current helper function: https://stackoverflow.com/users/4046632/buran?tab=profile
Original helper function: (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
Features of the new helper function:
1) Now it works with python 3.9 and latest versions of pandas and openpxl
---> Fixed the error: "zipfile.BadZipFile: File is not a zip file".
2) Now It resize all columns based on cell content width AND all variables will be visible (SEE "resizeColumns")
3) You can handle NaN, if you want that NaN are displayed as NaN or as empty cells (SEE "na_rep")
4) Added "startcol", you can decide to start to write from specific column, oterwise will start from col = 0
*******************
"""
from openpyxl import load_workbook
from string import ascii_uppercase
from openpyxl.utils import get_column_letter
from openpyxl import Workbook
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
try:
f = open(filename)
# Do something with the file
except IOError:
# print("File not accessible")
wb = Workbook()
ws = wb.active
ws.title = sheet_name
wb.save(filename)
writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')
# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
try:
FileNotFoundError
except NameError:
FileNotFoundError = IOError
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# get the last row in the existing Excel sheet
# if it was not specified explicitly
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
except FileNotFoundError:
# file does not exist yet, we will create it
pass
if startrow is None:
# startrow = -1
startrow = 0
if startcol is None:
startcol = 0
# write out the new sheet
df.to_excel(writer, sheet_name, startrow=startrow, startcol=startcol, na_rep=na_rep, **to_excel_kwargs)
if resizeColumns:
ws = writer.book[sheet_name]
def auto_format_cell_width(ws):
for letter in range(1,ws.max_column):
maximum_value = 0
for cell in ws[get_column_letter(letter)]:
val_to_check = len(str(cell.value))
if val_to_check > maximum_value:
maximum_value = val_to_check
ws.column_dimensions[get_column_letter(letter)].width = maximum_value + 2
auto_format_cell_width(ws)
# save the workbook
writer.save()
Example Usage:
# Create a sample dataframe
df = pd.DataFrame({'numbers': [1, 2, 3],
'colors': ['red', 'white', 'blue'],
'colorsTwo': ['yellow', 'white', 'blue'],
'NaNcheck': [float('NaN'), 1, float('NaN')],
})
# EDIT YOUR PATH FOR THE EXPORT
filename = r"C:\DataScience\df.xlsx"
# RUN ONE BY ONE IN ROW THE FOLLOWING LINES, TO SEE THE DIFFERENT UPDATES TO THE EXCELFILE
append_df_to_excel(filename, df, index=False, startrow=0) # Basic Export of df in default sheet (Sheet1)
append_df_to_excel(filename, df, sheet_name="Cool", index=False, startrow=0) # Append the sheet "Cool" where "df" is written
append_df_to_excel(filename, df, sheet_name="Cool", index=False) # Append another "df" to the sheet "Cool", just below the other "df" instance
append_df_to_excel(filename, df, sheet_name="Cool", index=False, startrow=0, startcol=5) # Append another "df" to the sheet "Cool" starting from col 5
append_df_to_excel(filename, df, index=False, truncate_sheet=True, startrow=10, na_rep = '') # Override (truncate) the "Sheet1", writing the df from row 10, and showing blank cells instead of NaN
def append_sheet_to_master(self, master_file_path, current_file_path, sheet_name):
try:
master_book = load_workbook(master_file_path)
master_writer = pandas.ExcelWriter(master_file_path, engine='openpyxl')
master_writer.book = master_book
master_writer.sheets = dict((ws.title, ws) for ws in master_book.worksheets)
current_frames = pandas.ExcelFile(current_file_path).parse(pandas.ExcelFile(current_file_path).sheet_names[0],
header=None,
index_col=None)
current_frames.to_excel(master_writer, sheet_name, index=None, header=False)
master_writer.save()
except Exception as e:
raise e
This works perfectly fine only thing is that formatting of the master file(file to which we add new sheet) is lost.
writer = pd.ExcelWriter('prueba1.xlsx'engine='openpyxl',keep_date_col=True)
The "keep_date_col" hope help you
I used the answer described here
from openpyxl import load_workbook
writer = pd.ExcelWriter(p_file_name, engine='openpyxl', mode='a')
writer.book = load_workbook(p_file_name)
writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
df.to_excel(writer, 'Data', startrow=10, startcol=20)
writer.save()
book = load_workbook(xlsFilename)
writer = pd.ExcelWriter(self.xlsFilename)
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, sheet_name=sheetName, index=False)
writer.save()
Solution by #MaxU worked very well. I have just one suggestion:
If truncate_sheet=True is specified than "startrow" should NOT be retained from existing sheet. I suggest:
if startrow is None and sheet_name in writer.book.sheetnames:
if not truncate_sheet: # truncate_sheet would use startrow if provided (or zero below)
startrow = writer.book[sheet_name].max_row
I'd reccommend using xlwings (https://docs.xlwings.org/en/stable/api.html), it is really powerful for this application... This is how I use it:
import xlwings as xw
import pandas as pd
import xlsxwriter
# function to get the active workbook
def getActiveWorkbook():
try:
# logic from xlwings to grab the current excel file
activeWb = xw.books.active
except:
# print error message if unable to get the current workbook
print('Unable to grab the current Workbook')
pause()
exitProgram()
else:
return activeWb
# function that returns the last row number and last cell of a sheet
def getLastRow(myBook, sheetName):
lastRow = myBook.sheets[sheetName].range("A1").current_region.last_cell.row
lastCol = str(xlsxwriter.utility.xl_col_to_name(myBook.sheets[sheetName].range("A1").current_region.last_cell.column))
return str(lastRow), lastCol + str(lastRow)
activeWb = getActiveWorkbook()
df = pd.DataFrame(data=[1,2,3])
# look at worksheet = Part Number Status
sheetName = "Sheet1"
ws = activeWb.sheets[sheetName]
lastRow, lastCell = getLastRow(activeWb, sheetName)
if int(lastRow) > 1:
ws.range("A1:" + lastCell).clear()
ws.range("A1").options(index=False, header=False).value = df.fillna('')
This seems to work very well for my applications because .xlsm workbooks can be very tricky. You can execute this as a python script or turn it into and executable with pyinstaller and then run the .exe through an excel macro. You can also call VBA macros from Python using xlwings which is very useful.
You can write to an existing Excel file without overwriting data using pandas by using the pandas.DataFrame.to_excel() method and specifying the mode parameter as 'a' (append mode).
Here's an example:
import pandas as pd
# Create a sample DataFrame
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
# Write the DataFrame to an existing Excel file in append mode
df.to_excel('existing_file.xlsx', engine='openpyxl', mode='a', index=False, sheet_name='Sheet1')
Method:
Can create file if not present
Append to existing excel as per sheet name
import pandas as pd
from openpyxl import load_workbook
def write_to_excel(df, file):
try:
book = load_workbook(file)
writer = pd.ExcelWriter(file, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, **kwds)
writer.save()
except FileNotFoundError as e:
df.to_excel(file, **kwds)
Usage:
df_a = pd.DataFrame(range(10), columns=["a"])
df_b = pd.DataFrame(range(10, 20), columns=["b"])
write_to_excel(df_a, "test.xlsx", sheet_name="Sheet a", columns=['a'], index=False)
write_to_excel(df_b, "test.xlsx", sheet_name="Sheet b", columns=['b'])

Adding a pandas.DataFrame to Existing Excel File

I have a web scraper which creates an excel file for this month's scrapes. I want to add today's scrape and every scrape for that month into that file as a new sheet every time it is run. My issue, however, has been that it only overwrites the existing sheet with a new sheet instead of adding it as a separate new sheet. I've tried to do it with xlrd, xlwt, pandas, and openpyxl.
Still brand new to Python so simplicity is appreciated!
Below is just the code dealing with writing the excel file.
# My relevant time variables
ts = time.time()
date_time = datetime.datetime.fromtimestamp(ts).strftime('%y-%m-%d %H_%M_%S')
HourMinuteSecond = datetime.datetime.fromtimestamp(ts).strftime('%H_%M_%S')
month = datetime.datetime.now().strftime('%m-%y')
# Creates a writer for this month and year
writer = pd.ExcelWriter(
'C:\\Users\\G\\Desktop\\KickstarterLinks(%s).xlsx' % (month),
engine='xlsxwriter')
# Creates dataframe from my data, d
df = pd.DataFrame(d)
# Writes to the excel file
df.to_excel(writer, sheet_name='%s' % (HourMinuteSecond))
writer.save()
Update:
This functionality has been added to pandas 0.24.0:
ExcelWriter now accepts mode as a keyword argument, enabling append to existing workbooks when using the openpyxl engine (GH3441)
Previous version:
Pandas has an open feature request for this.
In the mean time, here is a function which adds a pandas.DataFrame to an existing workbook:
Code:
def add_frame_to_workbook(filename, tabname, dataframe, timestamp):
"""
Save a dataframe to a workbook tab with the filename and tabname
coded to timestamp
:param filename: filename to create, can use strptime formatting
:param tabname: tabname to create, can use strptime formatting
:param dataframe: dataframe to save to workbook
:param timestamp: timestamp associated with dataframe
:return: None
"""
filename = timestamp.strftime(filename)
sheet_name = timestamp.strftime(tabname)
# create a writer for this month and year
writer = pd.ExcelWriter(filename, engine='openpyxl')
try:
# try to open an existing workbook
writer.book = load_workbook(filename)
# copy existing sheets
writer.sheets = dict(
(ws.title, ws) for ws in writer.book.worksheets)
except IOError:
# file does not exist yet, we will create it
pass
# write out the new sheet
dataframe.to_excel(writer, sheet_name=sheet_name)
# save the workbook
writer.save()
Test Code:
import datetime as dt
import pandas as pd
from openpyxl import load_workbook
data = [x.strip().split() for x in """
Date Close
2016-10-18T13:44:59 2128.00
2016-10-18T13:59:59 2128.75
""".split('\n')[1:-1]]
df = pd.DataFrame(data=data[1:], columns=data[0])
name_template = './sample-%m-%y.xlsx'
tab_template = '%d_%H_%M'
now = dt.datetime.now()
in_an_hour = now + dt.timedelta(hours=1)
add_frame_to_workbook(name_template, tab_template, df, now)
add_frame_to_workbook(name_template, tab_template, df, in_an_hour)
(Source)

Write a pandas df into Excel and save it into a copy

I have a pandas dataframe and I want to open an existing excel workbook containing formulas, copying the dataframe in a specific set of columns (lets say from column A to column H) and save it as a new file with a different name.
The idea is to update an existing template, populate it with the dataframe in a specified set of column and then save a copy of the Excel file with a different name.
Any idea?
What I have is:
import pandas
from openpyxl import load_workbook
book = load_workbook('Template.xlsx')
writer = pandas.ExcelWriter('Template.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer)
writer.save()
The below should work, assuming that you are happy to copy into column A. I don't see a way to write into the sheet starting in a different column (without overwriting anything).
The below incorporates #MaxU's suggestion of copying the template sheet before writing to it (having just lost a few hours' work on my own template workbook to pd.to_excel)
import pandas as pd
from openpyxl.utils.dataframe import dataframe_to_rows
from shutil import copyfile
template_file = 'Template.xlsx' # Has a header in row 1 already
output_file = 'Result.xlsx' # What we are saving the template as
# Copy Template.xlsx as Result.xlsx
copyfile(template_file, output_file)
# Read in the data to be pasted into the termplate
df = pd.read_csv('my_data.csv')
# Load the workbook and access the sheet we'll paste into
wb = load_workbook(output_file)
ws = wb.get_sheet_by_name('Existing Result Sheet')
# Selecting a cell in the header row before writing makes append()
# start writing to the following line i.e. row 2
ws['A1']
# Write each row of the DataFrame
# In this case, I don't want to write the index (useless) or the header (already in the template)
for r in dataframe_to_rows(df, index=False, header=False):
ws.append(r)
wb.save(output_file)
try this:
df.to_excel(writer, startrow=10, startcol=1, index=False, engine='openpyxl')
Pay attention at startrow and startcol parameters

Categories