How to add sheet to existing excel file with pandas?

How to add sheet to existing excel file with pandas? - python

import pandas as pd
from openpyxl import load_workbook
# Load the existing workbook so it can be attached to the pandas writer.
book = load_workbook('test.xlsx')
# NOTE(review): no `engine=` argument is given here; presumably the default
# writer engine is not openpyxl, which would explain the `add_worksheet`
# AttributeError reported below -- confirm.
writer = pd.ExcelWriter('test.xlsx')
writer.book = book
# Map each worksheet title to its worksheet object for the writer.
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df = pd.DataFrame({'a':[1,3,5,7,4,5,6,4,7,8,9],
'b':[3,5,6,2,4,6,7,8,7,8,9]})
# Write the frame to the sheet named 'tab_name' without the index column.
df.to_excel(writer, sheet_name='tab_name', index = False)
writer.save()
I get an error
AttributeError: 'Workbook' object has no attribute 'add_worksheet'

import pandas as pd
from openpyxl import load_workbook
fn = 'test.xlsx'

# Current content of the workbook's first sheet, read without a header row.
df = pd.read_excel(fn, header=None)
df2 = pd.DataFrame({
    'a': [1, 3, 5, 7, 4, 5, 6, 4, 7, 8, 9],
    'b': [3, 5, 6, 2, 4, 6, 7, 8, 7, 8, 9],
})

# mode='a' opens the existing workbook instead of truncating it, and
# if_sheet_exists='overlay' writes into 'tab_name' in place when it already
# exists. This replaces assigning `writer.book` / `writer.sheets`, which is
# not public API, and the context manager saves and closes the file on exit.
with pd.ExcelWriter(fn, engine='openpyxl', mode='a',
                    if_sheet_exists='overlay') as writer:
    df.to_excel(writer, sheet_name='tab_name', header=False, index=False)
    # Second frame goes on the same sheet, offset so it does not start at A1.
    df2.to_excel(writer, sheet_name='tab_name', header=False, index=False,
                 startcol=7, startrow=6)

Related

Property 'sheets' of 'OpenpyxlWriter' object has no setter using pandas and openpyxl

This code used to open an existing xlsx file and write over it, but after updating from pandas 1.1.5 to 1.5.1 I got zipfile.BadZipFile: File is not a zip file.
Then I read here that after pandas 1.2.0 the pd.ExcelWriter(report_path, engine='openpyxl') creates a new file but as this is a completely empty file, openpyxl cannot load it.
Knowing that, I changed the code to this one, but now I'm getting AttributeError: property 'sheets' of 'OpenpyxlWriter' object has no setter. How should I handle this?
# Load the existing workbook before creating the writer.
book = load_workbook('Resultados.xlsx')
writer = pd.ExcelWriter('Resultados.xlsx', engine='openpyxl')
writer.book = book
# This assignment is the one reported to raise
# "property 'sheets' of 'OpenpyxlWriter' object has no setter".
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
# Read the file to learn how many rows it currently holds.
reader = pd.read_excel(r'Resultados.xlsx')
# `dict_` is defined outside this snippet.
df = pd.DataFrame.from_dict(dict_)
# Start writing at row len(reader) + 1, without header or index.
df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
writer.close()

TLDR
Use .update to modify writer.sheets
Rearrange the order of your script to get it working
# run before initializing the ExcelWriter: once the writer has been created
# the file can no longer be read until it is saved again
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
book = load_workbook("Resultados.xlsx")
# use `with` to avoid other exceptions
with pd.ExcelWriter("Resultados.xlsx", engine="openpyxl") as writer:
    # NOTE: assigning `book` is not part of the public API and emits a
    # FutureWarning saying it will be removed in a future version.
    writer.book = book
    # `sheets` is a read-only property, so update the mapping in place
    # instead of assigning a new dict to it.
    writer.sheets.update({ws.title: ws for ws in book.worksheets})
    # `df` is defined outside this snippet.
    df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
Details
Recreating your problem with some fake data
import numpy as np
from openpyxl import load_workbook
import pandas as pd
if __name__ == "__main__":
    # make some random data
    np.random.seed(0)
    df = pd.DataFrame(np.random.random(size=(5, 5)))

    # this makes an existing file
    with pd.ExcelWriter("Resultados.xlsx", engine="openpyxl") as writer:
        df.to_excel(excel_writer=writer)

    # make new random data
    np.random.seed(1)
    df = pd.DataFrame(np.random.random(size=(5, 5)))

    # what you tried...
    # (kept unchanged on purpose: this part reproduces the reported error)
    book = load_workbook("Resultados.xlsx")
    writer = pd.ExcelWriter("Resultados.xlsx", engine="openpyxl")
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
    reader = pd.read_excel("Resultados.xlsx")
    # skipping this step as we defined `df` differently
    # df = pd.DataFrame.from_dict(dict_)
    df.to_excel(writer, index=False, header=False, startrow=len(reader)+1)
    writer.close()
We get the same error plus a FutureWarning
...\StackOverflow\answer.py:23: FutureWarning: Setting the `book` attribute is not part of the public API, usage can give unexpected or corrupted results and will be removed in a future version
writer.book = book
Traceback (most recent call last):
File "...\StackOverflow\answer.py", line 24, in <module>
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
AttributeError: can't set attribute 'sheets'
The AttributeError is because sheets is a property of the writer instance. If you're unfamiliar with it, here is a resource.
In shorter terms, the exception is raised because sheets cannot be modified in the way you're trying. However, you can do this:
# use the `.update` method
writer.sheets.update(dict((ws.title, ws) for ws in book.worksheets))
That will move us past the AttributeError, but we'll hit a ValueError a couple of lines down:
reader = pd.read_excel("Resultados.xlsx")
Traceback (most recent call last):
File "...\StackOverflow\answer.py", line 26, in <module>
reader = pd.read_excel("Resultados.xlsx")
...
File "...\lib\site-packages\pandas\io\excel\_base.py", line 1656, in __init__
raise ValueError(
ValueError: Excel file format cannot be determined, you must specify an engine manually.
Do what the error message says and supply an argument to the engine parameter
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
And now we're back to your original zipfile.BadZipFile exception
Traceback (most recent call last):
File "...\StackOverflow\answer.py", line 26, in <module>
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
...
File "...\Local\Programs\Python\Python310\lib\zipfile.py", line 1334, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
After a bit of toying, I noticed that the Resultados.xlsx file could not be opened manually after running this line:
writer = pd.ExcelWriter("Resultados.xlsx", engine="openpyxl")
So I reordered some of the steps in your code:
# run before initializing the ExcelWriter: creating the writer leaves the
# file unreadable until it has been saved again
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
book = load_workbook("Resultados.xlsx")
# the old way
# writer = pd.ExcelWriter("Resultados.xlsx", engine="openpyxl")
with pd.ExcelWriter("Resultados.xlsx", engine="openpyxl") as writer:
    # NOTE: assigning `book` is not part of the public API and emits a
    # FutureWarning saying it will be removed in a future version.
    writer.book = book
    # update the existing mapping; `sheets` itself cannot be reassigned
    writer.sheets.update({ws.title: ws for ws in book.worksheets})
    df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)

try this:
filepath = r'Resultados.xlsx'

# Count the rows already in the file before the writer opens it, so the
# read never competes with the writer's own handle on the same file.
reader = pd.read_excel(filepath)

# mode='a' keeps the existing workbook; if_sheet_exists='overlay' writes into
# the existing sheet instead of raising or creating a renamed copy.
with pd.ExcelWriter(
        filepath,
        engine='openpyxl',
        mode='a',
        if_sheet_exists='overlay') as writer:
    # `df` is defined outside this snippet; start at row
    # reader.shape[0] + 1, without header or index.
    df.to_excel(
        writer,
        startrow=reader.shape[0] + 1,
        index=False,
        header=False)

AttributeError when trying to save pandas dataframe to existing excel sheet

I am trying to write a pandas data frame to an existing excel sheet on a new tab, but it gives me the following error:
AttributeError: 'NoneType' object has no attribute 'read'.
I've determined this is because pandas to_excel returns a NoneType object, which isn't allowing me to save the file with writer.save(). Does anyone know a workaround for this?
path = 'summary.xlsx'
# Load the existing workbook so the new tab can be added next to its sheets.
book = load_workbook(path)
writer = pd.ExcelWriter(path, engine='openpyxl')
# Hand the loaded workbook and a title -> worksheet mapping to the writer.
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
# `df` is defined outside this snippet.
df.to_excel(writer, sheet_name="results")
# Saving is the step that reportedly fails with
# "'NoneType' object has no attribute 'read'".
writer.save()

I had exactly the same issue.
I managed to work around it by removing the value in legacy_drawing from each sheet in the workbook.
path = 'summary.xlsx'

# Append mode loads the existing workbook through the public API (no
# assignment to `writer.book` / `writer.sheets`); 'overlay' keeps the old
# behaviour of writing into "results" in place if that sheet already exists.
with pd.ExcelWriter(path, engine='openpyxl', mode='a',
                    if_sheet_exists='overlay') as writer:
    # Workaround: drop the legacy drawing reference from every sheet so that
    # saving no longer tries to read it.
    for ws in writer.book.worksheets:
        ws.legacy_drawing = None
    # `df` is defined outside this snippet.
    df.to_excel(writer, sheet_name="results")

Style the title

def multiple_dfs(file_name, sheet, *args):
    """
    Put multiple dataframes into one xlsx sheet.

    Writes a one-cell title frame at the top-left corner of ``sheet`` and a
    small demo table starting two rows below it, then saves ``file_name``.

    Parameters:
        file_name: path of the xlsx file to create.
        sheet: name of the worksheet that receives both frames.
        *args: accepted for compatibility; currently unused.
    """
    row = 2
    df = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
                       'income': [40000, 50000, 42000]})
    df2 = pd.DataFrame(['Title'])
    # The context manager saves and closes the workbook; without it nothing
    # was ever written to disk.
    with pd.ExcelWriter(file_name, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name=sheet, startrow=row, index=False)
        df2.to_excel(writer, sheet_name=sheet, startrow=0, startcol=0,
                     header=False, index=False)
I would like to resize and bold Title from this code with pandas (or with openpyxl if it is not possible with pandas). Is there a way to do it accurately and "pythonically"?
I know we could use http://openpyxl.readthedocs.io/en/default/styles.html, but I don't know how to use it with my code.
UPDATE
According to @AndyHayden, the following code should normally make the title bold, but it does not seem to work.
import pandas as pd
import io, os, openpyxl
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import (PatternFill, Border, Side, Alignment, Protection, Font)
def multiple_dfs(sheet):
    """Write a title cell and a small table to ``testing.xlsx`` on *sheet*.

    The snippet is kept behaviourally as posted (it is the code the question
    is about); only the lost indentation has been restored.
    """
    row = 2
    writer = pd.ExcelWriter("testing.xlsx", engine='openpyxl')
    df = pd.DataFrame(['Title'])
    df2 = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
                        'income': [40000, 50000, 42000]})
    df.to_excel(writer, sheet, startrow=0, startcol=0, header=None,
                index=False)
    df2.to_excel(writer, sheet, startrow=row, index=False)

    # A second, independent workbook that receives only the title row.
    wb = Workbook()
    ws = wb.active
    for r in dataframe_to_rows(df, index=False, header=False):
        ws.append(r)
    # `df` holds a single row, so styling A1 after the loop is equivalent to
    # styling it inside the loop.
    title = ws["A1"]
    title.font = Font(bold=True)
    wb.save("testing.xlsx")
    # NOTE(review): the writer saves to the same path right after `wb.save`,
    # so the styled workbook is presumably overwritten by the unstyled one --
    # this would explain why the title never appears bold.
    writer.save()
    writer.close()


multiple_dfs('aaa')
The title never shows up in bold. How can I fix that?
Please let me know if the question is unclear.

The openpyxl docs offer a neat way to do this:
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = Workbook()
ws = wb.active

# Append the frame row by row, including the header row and the index column.
# `df` is defined outside this snippet.
for r in dataframe_to_rows(df, index=True, header=True):
    ws.append(r)

# Column A holds the index and row 1 the header: apply the 'Pandas' style to
# both.
for cell in ws['A'] + ws[1]:
    cell.style = 'Pandas'

wb.save("pandas_openpyxl.xlsx")
Note: if you are doing some other inserting beforehand (like you suggest in your previous question), you'll have to use a different row number.

How to write at the same time to Different excel Using Python

I am trying to write to all of the files that I have at the same time.
I have some files
izzymonroe#mail.ru.xlsx,
lucky-frog#mail.ru.xlsx,
lucky-frog#mail.ru.xlsx,
izzymonroe#mail.ru.xlsx,
Yubodrova#ya.ru.xlsx,
lucky-frog#mail.ru.xlsx,
Ant.karpoff2011#yandex.ru.xlsx
9rooney9#list.ru.xlsx
and I want to write data to them. How can I pass each file to the function? (I also need the values written to each file to come from a groupby.)
df = pd.read_excel('group.xlsx')
def add_xlsx_sheet(df, sheet_name=u'Смартфоны полно', index=True, digits=1, path='9rooney9#list.ru.xlsx'):
    """Write ``df`` to the sheet ``sheet_name`` of the existing workbook ``path``.

    If the workbook already contains a sheet with that name, it is replaced;
    all other sheets are kept.

    Parameters:
        df: the DataFrame to write.
        sheet_name: name of the target worksheet.
        index: whether to write the DataFrame index.
        digits: number of decimal places used when formatting floats.
        path: path of the existing xlsx file to modify.
    """
    # Open the file named by `path` (it used to be hard-coded, so every call
    # wrote to the same workbook). mode='a' keeps the other sheets, and
    # if_sheet_exists='replace' drops a stale sheet before writing, instead
    # of writing into a worksheet object already removed from the workbook.
    with ExcelWriter(path, engine='openpyxl', mode='a',
                     if_sheet_exists='replace') as writer:
        df.to_excel(writer, sheet_name=sheet_name, startrow=0, startcol=0,
                    float_format='%.{}f'.format(digits), index=index)
It works for one file, but it writes all of the data to that single file. I need each group to be written to the file whose name matches the id (e-mail) of that group.
How can I specify every file in the function and then use it with a call like the following?
df.groupby('member_id').apply(lambda g: g.to_excel(str(g.name) + '.xlsx', 'sheet2'))

The problem was solved with df.groupby('col_name').apply(lambda x: add_xlsx_sheet(x, x.name, path='{}.xlsx'.format(x.name)))

xlsxwriter intersect row and column formats

I'm saving pandas DataFrame to Excel with xlsxwriter. First I'm adding some format to columns (change font, for instance). Then I want to change background color for some rows. But when I add set_row function, all my column's format is gone. Is there a way to combine it?
import pandas as pd
data = pd.DataFrame({'test_data': [1,2,3,4,5]})
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
# Disable pandas' own header styling before writing.
pd.core.format.header_style = None
data.to_excel(writer, sheet_name='test', index=False)
workbook = writer.book
worksheet = writer.sheets['test']

# Column-level format: font only.
font_fmt = workbook.add_format({'font_name': 'Arial', 'font_size': 10})
worksheet.set_column('A:A', None, font_fmt)

# Row-level format for every other data row (sheet rows 1, 3 and 5).
# As posted, this format carries only the background colour, which is the
# situation the question describes: the column font is lost on those rows.
zebra = workbook.add_format({'bg_color': 'green'})
for index in range(5):
    if index % 2 == 0:
        worksheet.set_row(index+1, None, zebra)
writer.save()

This should help:
import pandas as pd
data = pd.DataFrame({'test_data': [1, 2, 3, 4, 5]})
# Disable pandas' own header styling before writing.
# NOTE(review): `pd.core.format` is a legacy location -- confirm it exists in
# the pandas version in use.
pd.core.format.header_style = None

# The context manager saves and closes the workbook on exit.
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter') as writer:
    data.to_excel(writer, sheet_name='test', index=False)
    workbook = writer.book
    worksheet = writer.sheets['test']

    # Shared font settings, used by both the column and the row format so
    # that the row format does not discard the column's font.
    formatdict = {'font_name': 'Calibri', 'font_size': 10, 'font_color': 'red'}
    font_fmt = workbook.add_format(formatdict)
    worksheet.set_column('A:A', None, font_fmt)

    # Row format = the same font settings plus the background colour.
    zebra = workbook.add_format(formatdict)
    zebra.set_bg_color('green')
    # Sheet rows 1, 3 and 5, i.e. every other data row below the header.
    for index in range(1, 6, 2):
        worksheet.set_row(index, None, zebra)
This should produce the following output:

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

How to add sheet to existing excel file with pandas? - python

Related

Property 'sheets' of 'OpenpyxlWriter' object has no setter using pandas and openpyxl

AttributeError when trying to save pandas dataframe to existing excel sheet

Style the title

How to write at the same time to Different excel Using Python

xlsxwriter intersect row and column formats

Categories

Resources