How to add sheet to existing excel file with pandas? - python

import pandas as pd
from openpyxl import load_workbook
book = load_workbook('test.xlsx')
writer = pd.ExcelWriter('test.xlsx')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df = pd.DataFrame({'a':[1,3,5,7,4,5,6,4,7,8,9],
'b':[3,5,6,2,4,6,7,8,7,8,9]})
df.to_excel(writer, sheet_name='tab_name', index = False)
writer.save()
I get an error
AttributeError: 'Workbook' object has no attribute 'add_worksheet'

import pandas as pd
from openpyxl import load_workbook
fn = 'test.xlsx'
df = pd.read_excel(fn, header=None)
df2 = pd.DataFrame({'a':[1,3,5,7,4,5,6,4,7,8,9],
'b':[3,5,6,2,4,6,7,8,7,8,9]})
writer = pd.ExcelWriter(fn, engine='openpyxl')
book = load_workbook(fn)
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, sheet_name='tab_name', header=None, index=False)
df2.to_excel(writer, sheet_name='tab_name', header=None, index=False,
startcol=7,startrow=6)
writer.save()

Related

Property 'sheets' of 'OpenpyxlWriter' object has no setter using pandas and openpyxl

This code used to get a xlsx file and write over it, but after updating from pandas 1.1.5 to 1.5.1 I got zipfile.badzipfile file is not a zip file
Then I read here that after pandas 1.2.0 the pd.ExcelWriter(report_path, engine='openpyxl') creates a new file but as this is a completely empty file, openpyxl cannot load it.
Knowing that, I changed the code to this one, but now I'm getting AttributeError: property 'sheets' of 'OpenpyxlWriter' object has no setter. How should I handle this?
book = load_workbook('Resultados.xlsx')
writer = pd.ExcelWriter('Resultados.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
reader = pd.read_excel(r'Resultados.xlsx')
df = pd.DataFrame.from_dict(dict_)
df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
writer.close()
TLDR
Use .update to modify writer.sheets
Rearrange the order of your script to get it working
# run before initializing the ExcelWriter
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
book = load_workbook("Resultados.xlsx")
# use `with` to avoid other exceptions
with pd.ExcelWriter("Resultados.xlsx", engine="openpyxl") as writer:
writer.book = book
writer.sheets.update(dict((ws.title, ws) for ws in book.worksheets))
df.to_excel(writer, index=False, header=False, startrow=len(reader)+1)
Details
Recreating your problem with some fake data
import numpy as np
from openpyxl import load_workbook
import pandas as pd
if __name__ == "__main__":
# make some random data
np.random.seed(0)
df = pd.DataFrame(np.random.random(size=(5, 5)))
# this makes an existing file
with pd.ExcelWriter("Resultados.xlsx", engine="openpyxl") as writer:
df.to_excel(excel_writer=writer)
# make new random data
np.random.seed(1)
df = pd.DataFrame(np.random.random(size=(5, 5)))
# what you tried...
book = load_workbook("Resultados.xlsx")
writer = pd.ExcelWriter("Resultados.xlsx", engine="openpyxl")
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
reader = pd.read_excel("Resultados.xlsx")
# skipping this step as we defined `df` differently
# df = pd.DataFrame.from_dict(dict_)
df.to_excel(writer, index=False, header=False, startrow=len(reader)+1)
writer.close()
We get the same error plus a FutureWarning
...\StackOverflow\answer.py:23: FutureWarning: Setting the `book` attribute is not part of the public API, usage can give unexpected or corrupted results and will be removed in a future version
writer.book = book
Traceback (most recent call last):
File "...\StackOverflow\answer.py", line 24, in <module>
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
AttributeError: can't set attribute 'sheets'
The AttributeError is because sheets is a property of the writer instance. If you're unfamiliar with it, here is a resource.
In shorter terms, the exception is raised because sheets cannot be modified in the way you're trying. However, you can do this:
# use the `.update` method
writer.sheets.update(dict((ws.title, ws) for ws in book.worksheets))
That will move us past the the AttributeError, but we'll hit a ValueError a couple lines down:
reader = pd.read_excel("Resultados.xlsx")
Traceback (most recent call last):
File "...\StackOverflow\answer.py", line 26, in <module>
reader = pd.read_excel("Resultados.xlsx")
...
File "...\lib\site-packages\pandas\io\excel\_base.py", line 1656, in __init__
raise ValueError(
ValueError: Excel file format cannot be determined, you must specify an engine manually.
Do what the error message says and supply an argument to the engine parameter
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
And now we're back to your original zipfile.BadZipFile exception
Traceback (most recent call last):
File "...\StackOverflow\answer.py", line 26, in <module>
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
...
File "...\Local\Programs\Python\Python310\lib\zipfile.py", line 1334, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
After a bit of toying, I noticed that the Resultados.xlsx file could not be opened manually after running this line:
writer = pd.ExcelWriter("Resultados.xlsx", engine="openpyxl")
So I reordered some of the steps in your code:
# run before initializing the ExcelWriter
reader = pd.read_excel("Resultados.xlsx", engine="openpyxl")
book = load_workbook("Resultados.xlsx")
# the old way
# writer = pd.ExcelWriter("Resultados.xlsx", engine="openpyxl")
with pd.ExcelWriter("Resultados.xlsx", engine="openpyxl") as writer:
writer.book = book
writer.sheets.update(dict((ws.title, ws) for ws in book.worksheets))
df.to_excel(writer, index=False, header=False, startrow=len(reader)+1)
try this:
filepath = r'Resultados.xlsx'
with pd.ExcelWriter(
filepath,
engine='openpyxl',
mode='a',
if_sheet_exists='overlay') as writer:
reader = pd.read_excel(filepath)
df.to_excel(
writer,
startrow=reader.shape[0] + 1,
index=False,
header=False)

AttributeError when trying to save pandas dataframe to existing excel sheet

I am trying to write a pandas data frame to an existing excel sheet on a new tab, but it gives me the following error:
AttributeError: 'NoneType' object has no attribute 'read'.
I've determined this is because pandas to_excel returns a NoneType object, which isn't allowing me to save the file with writer.save(). Does anyone know a workaround for this?
path = 'summary.xlsx'
book = load_workbook(path)
writer = pd.ExcelWriter(path, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, sheet_name="results")
writer.save()
I had exactly the same issue.
I managed to work around it by removing the value in legacy_drawing from each sheet in the workbook.
path = 'summary.xlsx'
book = load_workbook(path)
writer = pd.ExcelWriter(path, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
for s in list(writer.sheets.keys()):
writer.sheets[s].legacy_drawing = None
df.to_excel(writer, sheet_name="results")
writer.save()

Style the title

def multiple_dfs(file_name, sheet, *args):
"""
Put multiple dataframes into one xlsx sheet
"""
row=2
writer = pd.ExcelWriter(file_name, engine='openpyxl')
df = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
'income': [40000, 50000, 42000]})
df2 = pd.DataFrame(['Title'])
df.to_excel(writer, sheet, startrow=row, index=False)
df2.to_excel(writer, sheet, startrow=0, startcol=0, header=None, \
index=False)
I would like to resize and bold Title from this code with pandas (or with openpyxl if it is not possible with pandas). Is there a way to do it accurately and "pythonically"?
I know we could use http://openpyxl.readthedocs.io/en/default/styles.html, but I don't know how to use it with my code.
UPDATE
According to #AndyHayden, normally the following code should bold the title, but that seems to not working.
import pandas as pd
import io, os, openpyxl
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import (PatternFill, Border, Side, Alignment, Protection, Font)
def multiple_dfs(sheet):
row=2
writer = pd.ExcelWriter("testing.xlsx", engine='openpyxl')
df = pd.DataFrame(['Title'])
df2 = pd.DataFrame({'user': ['Bob', 'Jane', 'Alice'],
'income': [40000, 50000, 42000]})
df.to_excel(writer, sheet, startrow=0, startcol=0, header=None, \
index=False)
df2.to_excel(writer, sheet, startrow=row, index=False)
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=False, header=False):
ws.append(r)
title = ws["A1"]; title.font = Font(bold=True)
wb.save("testing.xlsx")
writer.save()
writer.close()
multiple_dfs('aaa')
The title is never showed up in bold. How could I fix that?
Please let me know if the question is unclear.
The openpyxl docs offer a neat way to do this:
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=True, header=True):
ws.append(r)
for cell in ws['A'] + ws[1]:
cell.style = 'Pandas'
wb.save("pandas_openpyxl.xlsx")
Note: That if you are doing some other inserting beforehand (like you suggest in your previous question, you'll have to use a different row number.

How to write at the same time to Different excel Using Python

I try to write to all files, that I have at the same time.
I have some files
izzymonroe#mail.ru.xlsx,
lucky-frog#mail.ru.xlsx,
lucky-frog#mail.ru.xlsx,
izzymonroe#mail.ru.xlsx,
Yubodrova#ya.ru.xlsx,
lucky-frog#mail.ru.xlsx,
Ant.karpoff2011#yandex.ru.xlsx
9rooney9#list.ru.xlsx
and I want to write data to this. But how can I send it to function(and I need to write to file value with groupby)
df = pd.read_excel('group.xlsx')
def add_xlsx_sheet(df, sheet_name=u'Смартфоны полно', index=True, digits=1, path='9rooney9#list.ru.xlsx'):
book = load_workbook(path)
writer = ExcelWriter('9rooney9#list.ru.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
if sheet_name in list(writer.sheets.keys()):
sh = book.get_sheet_by_name(sheet_name)
book.remove_sheet(sh)
df.to_excel(writer, sheet_name=u'Смартфоны полно', startrow=0, startcol=0,
float_format='%.{}f'.format(digits), index=index)
writer.save()
It works to one file, but it write all data to this file. But I need to write group, where id in mail complies the name of file
How can I specify all file in function and next
df.groupby('member_id').apply(lambda g: g.to_excel(str(g.name) + '.xlsx', 'sheet2'))
The problem was solved with df.groupby('col_name').apply(lambda x: add_xlsx_sheet(x, x.name, path='{}.xlsx'.format(x.name)))

xlsxwriter intersect row and column formats

I'm saving pandas DataFrame to Excel with xlsxwriter. First I'm adding some format to columns (change font, for instance). Then I want to change background color for some rows. But when I add set_row function, all my column's format is gone. Is there a way to combine it?
import pandas as pd
data = pd.DataFrame({'test_data': [1,2,3,4,5]})
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
pd.core.format.header_style = None
data.to_excel(writer, sheet_name='test', index=False)
workbook = writer.book
worksheet = writer.sheets['test']
font_fmt = workbook.add_format({'font_name': 'Arial', 'font_size': 10})
worksheet.set_column('A:A', None, font_fmt)
zebra = workbook.add_format({'bg_color': 'green'})
for index in range(5):
if index % 2 == 0:
worksheet.set_row(index+1, None, zebra)
writer.save()
This shall help:
import pandas as pd
data = pd.DataFrame({'test_data': [1,2,3,4,5]})
writer = pd.ExcelWriter('test.xlsx', engine='xlsxwriter')
pd.core.format.header_style = None
data.to_excel(writer, sheet_name='test', index=False)
workbook = writer.book
worksheet = writer.sheets['test']
formatdict = {'font_name': 'Calibri', 'font_size': 10, 'font_color':'red'}
font_fmt = workbook.add_format(formatdict)
worksheet.set_column('A:A', None, font_fmt)
zebra = workbook.add_format(formatdict)
zebra.set_bg_color('green')
for index in range(1,6,2):
worksheet.set_row(index, None, zebra)
writer.save()
Shall produce the following output:

Categories