Can't read xlsx file with pandas

Can't read xlsx file with pandas - python

I am trying to read an .xlsx file as a dataframe. The file itself has two worksheets, but when I try to read it, it returns an empty worksheet. Even when I specify the sheet_name, I get an error saying there is no worksheet with the name I provided.
I have tried several methods, but they all return [].
'''
from openpyxl import load_workbook
workbook = load_workbook(filename="filename.xlsx",read_only = True, data_only = True)
print(workbook.sheetnames)
'''
'''
xl = pd.read_excel('filename.xlsx',engine='openpyxl')
xl.sheet_names
'''

If you need a list of sheet names:
import pandas as pd

# ExcelFile keeps the workbook open; the with-block closes it when done.
with pd.ExcelFile('filename.xlsx') as xl:
    # Names of all worksheets in the workbook
    print(xl.sheet_names)
    # to read from specific sheet
    sheetname = xl.sheet_names[0]  # or any other name from the list above
    df = xl.parse(sheetname)
If you know the sheet name just use:
pd.read_excel('filename.xlsx', sheet_name='sheetname')

With pandas:
pandas.read_excel
import pandas as pd

# Load a single named worksheet into a DataFrame, using openpyxl as the reader.
workbook_path = 'filename.xlsx'
target_sheet = 'your sheet name'
df = pd.read_excel(workbook_path, sheet_name=target_sheet, engine='openpyxl')
With openpyxl:
Read an existing workbook and
Converting a worksheet to a Dataframe
from openpyxl import load_workbook
import pandas as pd

# data_only=True makes formula cells return their last cached value
# instead of the formula text.
wb = load_workbook(
    filename='filename.xlsx',
    data_only=True,
)
sheet_names = wb.sheetnames  # list available sheet names in workbook
ws = wb['your sheet name']
# ws.values yields each row as a tuple of plain cell values; iterating the
# worksheet itself yields openpyxl Cell objects rather than their contents.
# The first spreadsheet row ends up as data row 0, not as column headers.
df = pd.DataFrame(ws.values)

Thanks everyone, I found the problem. It was because of excel.

Related

how to add data frame to existing excel work book

'`import pandas
from openpyxl import load_workbook
mypath="C:\Users\egoyrat\Desktop\smt tracker\Swap Manual Tracking_v1 (12).xlsx"
wb = load_workbook(mypath,read_only=False)
wb_ws= wb['Main']
for row in dataframe_to_rows(now_append, header = False, index = False):
wb_ws.append(row)
wb.save(mypath) # save workbook
wb.close()
writer.save()
writer.close()`
I have done this, but it is not working properly: the data is appended, but not to the particular column I want.

Pandas creates new excel sheet when trying to append to existing sheet

I have the code where I want to read data from the current sheet, store it in df_old, append the current data to it using df = df_old.append(df) and then replace the data in the sheet with this new dataframe. However, what it does instead is create a new sheet with the exact same name where it publishes this new dataframe. I tried adding if_sheet_exists="replace" as an argument to ExcelWriter but this did not change anything. How can I force it to overwrite the data in the sheet with the current name?
df_old = pd.read_excel(r'C:\Users\XXX\Downloads\Digitalisation\mat_flow\reblend_v2.xlsx',sheet_name = ft_tags_final[i][j])
df = df_old.append(df)
with pd.ExcelWriter(r'C:\Users\XXX\Downloads\Digitalisation\mat_flow\reblend_v2.xlsx', engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
df.to_excel(writer, index=False, sheet_name = ft_tags_final[i][j])

I had the same issue and I solved it by using write mode instead of append mode. I also used openpyxl instead of xlsxwriter.
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
from openpyxl import load_workbook

# Load the existing workbook before the writer is created for the same path.
book = load_workbook('Wallet.xlsx')
writer = pd.ExcelWriter('Wallet.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = {ws.title: ws for ws in book.worksheets}
#^THIS IS THE MOST IMPORTANT LINES BECAUSE IT GIVES PANDAS THE SHEET
# NOTE(review): newer pandas releases (2.x) reject assignment to writer.book;
# there, open the writer with mode='a', if_sheet_exists='overlay' instead.
# Data, number and counter are not defined in this snippet; they are assumed
# to come from the surrounding script.
Data.to_excel(writer, sheet_name='Main', header=None, index=False, startcol=number, startrow=counter)
# Without closing the writer, the changes are never written back to disk.
writer.close()

unable to write updated worksheet back to the same workbook-Python Excel Openpyxl

Thank you for reading my post and I appreciate your help!
I am trying to complete below steps using Python:
copy 5 excel xlsx files from a folder as data source(all 5 files only have 1 sheet each)
paste the above mentioned excel files in one workbook as 5 separate worksheet.
Make updates on every sheet(for example, on sheet1 I need to sum a specific column etc.) and then write the modified sheets back to the same workbook.
Issue: when I write back to the original file it replaced the workbook in step2. I searched far and wide here and it says I need to change writer to openpyxl, however, when I change to openpyxl it has "zipfile.BadZipFile: File is not a zip file" error.
import openpyxl
import pandas as pd
import os
from openpyxl import Workbook
import xlsxwriter as xw
import openpyxl as xl
from openpyxl.utils import get_column_letter
import numpy as np
# step 1: copy sheets from data folder into master workbook
# opening the source excel file
df1 = pd.read_excel(r"file1path.xlsx")
df2 = pd.read_excel(r"file2path.xlsx")
df3 = pd.read_excel(r"file3path.xlsx")
df4 = pd.read_excel(r"file4path.xlsx")
df5 = pd.read_excel(r"file5path.xlsx")
dest_filename = r'masterfilepath.xlsx'
# opening the destination excel file
writer = pd.ExcelWriter(dest_filename, engine='xlsxwriter')
# Write to master workbook.
df1.to_excel(writer, sheet_name='W.5 Revenue',index= False)
df2.to_excel(writer, sheet_name='W.4 Rev details',index= False)
df3.to_excel(writer, sheet_name='W.7 Accrual',index= False)
df4.to_excel(writer, sheet_name='W.6 Adhoc',index= False)
df5.to_excel(writer, sheet_name='W.8 State',index= False)
# Close the Pandas Excel writer and output the Excel file.
writer.save()
# Filter rows account number beginning with 4 on file1(df1)
df1 = df1[df1["Ledger account"].str.startswith('4')]
writer = pd.ExcelWriter(dest_filename, engine='openpyxl')
if os.path.exists(dest_filename):
book = openpyxl.load_workbook(dest_filename)
writer.book = book
df1.to_excel(writer, sheet_name="W5")
writer.save()
writer.close()

Insert worksheet at specified index in existing Excel file using Pandas

Is there a way to insert a worksheet at a specified index using Pandas? With the code below, when adding a dataframe as a new worksheet, it gets added after the last sheet in the existing Excel file. What if I want to insert it at say index 1?
import pandas as pd
from openpyxl import load_workbook
f = 'existing_file.xlsx'
df = pd.DataFrame({'cat':['A','B'], 'word': ['C','D']})
book = load_workbook(f)
writer = pd.ExcelWriter(f, engine = 'openpyxl')
writer.book = book
df.to_excel(writer, sheet_name = 'sheet')
writer.save()
writer.close()
Thank you.

Write a pandas df into Excel and save it into a copy

I have a pandas dataframe and I want to open an existing excel workbook containing formulas, copying the dataframe in a specific set of columns (lets say from column A to column H) and save it as a new file with a different name.
The idea is to update an existing template, populate it with the dataframe in a specified set of column and then save a copy of the Excel file with a different name.
Any idea?
What I have is:
import pandas
from openpyxl import load_workbook
book = load_workbook('Template.xlsx')
writer = pandas.ExcelWriter('Template.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer)
writer.save()

The below should work, assuming that you are happy to copy into column A. I don't see a way to write into the sheet starting in a different column (without overwriting anything).
The below incorporates @MaxU's suggestion of copying the template sheet before writing to it (having just lost a few hours' work on my own template workbook to pd.to_excel).
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from shutil import copyfile

template_file = 'Template.xlsx'  # Has a header in row 1 already
output_file = 'Result.xlsx'  # What we are saving the template as

# Copy Template.xlsx as Result.xlsx so the template itself is never modified
copyfile(template_file, output_file)

# Read in the data to be pasted into the template
df = pd.read_csv('my_data.csv')

# Load the workbook and access the sheet we'll paste into
# (wb[...] replaces the deprecated wb.get_sheet_by_name(...))
wb = load_workbook(output_file)
ws = wb['Existing Result Sheet']

# Selecting a cell in the header row before writing makes append()
# start writing to the following line i.e. row 2
ws['A1']

# Write each row of the DataFrame
# In this case, I don't want to write the index (useless) or the header (already in the template)
for r in dataframe_to_rows(df, index=False, header=False):
    ws.append(r)

wb.save(output_file)

try this:
df.to_excel(writer, startrow=10, startcol=1, index=False, engine='openpyxl')
Pay attention to the startrow and startcol parameters.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Can't read xlsx file with pandas - python

If you need list of sheet names: xl = pd.ExcelFile('filename.xlsx') xl.sheet_names # to read from specific sheet xl.parse(sheetname) If you know the sheet name just use: pd.read_excel('filename.xlsx', sheet_name='sheetname')

Thanks everyone, I found the problem. It was because of excel.

Related

how to add data frame to existing excel work book

Pandas creates new excel sheet when trying to append to existing sheet

unable to write updated worksheet back to the same workbook-Python Excel Openpyxl

Insert worksheet at specified index in existing Excel file using Pandas

Write a pandas df into Excel and save it into a copy

Categories

Resources