how to download excel file in python and streamlit? - python

I have a Python script that reads files and converts them to a dataframe using Python and Streamlit. I then want to create a function that allows the user to download this dataframe as an Excel file with the .xls extension.
So I tried to read the dataframe and convert it to an Excel file using these two functions:
pd.ExcelWriter
df.to_excel
But when I try to download the file using a link the file doesn't download and displays this error:
Failed-Network error
Code:
import pandas as pd
import streamlit as st
# NOTE(review): `df` is not defined anywhere in this snippet; it must already hold the data to export.
writer = pd.ExcelWriter('update2.xlsx')
df.to_excel(writer, index = False, header=True,encoding='utf-8')
# NOTE(review): `writer` is the pd.ExcelWriter object created above, not a file path, yet it is passed to open().
# The writer is also never saved or closed before the file is read back.
with open(writer,'rb') as f:
# NOTE(review): `base64` is used here but is not imported in this snippet.
b64 = base64.b64encode(f.read())
# NOTE(review): `b64` is never interpolated into `href`, and `extension` is not defined in this snippet.
# Presumably the anchor markup was lost when the post was extracted -- confirm against the original post.
href = f'Download {extension}'
st.write(href, unsafe_allow_html=True)

With recent Streamlit releases (1.0.0 and above):
Use
st.download_button
Displays a download button widget.
This is useful when you would like to provide a way for your users to download a file directly from your app.
Note that the data to be downloaded is stored in memory while the user is connected, so it's a good idea to keep file sizes under a couple of hundred megabytes to conserve memory.
Here is a sample code from the discussion, that can be helpful to download excel files...
import pandas as pd
from io import BytesIO
from pyxlsb import open_workbook as open_xlsb
import streamlit as st
def to_excel(df):
    """Serialize *df* to an in-memory ``.xlsx`` workbook and return its bytes.

    The frame is written to a sheet named ``Sheet1`` without the index, and
    column A is given a two-decimal number format.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        bytes: The content of the generated workbook, suitable as the
        ``data`` argument of ``st.download_button``.
    """
    output = BytesIO()
    # Leaving the ``with`` block finalizes the workbook into ``output``.
    # ``writer.save()`` was deprecated in pandas 1.5 and removed in 2.0,
    # so it must not be called explicitly.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        workbook = writer.book
        worksheet = writer.sheets['Sheet1']
        format1 = workbook.add_format({'num_format': '0.00'})
        worksheet.set_column('A:A', None, format1)
    return output.getvalue()
# Build the workbook bytes once, then hand them to Streamlit's download widget.
df_xlsx = to_excel(df)
st.download_button(
    label='📥 Download Current Result',
    data=df_xlsx,
    file_name='df_test.xlsx',
)

This worked for me
import pandas as pd
from io import BytesIO
import streamlit as st
def to_excel(df: pd.DataFrame):
    """Return *df* rendered as an Excel workbook, as raw bytes.

    The workbook is built entirely in memory; nothing is written to disk.
    """
    buffer = BytesIO()
    df.to_excel(buffer)
    # getvalue() yields the whole buffer regardless of the stream position.
    return buffer.getvalue()
# Demo data: 25 rows of zeros in two columns.
cols = ["col1", "col2"]
df = pd.DataFrame.from_records([{k: 0.0 for k in cols} for _ in range(25)])
excel_data = to_excel(df)
file_name = "excel.xlsx"
# Media type registered for .xlsx workbooks. Building it from the file name
# (e.g. "text/excel.xlsx") does not produce a valid MIME type.
xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
st.download_button(
    f"Click to download {file_name}",
    excel_data,
    file_name,
    xlsx_mime,
    key=file_name,
)

Line 5 can't be executed because you haven't loaded any Excel data into the DataFrame df.
try something like this in your code:
# An .xlsx workbook is not CSV text, so it has to be loaded with read_excel.
df = pd.read_excel('update2.xlsx')
I hope, this helped.
Take care

def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Build an HTML download link that embeds *bin_file* as base64 data.

    Args:
        bin_file: Path of the file to offer for download.
        file_label: Human-readable label appended to the link text.

    Returns:
        str: An ``<a>`` element whose ``href`` is a ``data:`` URI holding the
        base64-encoded file content and whose ``download`` attribute is the
        file's base name.

    Raises:
        OSError: If *bin_file* cannot be opened for reading.
    """
    # Imported locally so the function works even when the surrounding
    # script did not import these modules.
    import base64
    import html
    import os

    with open(bin_file, 'rb') as f:
        data = f.read()
    bin_str = base64.b64encode(data).decode()
    # Escape the values interpolated into markup so a quote or angle bracket
    # in the file name or label cannot break out of the element.
    download_name = html.escape(os.path.basename(bin_file), quote=True)
    link_text = html.escape(str(file_label))
    href = (
        f'<a href="data:application/octet-stream;base64,{bin_str}" '
        f'download="{download_name}">Descargar {link_text}</a>'
    )
    return href
# Build the link text first, then render it; unsafe_allow_html=True is passed through to st.markdown.
download_link = get_binary_file_downloader_html('Wip_QRY.xlsx', 'Excel')
st.markdown(download_link, unsafe_allow_html=True)

Related

ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'

I am trying to create an excel file of 3 columns: System Date, Time, Value on a webpage at that time.
Intention is to create a dataframe of the 3 values, every time the code runs, and append the dataframe to existing excel workbook (with one existing sheet).
I am able to create dataframe every time code runs, but when I try to append it to an excel file, it throws error:
ValueError: Sheet 'Sheet1' already exists and if_sheet_exists is set to 'error'
Can you please suggest, where am I going wrong.
# Importing Libraries
from datetime import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup
import openpyxl
# Get the current date/time and format them as strings (dd/mm/YYYY and HH:MM)
now = datetime.now()
Date = now.strftime ("%d/%m/%Y")
Time = now.strftime ("%H:%M")
# GET request to scrape; `page` holds the response
page = requests.get("https://www.traderscockpit.com/?pageView=live-nse-advance-decline-ratio-chart")
# Create BeautifulSoup object to parse content
soup = BeautifulSoup(page.content, 'html.parser')
# Take the stripped sibling that follows the link containing "Advanced:" / "Declined:"
adv = soup.select_one('a:-soup-contains("Advanced:")').next_sibling.strip()
dec = soup.select_one('a:-soup-contains("Declined:")').next_sibling.strip()
# Advance/decline ratio, rounded to two decimals
ADratio = round(int(adv)/int(dec), 2)
# NOTE(review): the frame is built from a set holding a single tuple -- presumably meant as one row; confirm
df = pd.DataFrame({tuple([Date, Time, ADratio])})
# Open the existing workbook in append mode and read the last used row
path = r'C:\Users\kashk\OneDrive\Documents\ADratios.xlsx'
writer = pd.ExcelWriter (path, engine='openpyxl', mode = 'a')
# NOTE(review): `wb` is loaded here but not used again in this snippet
wb = openpyxl.load_workbook(path)
startrow = writer.sheets['Sheet1'].max_row
# Append data frame to existing table in existing sheet
# NOTE(review): no if_sheet_exists argument is passed to ExcelWriter above while 'Sheet1' is targeted here;
# presumably this is where the reported ValueError is raised -- confirm
df.to_excel (writer, sheet_name = 'Sheet1', index = False, header = False, startrow = startrow)
writer.save()
writer.close()
A fast and easy solution would be to upgrade pandas to version 1.4.0 or later, since it adds the if_sheet_exists='overlay' option (Source):
pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='overlay')
If you don't want to upgrade pandas, you can work around the error by replacing the sheet: read the existing rows, append the new ones, and rewrite the whole sheet in the Excel file. (Not recommended if you have a lot of records, since it will be slow.)
# The sheet name must match the existing sheet exactly ('Sheet1', no space).
path, sheet_name = 'ADratios.xlsx', 'Sheet1'
df.columns = ['Date', 'Time', 'ADratio']
# Read the rows already stored in the sheet before the writer opens the file.
df_bak = pd.read_excel(path, sheet_name=sheet_name)
combined = pd.concat([df_bak, df], axis=0)
# if_sheet_exists='replace' makes pandas drop the old sheet and write the new
# one, so the workbook does not have to be loaded or edited by hand.
with pd.ExcelWriter(path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    combined.to_excel(writer, sheet_name=sheet_name, index=False)

Read Excel file in AWS

I wanted to read an excel file in S3 from Glue.
Here's what I've done so far.
import pandas as pd
import awswrangler as wr
import io
ad_request_path = 's3://bucketname/key.xlsx'
df = wr.s3.read_excel(ad_request_path)
OR
bucket_name = 'bucketname'
object_key = 'key.xlsx'
# NOTE(review): `boto3` is used below but is not imported in this snippet
s3_client = boto3.client('s3')
# Fetch the object and read its whole body into memory
obj = s3_client.get_object(Bucket=bucket_name, Key=object_key)
data = obj['Body'].read()
# NOTE(review): `open_workbook_xls` is neither defined nor imported here, and `workbook` is not used afterwards
workbook = open_workbook_xls(file_contents=data)
# Parse the downloaded content with pandas; no `engine` is specified in this call
df = pd.read_excel(io.BytesIO(data))
print(df)
I got this error message:
XLRDError: Excel xlsx file; not supported
Managed to make it work. Just add engine = 'openpyxl'
import awswrangler as wr
import openpyxl
ad_request_path = 's3://bucketname/key.xlsx'
df = wr.s3.read_excel(ad_request_path, engine='openpyxl')

Converting all worksheets in an Excel workbook to csv format

My Excel document my.xlsx has two Sheets named Sheet1 and Sheet2. I want to convert all worksheets to csv format using xlsx2csv. I used the following commands:
from xlsx2csv import *
xlsx2csv my.xlsx convert.csv
File "<stdin>", line 1
xlsx2csv my.xlsx convert.csv
^
SyntaxError: invalid syntax
x2c -a my.xlsx my1.csv
File "<stdin>", line 1
x2c -a my.xlsx my1.csv
^
SyntaxError: invalid syntax
Any help, please.
I have not used xlsx2csv before but why don't we try pandas.
Your requirement can be solved like this:
import pandas as pd
# Export each named worksheet of my.xlsx to its own CSV file.
for sheet in ['Sheet1', 'Sheet2']:
    # The keyword is ``sheet_name``; the older ``sheetname`` spelling is
    # rejected by current pandas versions.
    df = pd.read_excel('my.xlsx', sheet_name=sheet)
    df.to_csv(sheet + '_output.csv', index=False)
You can do something as the follows:
import pandas as pd
# ExcelFile opens the workbook once; the context manager closes it afterwards.
with pd.ExcelFile('<path_to_your_excel_file>') as xls_file:
    sheet_names = xls_file.sheet_names
    for sheet in sheet_names:
        df = xls_file.parse(sheet)
        # Write the parsed sheet out; otherwise the frame is discarded on the
        # next iteration and no CSV is ever produced.
        df.to_csv(f'{sheet}.csv', index=False)
Xlsx2csv python implementation:
Could only execute Xlsx2csv with sheetid parameter. In order to get sheet names and ids, get_sheet_details was used.
csvfrmxlsx creates csv files for each sheet in csv folder under parent directory.
import pandas as pd
from pathlib import Path
def get_sheet_details(filename):
    """List the worksheets declared in an ``.xlsx`` workbook.

    An ``.xlsx`` file is a zip archive. The sheet names and ids are read
    straight from its ``xl/workbook.xml`` member, so the sheet data is never
    loaded and nothing is extracted to disk.

    Args:
        filename: Path (``str`` or ``pathlib.Path``) of the ``.xlsx`` file.

    Returns:
        list[dict]: One ``{'id': <sheetId>, 'name': <sheet name>}`` dict per
        worksheet, in workbook order. Both values are strings.

    Raises:
        KeyError: If the archive has no ``xl/workbook.xml`` member.
        zipfile.BadZipFile: If *filename* is not a zip archive.
    """
    import zipfile
    import xml.etree.ElementTree as ET

    def local_name(tag):
        # ElementTree spells namespaced tags as '{namespace}name'; keep only
        # the name so any spreadsheet namespace is accepted.
        return tag.rsplit('}', 1)[-1]

    with zipfile.ZipFile(filename, 'r') as archive:
        root = ET.fromstring(archive.read('xl/workbook.xml'))

    sheets = []
    for container in root:
        if local_name(container.tag) != 'sheets':
            continue
        # Iterating the element's children works the same for one sheet or many.
        for sheet in container:
            if local_name(sheet.tag) == 'sheet':
                sheets.append({
                    'id': sheet.get('sheetId'),
                    'name': sheet.get('name'),
                })
    return sheets
def csvfrmxlsx(xlsxfl, df):
    """Convert the listed worksheets of *xlsxfl* to CSV files.

    One ``<sheet name>.csv`` file per row of *df* is written to a ``csv``
    folder located next to the workbook.

    Args:
        xlsxfl: ``pathlib.Path`` of the workbook to convert.
        df: DataFrame with one row per sheet and the columns ``id`` (sheet
            id, convertible to ``int``) and ``name`` (sheet name).
    """
    from xlsx2csv import Xlsx2csv

    out_dir = xlsxfl.parent / 'csv'
    # Create the target folder up front; writing into a missing folder fails.
    out_dir.mkdir(parents=True, exist_ok=True)
    for _, row in df.iterrows():
        shnum = row['id']
        shnph = out_dir / (row['name'] + '.csv')  # path for converted csv file
        Xlsx2csv(str(xlsxfl), outputencoding="utf-8").convert(str(shnph), sheetid=int(shnum))
# Locate the workbook, collect its sheet ids and names, then export every sheet to CSV.
pthfnc = 'c:/xlsx/'
wrkfl = 'my.xlsx'
xls_file = Path(f'{pthfnc}{wrkfl}')
# Sheet names and ids are gathered through get_sheet_details, not through pandas.
sheetsdic = get_sheet_details(xls_file)
df = pd.DataFrame.from_dict(sheetsdic)
# df lists the sheets to be converted
csvfrmxlsx(xls_file, df)

Exception: Exception('Exception caught in workbook destructor. Explicit close() may be required for workbook.',)

I am trying to put a bunch of CSV files into one workbook and here is my code:
import csv
import glob
import openpyxl
import os, sys
import pandas as pd
import xlsxwriter as xlwr
def main():
# Collect every CSV in the current directory: one DataFrame plus one sheet name per file
list_of_files = []
names = []
for csv_file in glob.glob(os.path.join('.', '*.csv')):
# Drop the first two characters of the globbed path (presumably the './' prefix) and the last four (presumably '.csv')
bleh = csv_file[2:]
name = bleh[:-4]
names.append(name)
df = pd.read_csv(csv_file, index_col=None, header=0)
list_of_files.append(df)
# Write each DataFrame to its own sheet of a single workbook, named after the source file
writer = pd.ExcelWriter('non_concussed_game_logs.xlsx')
for n, df in enumerate(list_of_files):
df.to_excel(writer, '%s' % names[n])
# NOTE(review): `writer.save` is only referenced here, not called (no parentheses)
writer.save
if __name__ == "__main__":
main()
I am getting the error mentioned in the title of my post but I am unsure as to why I'm getting it. I have used this script before and it has worked but I'm not sure why it is not now. Any help is appreciated!
I figured it out, my CSV files were encoded in utf-8 so I had to make the read_csv() call
df = pd.read_csv(csv_file, index_col=None, header=0, encoding='utf-8')
and also add the parentheses to the writer.save line, so that it reads writer.save().

From password-protected Excel file to pandas DataFrame

I can open a password-protected Excel file with this:
import sys
import win32com.client
# Start (or attach to) Excel through COM and open the password-protected workbook.
xlApp = win32com.client.Dispatch("Excel.Application")
print("Excel library version:", xlApp.Version)
# File name and password are taken from the first two command-line arguments.
filename, password = sys.argv[1:3]
xlwb = xlApp.Workbooks.Open(filename, Password=password)
# xlwb = xlApp.Workbooks.Open(filename)
xlws = xlwb.Sheets(1)  # counts from 1, not from 0
print(xlws.Name)
print(xlws.Cells(1, 1))  # that's A1
I'm not sure though how to transfer the information to a pandas dataframe. Do I need to read cells one by one and all, or is there a convenient method for this to happen?
Simple solution
import io
import pandas as pd
import msoffcrypto
passwd = 'xyz'
# Placeholder: path of the password-protected workbook to read.
encrypted_path = 'encrypted.xlsx'
# The decrypted copy is kept in memory only; nothing is written to disk.
decrypted_workbook = io.BytesIO()
with open(encrypted_path, 'rb') as file:
    office_file = msoffcrypto.OfficeFile(file)
    office_file.load_key(password=passwd)
    office_file.decrypt(decrypted_workbook)
df = pd.read_excel(decrypted_workbook, sheet_name='abc')
pip install --user msoffcrypto-tool
Exporting all sheets of each Excel file in a directory and its sub-directories to separate CSV files:
import os
from glob import glob

PATH = "Active Cons data"
# Scan all the Excel files in the directory and its sub-directories
excel_files = [
    found
    for dirpath, _dirnames, _filenames in os.walk(PATH)
    for found in glob(os.path.join(dirpath, '*.xlsx'))
]
for excel_path in excel_files:
    print(str(excel_path))
    decrypted_workbook = io.BytesIO()
    with open(excel_path, 'rb') as file:
        office_file = msoffcrypto.OfficeFile(file)
        office_file.load_key(password=passwd)
        office_file.decrypt(decrypted_workbook)
    # sheet_name=None loads every sheet at once as {sheet name: DataFrame},
    # so the workbook does not need to be parsed again per sheet.
    all_sheets = pd.read_excel(decrypted_workbook, sheet_name=None)
    print(list(all_sheets))  # list of sheet names
    for sheet, sheet_df in all_sheets.items():
        # NOTE: the CSV is named after the sheet only, so same-named sheets
        # from different workbooks overwrite each other.
        new_file = f"D:\\all_csv\\{sheet}.csv"
        sheet_df.to_csv(new_file, index=False)
Assuming the starting cell is given as (StartRow, StartCol) and the ending cell is given as (EndRow, EndCol), I found the following worked for me:
# Get the content in the rectangular selection region
# content is a tuple of tuples
content = xlws.Range(xlws.Cells(StartRow, StartCol), xlws.Cells(EndRow, EndCol)).Value
# Transfer content to pandas dataframe
dataframe = pandas.DataFrame(list(content))
Note: Excel Cell B5 is given as row 5, col 2 in win32com. Also, we need list(...) to convert from tuple of tuples to list of tuples, since there is no pandas.DataFrame constructor for a tuple of tuples.
from David Hamann's site (all credits go to him)
https://davidhamann.de/2018/02/21/read-password-protected-excel-files-into-pandas-dataframe/
Use xlwings, opening the file will first launch the Excel application so you can enter the password.
import pandas as pd
import xlwings as xw
PATH = '/Users/me/Desktop/xlwings_sample.xlsx'
wb = xw.Book(PATH)
sheet = wb.sheets['sample']
df = sheet['A1:C4'].options(pd.DataFrame, index=False, header=True).value
df
Assuming that you can save the encrypted file back to disk using the win32com API (which I realize might defeat the purpose) you could then immediately call the top-level pandas function read_excel. You'll need to install some combination of xlrd (for Excel 2003), xlwt (also for 2003), and openpyxl (for Excel 2007) first though. Here is the documentation for reading in Excel files. Currently pandas does not provide support for using the win32com API to read Excel files. You're welcome to open up a GitHub issue if you'd like.
Based on the suggestion provided by @ikeoddy, this should put the pieces together:
How to open a password protected excel file using python?
# Import modules
import pandas as pd
import win32com.client
import os
import getpass
# Name file variables
file_path = r'your_file_path'
file_name = r'your_file_name.extension'
full_name = os.path.join(file_path, file_name)
# print(full_name)
Getting command-line password input in Python
# You are prompted to provide the password to open the file
xl_app = win32com.client.Dispatch('Excel.Application')
pwd = getpass.getpass('Enter file password: ')
Workbooks.Open Method (Excel)
xl_wb = xl_app.Workbooks.Open(full_name, False, True, None, pwd)
xl_app.Visible = False
xl_sh = xl_wb.Worksheets('your_sheet_name')
# Get last_row: walk down column 1 until a cell's Value is None
row_num = 0
cell_val = ''
while cell_val is not None:
    row_num += 1
    cell_val = xl_sh.Cells(row_num, 1).Value
# The loop stops one row past the last non-None cell
last_row = row_num - 1
# Get last_column: walk along row 1 until a cell's Value is None
col_num = 0
cell_val = ''
while cell_val is not None:
    col_num += 1
    cell_val = xl_sh.Cells(1, col_num).Value
# The loop stops one column past the last non-None cell
last_col = col_num - 1
ikeoddy's answer:
content = xl_sh.Range(xl_sh.Cells(1, 1), xl_sh.Cells(last_row, last_col)).Value
# list(content)
df = pd.DataFrame(list(content[1:]), columns=content[0])
df.head()
python win32 COM closing excel workbook
xl_wb.Close(False)
Adding to @Maurice's answer, to get all the cells in the sheet without having to specify the range:
wb = xw.Book(PATH, password='somestring')
sheet = wb.sheets[0] #get first sheet
#sheet.used_range.address returns string of used range
df = sheet[sheet.used_range.address].options(pd.DataFrame, index=False, header=True).value

Categories