How to manually open a file that Python is writing in simultaneously? - python

I have Python code that performs some calculations and then writes the output to an Excel sheet as follows:
# import the necessary packages
import numpy as np
import argparse
import time
import cv2
import os
from openpyxl import load_workbook
import pandas as pd
from datetime import date, datetime
filename = r'PathToDirectory\Data.xlsx'
def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False,
                       **to_excel_kwargs):
    """Append a DataFrame to a sheet of an Excel workbook.

    If ``filename`` does not exist yet, a new workbook is created.

    Parameters:
        filename: path of the ``.xlsx`` file to create or extend.
        df: the DataFrame to write.
        sheet_name: name of the target sheet.
        startrow: zero-based row at which to start writing. When ``None``,
            writing starts after the last used row of an existing sheet,
            or at row 0 if the file or the sheet does not exist.
        truncate_sheet: when true, an existing ``sheet_name`` sheet is
            replaced by an empty sheet at the same position before writing.
        **to_excel_kwargs: forwarded to ``DataFrame.to_excel``. ``engine``
            is ignored; ``header`` and ``index`` default to ``False`` but
            can be overridden by the caller.

    Raises:
        OSError: any I/O failure other than a missing file (for example a
            ``PermissionError`` while another program holds the file) is
            propagated instead of being silently treated as "file missing".
    """
    # ignore [engine] parameter if it was passed
    to_excel_kwargs.pop('engine', None)
    # Defaults rather than hard-coded keywords, so a caller-supplied
    # header/index no longer collides with them in the to_excel() call.
    to_excel_kwargs.setdefault('header', False)
    to_excel_kwargs.setdefault('index', False)

    # Read the existing workbook BEFORE creating the writer: constructing
    # the writer may already open the target path for writing, after which
    # the old contents can no longer be loaded.
    try:
        book = load_workbook(filename)
    except FileNotFoundError:
        # file does not exist yet, we will create it
        book = None

    writer = pd.ExcelWriter(filename, engine='openpyxl')

    if book is not None:
        writer.book = book
        sheet_exists = sheet_name in book.sheetnames

        # get the last row in the existing Excel sheet
        # if it was not specified explicitly
        if startrow is None and sheet_exists:
            startrow = book[sheet_name].max_row

        # truncate sheet
        if truncate_sheet and sheet_exists:
            # index of [sheet_name] sheet
            idx = book.sheetnames.index(sheet_name)
            # remove [sheet_name]
            book.remove(book.worksheets[idx])
            # create an empty sheet [sheet_name] using old index
            book.create_sheet(sheet_name, idx)

        # copy existing sheets so to_excel() writes into them
        writer.sheets = {ws.title: ws for ws in book.worksheets}

    if startrow is None:
        startrow = 0

    # write out the new sheet
    df.to_excel(writer, sheet_name=sheet_name, startrow=startrow,
                **to_excel_kwargs)

    # save the workbook and release the file
    writer.close()
# Main loop: after each round of computation, log one row per detected class.
while True:
    # --------------------------------------------------------------
    # -----------REST OF THE CODE AND COMPUTATION HERE--------------
    # --------------------------------------------------------------
    # Date and time stamps shared by every row written in this iteration.
    today = date.today().strftime("%d/%m/%Y")
    now = datetime.now().strftime("%H:%M:%S")

    # Distinct class ids seen in this iteration.
    rec_classes = list(set(classIDs))
    for class_id in rec_classes:
        my_label = LABELS[class_id]
        my_count = classIDs.count(class_id)
        # One-row frame: date, time, class label and number of detections.
        df = pd.DataFrame({
            'today_date': [today],
            'now_time': [now],
            'animal_class': [my_label],
            'animal_count': my_count,
        })
        append_df_to_excel(filename, df)
The code works fine if I want to write to an excel sheet, and after the code has run, I can open the file and all the content appears perfectly.
The problem is that I want to open the excel file while it is running. I want to see the rows being added and data being appended as the code runs. However, whenever I open the excel file while the code is running, I get a 'Permission Denied' error, and the code stops.
I have tried solving it with `except OSError: pass`, but it did not help.
Is there anything that can be done?

I suggest two possible ways:
Print the data you are inserting to the terminal, or
To use Python's logging library, which can be used to log the data in a file and to have a complete vision of what's going on. You can set format, level of importance of logging, handlers... it's an awesome tool, also for other usages.

No, there is no straightforward way to have Excel dynamically update its display of a changing file. It's loaded from disk into memory once, and then Excel ignores the disk file.
(If you wrote an Excel macro which revisited the file periodically, maybe you could pull this off. But friends don't let friends use Excel anyway.)

Related

How do I write multiple dataframes to excel using python and download the results using streamlit?

I have a Python script using Streamlit that lets the user upload certain Excel files and then automatically runs my analysis on them. I then want the user to be able to download the results in xlsx format using the Streamlit download button. I know how to let them download one dataframe as a CSV, but not how to offer an xlsx file through the download button, which is what I want to do.
Here's what I've tried so far, and this is after my analysis where I'm just trying to create the download button for the user to download the results that are stored in 3 different dataframes:
Import pandas as pd
Import streamlit as st
# arrived_clean, booked_grouped, and arrived_grouped are all dataframes that I want to export to an excel file as results for the user to download.
# Write the three result dataframes to 'test_data.xlsx' with the xlsxwriter engine.
# NOTE(review): there is no return statement, so a call evaluates to None.
def convert_df():
writer = pd.ExcelWriter('test_data.xlsx', engine='xlsxwriter')
# arrived_clean goes to its own 'Cleaned' sheet
arrived_clean.to_excel(writer, sheet_name='Cleaned', startrow=0, startcol=0, index=False)
# both grouped frames share the 'Output' sheet; the second one starts at column 20
booked_grouped.to_excel(writer, sheet_name='Output', startrow=0, startcol=0, index=False)
arrived_grouped.to_excel(writer, sheet_name='Output', startrow=0, startcol=20, index=False)
writer.save()
csv = convert_df()
st.download_button(
label="Download data",
data=csv,
file_name='test_data.xlsx',
mime='text/xlsx',
)
When I first run the streamlit app locally I get this error:
"NameError: name 'booked_grouped' is not defined"
I get it because I haven't uploaded any files yet. After I upload my files the error message goes away and everything runs normally. However, I get this error and I don't see the download button to download my new dataframes:
RuntimeError: Invalid binary data format: <class 'NoneType'> line 313,
in marshall_file raise RuntimeError("Invalid binary data format: %s" %
type(data))
Can someone tell me what I'm doing wrong? It's the last piece I have to figure out.
The Pluviophile's answer is correct, but you should use output in pd.ExcelWriter instead of file_name:
def dfs_tabs(df_list, sheet_list, file_name):
    """Serialize several DataFrames into one in-memory Excel workbook.

    Parameters:
        df_list: the DataFrames to write, one per sheet.
        sheet_list: sheet names, paired positionally with ``df_list``.
        file_name: not used here (the workbook is built in memory); kept so
            existing callers that pass it keep working.

    Returns:
        The bytes of the finished ``.xlsx`` workbook.
    """
    output = BytesIO()
    # The context manager finalizes the workbook into `output` on exit,
    # also when writing one of the sheets raises.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for dataframe, sheet in zip(df_list, sheet_list):
            dataframe.to_excel(writer, sheet_name=sheet, startrow=0, startcol=0)
    return output.getvalue()
When I first run the streamlit app locally I get this error:
"NameError: name 'booked_grouped' is not defined"
Assuming your code
booked_grouped = st.file_uploader('Something.....')
You can use the below method to skip the error
if booked_grouped:
# All your code inside this indentation
To Download excel
Convert all dataframes to one single excel
# Function to save all dataframes to one single excel
def dfs_tabs(df_list, sheet_list, file_name):
    """Save all dataframes to one single Excel workbook held in memory.

    Parameters:
        df_list: the DataFrames to write, one per sheet.
        sheet_list: sheet names, paired positionally with ``df_list``.
        file_name: not used for writing; kept so existing calls still work.

    Returns:
        The bytes of the ``.xlsx`` workbook, suitable as download data.
    """
    output = BytesIO()
    # Write into the in-memory buffer, not to `file_name` on disk; otherwise
    # `output` stays empty and getvalue() returns no data.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for dataframe, sheet in zip(df_list, sheet_list):
            dataframe.to_excel(writer, sheet_name=sheet, startrow=0, startcol=0)
    processed_data = output.getvalue()
    return processed_data
# list of dataframes
dfs = [df, df1, df2]
# list of sheet names
sheets = ['df','df1','df2']
Note that the data to be downloaded is stored in memory while the user is connected, so it's a good idea to keep file sizes under a couple of hundred megabytes to conserve memory.
df_xlsx = dfs_tabs(dfs, sheets, 'multi-test.xlsx')
st.download_button(label='📥 Download Current Result',
data=df_xlsx ,
file_name= 'df_test.xlsx')

writing and appending to excel from pandas dataframe not working

I am trying to write pandas dataframe (allagents) to excel sheet...If the file is not there it should create a new file and if file is already there, it should append the data at the end. Below is my code..
# Append `all_agents` to all_agents_file.xlsx when the append path succeeds;
# on any exception, write `all_agents` to that path as a fresh file instead.
try:
output_file = "all_agents_file.xlsx"
# try to open an existing workbook
book = load_workbook(output_file)
# NOTE(review): the writer is created on the same path before
# pd.read_excel() below reads it - confirm the file is still intact then.
writer = pd.ExcelWriter(output_file)
writer.book = book
# copy existing sheets
writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
# read existing file
previous_data = pd.read_excel(output_file)
# write out the new sheet
if not all_agents.empty:
# continue the 1-based index after the rows already in the file
all_agents.index = np.arange(len(previous_data) + 1,
len(previous_data) + len(all_agents) + 1)
# the extra +1 presumably skips the header row of the existing sheet - TODO confirm
all_agents.to_excel(writer, index=True, header=False,
startrow=len(previous_data) + 1)
writer.close()
# Broad handler: every failure above (not only a missing file) ends up here.
except Exception as e:
all_agents.index = np.arange(1, len(all_agents) + 1)
all_agents.to_excel(output_file)
print("File Created and Data Written in it...:", e)
The issue is that it raises an exception saying the file is not a recognised Excel file. If I specify engine="openpyxl" while reading, the exception is "File is not a zip file"; if I give the engine as "xlsxwriter", the exception is "unknown engine xlsxwriter". Versions: pandas==1.2.5, openpyxl==3.0.7.
My machine is Ubuntu and the same code works on Jupyter notebook on my machine....But does not work when I run through terminal.
Any help would be appreciated

python - pandas with openpyxl damages my workbook

Using pandas with the openpyxl engine, I want to open an Excel workbook and add a sheet.
Because the sheets are growing and getting quite big, I want to create a new sheet without reading all the other sheets.
So instead of xlsxwriter I started using openpyxl; however, I now have the issue that the file gets damaged and needs to be recovered by Excel.
After that, when I run it a second time, I get the Python error 'raise BadZipFile("File is not a zip file")' for the Excel file.
This my test code:
import openpyxl
import pandas
from pandas import ExcelWriter
from pandas import ExcelFile
from openpyxl import Workbook
from openpyxl import load_workbook
# Test script: write df_1 to 'sheet1' and then df_2 to 'sheet2' of the same
# file, re-opening the workbook from disk before each write.
sheet1 = 'sheet1'
sheet2 = 'sheet2'
# First pass: reuse the workbook on disk if it exists, otherwise start empty.
if os.path.exists(filename):
workbook = openpyxl.load_workbook(filename)
else:
workbook = Workbook()
writer = pandas.ExcelWriter(filename, engine='openpyxl')
# hand the loaded (or new) workbook to the writer
writer.book = workbook
df_1.to_excel(writer, sheet_name=sheet1)
# NOTE(review): save() is immediately followed by close() - confirm whether
# both calls write the file and whether one of them is enough.
writer.save()
writer.close()
time.sleep(2) # to give it some time. But doesn't help :(
# Second pass: same steps as above, this time adding df_2 as 'sheet2'.
if os.path.exists(filename):
workbook = openpyxl.load_workbook(filename)
else:
workbook = Workbook()
writer = pandas.ExcelWriter(filename, engine='openpyxl')
writer.book = workbook
df_2.to_excel(writer, sheet_name=sheet2)
writer.save()
writer.close()
Any suggestions how to solve this? Or do I miss something?
btw, excel 365 - 16.46 macOS

Unable to read excel file , list index out of range error, cant find Sheets

I am trying to read an Excel (.xlsx) file and convert it to a dataframe. I used pandas.ExcelFile, pandas.read_excel, openpyxl's load_workbook and even io file-reading methods, but I am unable to read the sheet of this file. Every time I get a "list index out of range" error, or no sheet names in the case of openpyxl. I also tried the xlrd method.
temp_df = pd.read_excel("v2s.xlsx", sheet_name = 0)
or
temp_df = pd.read_excel("v2s.xlsx", sheet_name = "Sheet1")
or
from openpyxl import load_workbook
workbook = load_workbook(filename="v2s.xlsx",read_only = True, data_only = True)
workbook.sheetnames
Link to excel file
According to this ticket, the file is saved in a "slightly defective" format.
The user posted that he used Save As to change the type of document back to a normal Excel spreadsheet file.
Your file is this type:
You need to save it as:
Then running your code
from openpyxl import load_workbook
workbook = load_workbook(filename="v2s_0.xlsx",read_only = True, data_only = True)
print(workbook.sheetnames)
Outputs:
['Sheet1']

Excel file can not be opened anymore after writing with pd.ExcelWriter

I am writing a dataframe to a range of Excel file in a certain tab, but after saving the file, I see that the Excel file has become unusable. Could anyone suggest a solution?
import openpyxl as pyx
# Write the first positional entry of df3 into sheet 'shname' of the existing
# workbook at path `dst`, starting at row 7 / column 3 (zero-based offsets).
df3_xmax= df3.iloc[0]
# load the current workbook so its sheets can be handed to the writer
wb = pyx.load_workbook(dst)
# NOTE(review): the writer is opened on the same path the workbook was just
# read from - confirm this does not reset the file before save().
xl_writer = pd.ExcelWriter(dst, engine='openpyxl')
xl_writer.book = wb
# register the existing sheets with the writer by title
xl_writer.sheets = {ws.title:ws for ws in wb.worksheets}
df3_xmax.to_excel(xl_writer, 'shname', index=False, header=False, startcol=3, startrow=7)
xl_writer.save()

Categories