How to Output Downloadable file after processing? - python

Specification
gr.__version__ --> '3.16.2'
I want to create a gradio tab in mygradio app
Disregard TAB 1, I am only working on tab2
where I upload an excel file
save the name of the excel file to a variable
process that excel file and take 2 numbers out of it (1 and 2)
Load data from the excel file to a pandas dataframe and add 1 to both of the numbers
Turn dataframe to excel again and output it to the user to be able to download the output excel file
The output file is named as the original uploaded file
MY CURRENT Code
import gradio as gr
import pandas as pd
# def func1():
# #....
# pass
def func2(name, file):
    """Add 1 to columns '1' and '2' of an uploaded workbook and return the new file's path.

    Args:
        name: File name to give the output workbook. If it is empty or not
            a string, the base name of the uploaded file is used instead.
        file: The uploaded workbook, either as a path string or as an object
            that exposes its path through a ``name`` attribute.

    Returns:
        str: Path of the newly written Excel file. ``DataFrame.to_excel``
        itself returns ``None``, so the path is returned explicitly to give
        a file output component something to serve.

    Raises:
        KeyError: If the workbook has no column labelled '1' or '2'.
    """
    import os
    import tempfile

    # Accept both a plain path and a file wrapper carrying the path in .name.
    source_path = getattr(file, "name", file)

    # excel to dataframe
    df = pd.read_excel(source_path)

    # add 1 to both numbers
    df['1'] = df['1'] + 1
    df['2'] = df['2'] + 1

    # Name the export after the requested/original file name.
    wanted = next(
        (c for c in (name, source_path) if isinstance(c, str) and c),
        "output.xlsx",
    )
    output_name = os.path.basename(wanted)

    # Write into a fresh directory so the upload is not overwritten and the
    # output can keep the original file name.
    output_path = os.path.join(tempfile.mkdtemp(), output_name)
    df.to_excel(output_path, index=False)
    return output_path
# GRADIO APP
# Builds the Blocks UI ("BI App"): a "Tab2" holding a file input, a file
# output and a "Submit" button whose click is wired to func2, then launches
# the app. NOTE(review): indentation was lost in this paste, so the nesting of
# the with-blocks below cannot be read off the text.
with gr.Blocks() as demo:
gr.Markdown("BI App")
''' #1.TAB '''
# with gr.Tab("Tab1"):
# #.... unimportant code
# with gr.Column():
# file_obj = gr.File(label="Input File",
# file_count="single",
# file_types=["", ".", ".csv",".xls",".xlsx"]),
# # extract the filename from gradio.io file object
# # keyfile_name = gr.Interface(file_name_reader, inputs="file", outputs=None)
# keyfile_name = 'nothing'
# tab1_inputs = [keyfile_name, file_obj]
# with gr.Column():
# # output excel file with gradio.io
# tab1_outputs = [gr.File(label="Output File",
# file_count="single",
# file_types=["", ".", ".csv",".xls",".xlsx"])]
# tab1_submit_button = gr.Button("Submit")
''' #2.TAB - I EDIT THIS TAB'''
with gr.Tab("Tab2"):
# NOTE(review): this button is created but no click handler is attached to
# it anywhere in this snippet.
admitad_invoice_approvals_button = gr.Button("Submit")
# Helper that returns the .name attribute of the object it is given.
def file_name_reader(file):
file_name = file.name # extract the file name from the uploaded file
return file_name
# iface = gr.Interface(file_name_reader, inputs="file", outputs=None)
with gr.Column():
# NOTE(review): the trailing comma after the closing parenthesis makes
# file_obj a 1-element tuple rather than a gr.File component. That tuple is
# then placed in tab2_inputs, which matches the reported
# "'tuple' object has no attribute '_id'" error.
file_obj = gr.File(label="Input File",
file_count="single",
file_types=["", ".", ".csv",".xls",".xlsx"]),
# extract the filename from gradio.io file object
# NOTE(review): keyfile_name is a gr.Interface object, not an input
# component, yet it is passed as an input to click() below - confirm that
# this is supported.
keyfile_name = gr.Interface(file_name_reader, inputs="file", outputs=None)
tab2_inputs = [keyfile_name, file_obj]
with gr.Column():
# output excel file with gradio.io
tab2_outputs = [gr.File(label="Output File",
file_count="single",
file_types=["", ".", ".csv",".xls",".xlsx"])]
tab2_submit_button = gr.Button("Submit")
'''1 button for each of the tabs to execute the GUI TASK'''
# tab1_submit_button.click(func1,
# inputs=tab1_inputs,
# outputs=tab1_outputs)
# Wire the Tab2 submit button: func2 is called with tab2_inputs and its
# return value is sent to tab2_outputs.
tab2_submit_button.click(func2,
inputs=tab2_inputs,
outputs=tab2_outputs)
''' EXECUTING THE APP'''
demo.launch(debug=True, share=True) ## PRODUCTION TESTING
ERROR:
Output exceeds the size limit. Open the full output data in a text editor
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[7], line 95
90 '''1 button for each of the tabs to execute the GUI TASK'''
91 # tab1_submit_button.click(func1,
92 # inputs=tab1_inputs,
93 # outputs=tab1_outputs)
---> 95 tab2_submit_button.click(func2,
96 inputs=tab2_inputs,
97 outputs=tab2_outputs)
100 ''' EXECUTING THE APP'''
101 demo.launch(debug=True, share=True) ## PRODUCTION TESTING
File ~/.local/lib/python3.8/site-packages/gradio/events.py:145, in Clickable.click(self, fn, inputs, outputs, api_name, status_tracker, scroll_to_output, show_progress, queue, batch, max_batch_size, preprocess, postprocess, cancels, every, _js)
140 if status_tracker:
141 warnings.warn(
142 "The 'status_tracker' parameter has been deprecated and has no effect."
143 )
--> 145 dep = self.set_event_trigger(
146 "click",
147 fn,
148 inputs,
149 outputs,
150 preprocess=preprocess,
151 postprocess=postprocess,
152 scroll_to_output=scroll_to_output,
153 show_progress=show_progress,
154 api_name=api_name,
155 js=_js,
156 queue=queue,
157 batch=batch,
158 max_batch_size=max_batch_size,
159 every=every,
160 )
161 set_cancel_events(self, "click", cancels)
162 return dep
File ~/.local/lib/python3.8/site-packages/gradio/blocks.py:225, in Block.set_event_trigger(self, event_name, fn, inputs, outputs, preprocess, postprocess, scroll_to_output, show_progress, api_name, js, no_target, queue, batch, max_batch_size, cancels, every)
217 warnings.warn(
218 "api_name {} already exists, using {}".format(api_name, api_name_)
219 )
220 api_name = api_name_
222 dependency = {
223 "targets": [self._id] if not no_target else [],
224 "trigger": event_name,
...
237 }
238 Context.root_block.dependencies.append(dependency)
239 return dependency
AttributeError: 'tuple' object has no attribute '_id'
Tried
I have looked into https://gradio.app/docs/#file, but how to generate the output file is not clear, especially when applying it to my case

Instead of
with gr.Column():
file_obj = gr.File(label="Input File"
# no any other arguments
)
input= file_obj
Just have
with gr.Column():
file_obj = gr.File(label="Input File")
input= file_obj

Related

How to solve BadZipFile: File is not a zip file error in Jupyter Notebook?

I'm trying to read .xlsx files from folders in a specific directory and write/export them into 4 new .xlsx files, each of which will contain the data catalog for one year.
The script worked well when I tried it some months ago, but it's not working anymore. It keeps raising a BadZipFile: File is not a zip file error.
Am I missing something? I've tried upgrading and downgrading the anaconda, python, openpyxl, and pandas versions, but it doesn't help.
from openpyxl import load_workbook
import pandas as pd
import os
import re
# Collects the data table of every workbook found under
# <path_folder>/<year>/<station>/ and writes one combined .xlsx per year.
path_folder = r'C:\\Users\\lala\\Downloads\\New folder\\Data Klimatologi\\'

# Entries of the root folder; per the question these are the year names
# ('2000', '2001', ...). The original read year_folder before assigning it.
year_folder = os.listdir(path_folder)

for x in year_folder:
    # Sub-folders of one year (presumably one per station). Kept in its own
    # name so the list being iterated by the outer loop is never rebound.
    station_folder = os.listdir(path_folder + x)
    frames = []
    for y in station_folder:
        path_file = path_folder + '{}\\{}'.format(x, y)
        files = os.listdir(path_file)
        for z in files:
            pattern = path_folder + '{}\\{}\\{}'.format(x, y, z)
            wb = load_workbook(filename=pattern)
            sheet = wb.active  # has 1 sheet

            # The last row of column B holding a value bounds the table.
            max_row_for_Tn = max(
                b.row for b in sheet['B'] if b.value is not None
            )
            cell = 'A9:K%d' % max_row_for_Tn
            data = sheet[cell]
            row_list = [[col.value for col in row] for row in data]

            # First extracted row (sheet row 9) supplies the column names.
            df = pd.DataFrame(
                data=row_list[1:], index=None, columns=row_list[0]
            )

            # Cell C1 carries the id; strip whitespace and stray symbols
            # before converting it to an integer.
            pk = sheet.cell(row=1, column=3).value
            pk = re.sub(r'\s+', '', pk)
            pk = int(re.sub(r'[^.,a-zA-Z0-9 \n\.]', '', pk))
            df['Id WMO'] = pk
            frames.append(df)

    # pd.concat raises on an empty list, so skip years without workbooks.
    if not frames:
        continue
    result = pd.concat(frames)
    result.to_excel(r'C:\Users\lala\OneDrive\Documents\Dataset\Dataset Stasiun BMKG Tahun {}.xlsx'.format(x), index=False)
The script works well up to the point where year_folder gives the output ('2000','2001','2002','2003','2004').
Here's the traceback.
---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
<ipython-input-4-e8e2d94d1368> in <module>
7 for z in files:
8 pattern = path_folder + '{}\\{}\\{}'.format(x,y,z)
----> 9 wb = load_workbook(filename = pattern)
10 sheet = wb.active#has 1 sheet
11 max_row_for_Tn = max((b.row for b in sheet['B'] if b.value is not None))
~\anaconda3\envs\Pandas\lib\site-packages\openpyxl\reader\excel.py in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
312 """
313 reader = ExcelReader(filename, read_only, keep_vba,
--> 314 data_only, keep_links)
315 reader.read()
316 return reader.wb
~\anaconda3\envs\Pandas\lib\site-packages\openpyxl\reader\excel.py in __init__(self, fn, read_only, keep_vba, data_only, keep_links)
122 def __init__(self, fn, read_only=False, keep_vba=KEEP_VBA,
123 data_only=False, keep_links=True):
--> 124 self.archive = _validate_archive(fn)
125 self.valid_files = self.archive.namelist()
126 self.read_only = read_only
~\anaconda3\envs\Pandas\lib\site-packages\openpyxl\reader\excel.py in _validate_archive(filename)
94 raise InvalidFileException(msg)
95
---> 96 archive = ZipFile(filename, 'r')
97 return archive
98
~\anaconda3\envs\Pandas\lib\zipfile.py in __init__(self, file, mode, compression, allowZip64)
1129 try:
1130 if mode == 'r':
-> 1131 self._RealGetContents()
1132 elif mode in ('w', 'x'):
1133 # set the modified flag so central directory gets written
~\anaconda3\envs\Pandas\lib\zipfile.py in _RealGetContents(self)
1196 raise BadZipFile("File is not a zip file")
1197 if not endrec:
-> 1198 raise BadZipFile("File is not a zip file")
1199 if self.debug > 1:
1200 print(endrec)
BadZipFile: File is not a zip file
The error message is exactly correct. Current versions of Excel use the .xlsx format, which is a zip file containing a small directory tree. That format was not introduced until Excel 2007. Assuming those files really are from 2001, 2002, etc., they may be in the old-style Excel .xls format, which is not a zip file. openpyxl's load_workbook, which this script uses, cannot read .xls files. You may need to find a separate module to read or convert them.
It turned out one of the .xlsx files was duplicated. I deleted the duplicate and the error no longer shows up.
If anyone runs into the same error, check whether there are corrupted or duplicated files in your directory.
It won't be a problem to use the same file name as long as the files contain different values in it.

Why is there an error message "Exception: This file is already closed"?

1. I was trying to write Python code that reads the contents of all files in each subfolder and creates an index for each file's contents. The contents of every file are read successfully. However, when I run the code, it always shows the error message Exception: This file is already closed.
2. Here is the code for building the index. Could someone explain to me why this happens?
The traceback:
python-input-49-38a47b2f8c0c> in <module>
39 print(searcher)
40
---> 41 writers.commit(optimize=True)
42
43 # from whoosh.query import *
~/.local/lib/python3.8/site-packages/whoosh/writing.py in commit(self, mergetype, optimize, merge)
928 else:
929 # Close segment files
--> 930 self._close_segment()
931 # Write TOC
932 self._commit_toc(finalsegments)
~/.local/lib/python3.8/site-packages/whoosh/writing.py in _close_segment(self)
841 def _close_segment(self):
842 if not self.perdocwriter.is_closed:
--> 843 self.perdocwriter.close()
844 if not self.fieldwriter.is_closed:
845 self.fieldwriter.close()
~/.local/lib/python3.8/site-packages/whoosh/codec/whoosh3.py in close(self)
265 for writer in self._colwriters.values():
266 writer.finish(self._doccount)
--> 267 self._cols.save_as_files(self._storage, self._column_filename)
268
269 # If vectors were written, close the vector writers
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in save_as_files(self, storage, name_fn)
295
296 def save_as_files(self, storage, name_fn):
--> 297 for name, blocks in self._readback():
298 f = storage.create_file(name_fn(name))
299 for block in blocks():
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in _readback(self)
276
277 yield (name, gen)
--> 278 temp.close()
279 self._tempstorage.delete_file(self._tempname)
280
~/.local/lib/python3.8/site-packages/whoosh/filedb/structfile.py in close(self)
121
122 if self.is_closed:
--> 123 raise Exception("This file is already closed")
124 if self.onclose:
125 self.onclose(self)
Exception: This file is already closed
import os
import codecs
import whoosh
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
# Creates a Whoosh index in the directory "folder" and adds one document per
# file found under ./test_result. NOTE(review): indentation was lost in this
# paste, so which statements sit inside the loops / with-block is not visible.
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT,textdata=TEXT(stored=True))
ix = create_in("folder", schema)
filelist = []
for root, dirs, files in os.walk("./test_result"):
for file in files:
#append the file name to the list
filelist.append(os.path.join(root,file))
#print all the file names
writer = ix.writer()
i = 0
for name in filelist:
i = i +1
# Undecodable bytes are dropped (errors='ignore') while reading as UTF-8.
with codecs.open (name, "r",encoding='utf-8',
errors='ignore') as myfile:
text=myfile.read()
# print ("adding document "+name)
# NOTE(review): every document is stored with the same path value "folder"
# rather than its own file path (name) - confirm this is intended.
writer.add_document(title="document "+name, path="folder",content=text,textdata=text)
# NOTE(review): the with statement already closes myfile when its block
# exits, so this explicit close is redundant.
myfile.close()
print(text)
# NOTE(review): a searcher is opened here while the writer created above has
# not been committed yet - confirm this ordering is intended.
searcher = ix.searcher()
print(searcher)
# NOTE(review): 'writers' is never defined in this snippet; the writer created
# above is named 'writer'. In a notebook this name may resolve to a stale
# object from an earlier cell - verify which one was meant.
writers.commit(optimize=True)
The with statement handles resource management, including closing the file. You can read more about it here.
This code:
# Manual resource handling: open for writing and close explicitly, even if
# the write raises.
f = open(file, "w")
try:
    f.write("blablabla")
finally:
    f.close()
is equivalent to this:
# The with statement closes f automatically when its block exits.
with open(file, "w") as f:
    f.write("blablabla")
This exception is a result of you trying to close a file that is already closed implicitly by with statement.
You only need to delete this line:
myfile.close()
EDIT:
I just explained the error in the code, but didn't notice the update in the comments. Please update the question itself with the mentioned line deleted.
On a side note, I see you used writers.commit() instead of writer.commit(), please make sure it's not a typo and update your question if your code still doesn't work.

Error reading Excel file with openpyxl 2.6.2 (was working under openpyxl 2.5.12)

I wrote a script that checks if Excel files contain the worksheet "Budget". This is working with Python 3.7 and openpyxl 2.5.12. However, since Anaconda updated openpyxl to version 2.6.2 it is no longer working (see error message below). I am having issues resolving this error message. Any help is appreciated.
# Import modules:
import os
import pandas as pd
from openpyxl import load_workbook
from pathlib import Path, PureWindowsPath
# Lists the .xlsx files in the Excel folder and reports which of them contain
# a worksheet named "Budget".

# Set correct path to Excel files:
excel_folder = PureWindowsPath("C:\\Users\\c3post\\Desktop\\Excel")
correct_excel_folder = Path(excel_folder)
print('Working directory: ', correct_excel_folder, '\n\n')

# Change current working directory so that opening relative paths will work:
os.chdir(correct_excel_folder)

# Add files to list:
files = os.listdir(correct_excel_folder)

# Filter 'xlsx' files and add them to new list. Names starting with '~$' are
# the lock files Excel creates next to an open workbook; they are not
# readable workbooks, so they are skipped.
files_xlsx = [
    f for f in files
    if f.endswith('.xlsx') and not f.startswith('~$')
]

# Filter out xlsx files that do not contain the worksheet "Budget"
files_budget = []
for file in files_xlsx:
    # Only the sheet names are needed. Read-only mode loads worksheets
    # lazily, so cell data is not parsed just to list the sheets.
    wb = load_workbook(file, read_only=True)
    try:
        # Exact title match; a substring test on str(wb.worksheets) would
        # also accept titles such as "Budget 2019".
        if 'Budget' in wb.sheetnames:
            files_budget.append(file)
    finally:
        # Read-only workbooks keep the file handle open until closed.
        wb.close()

print('Files that contain worksheet "Budget": ', files_budget, '\n\n')
print('Files that do not contain worksheet "Budget": ', set(files_xlsx) - set(files_budget))
Error message:
KeyError Traceback (most recent call last)
<ipython-input-2-fa1881e560e0> in <module>
29
30 for file in files_xlsx:
---> 31 wb = load_workbook(file)
32 ws = wb.worksheets
33 if 'Budget' in str(ws):
~/anaconda3/lib/python3.7/site-packages/openpyxl/reader/excel.py in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
310 reader = ExcelReader(filename, read_only, keep_vba,
311 data_only, keep_links)
--> 312 reader.read()
313 return reader.wb
~/anaconda3/lib/python3.7/site-packages/openpyxl/reader/excel.py in read(self)
272 self.read_theme()
273 apply_stylesheet(self.archive, self.wb)
--> 274 self.read_worksheets()
275 self.parser.assign_names()
276 if not self.read_only:
~/anaconda3/lib/python3.7/site-packages/openpyxl/reader/excel.py in read_worksheets(self)
226 ws._rels = rels
227 ws_parser = WorksheetReader(ws, fh, self.shared_strings, self.data_only)
--> 228 ws_parser.bind_all()
229
230 # assign any comments to cells
~/anaconda3/lib/python3.7/site-packages/openpyxl/worksheet/_reader.py in bind_all(self)
401
402 def bind_all(self):
--> 403 self.bind_cells()
404 self.bind_merged_cells()
405 self.bind_hyperlinks()
~/anaconda3/lib/python3.7/site-packages/openpyxl/worksheet/_reader.py in bind_cells(self)
324
325 def bind_cells(self):
--> 326 for idx, row in self.parser.parse():
327 for cell in row:
328 style = self.ws.parent._cell_styles[cell['style_id']]
~/anaconda3/lib/python3.7/site-packages/openpyxl/worksheet/_reader.py in parse(self)
148 element.clear()
149 elif tag_name == ROW_TAG:
--> 150 row = self.parse_row(element)
151 element.clear()
152 yield row
~/anaconda3/lib/python3.7/site-packages/openpyxl/worksheet/_reader.py in parse_row(self, row)
270 if keys != set(['r', 'spans']) and keys != set(['r']):
271 # don't create dimension objects unless they have relevant information
--> 272 self.row_dimensions[attrs['r']] = attrs
273
274 cells = [self.parse_cell(el) for el in row]
KeyError: 'r'

Import audio files in python for analysis e.g Signal Analysis

I am trying to import a .wav file to perform signal analysis on it. I have imported all the libraries I am meant to (IPython, wave), but it is still showing me an error. Some of the libraries came from a book's GitHub repository (https://github.com/AllenDowney/ThinkDSP). Can anyone show me what is wrong with the code?
(This is after importing all necessary libraries in the book and in python)
Error Traceback (most recent call last)
in ()
----> 1 wave= thinkdsp.read_wave('365515__noedell__noedell-shady-scheme-01.wav')
C:\Users\Ademola\Desktop\500 Level\DSP\DSP_Python\ThinkDSP-master\ThinkDSP-master\code\thinkdsp.py in read_wave(filename)
99 returns: Wave
100 """
--> 101 fp = open_wave(filename, 'r')
102
103 nchannels = fp.getnchannels()
C:\Users\Ademola\Anaconda3\lib\wave.py in open(f, mode)
497 mode = 'rb'
498 if mode in ('r', 'rb'):
--> 499 return Wave_read(f)
500 elif mode in ('w', 'wb'):
501 return Wave_write(f)
C:\Users\Ademola\Anaconda3\lib\wave.py in init(self, f)
161 # else, assume it is an open file object already
162 try:
--> 163 self.initfp(f)
164 except:
165 if self._i_opened_the_file:
C:\Users\Ademola\Anaconda3\lib\wave.py in initfp(self, file)
141 chunkname = chunk.getname()
142 if chunkname == b'fmt ':
--> 143 self._read_fmt_chunk(chunk)
144 self._fmt_chunk_read = 1
145 elif chunkname == b'data':
C:\Users\Ademola\Anaconda3\lib\wave.py in _read_fmt_chunk(self, chunk)
258 self._sampwidth = (sampwidth + 7) // 8
259 else:
--> 260 raise Error('unknown format: %r' % (wFormatTag,))
261 self._framesize = self._nchannels * self._sampwidth
262 self._comptype = 'NONE'
Error: unknown format: 3
Without seeing your code it's hard to answer your question... you can read wav files with the wave module that comes standard in Python. Basic syntax below:
import wave
# Opens 'wavFile.wav' for reading with the standard-library wave module.
# NOTE(review): the traceback in the question shows wave itself raising
# "unknown format: 3" from _read_fmt_chunk, so this call would presumably fail
# the same way on that file - confirm the file's encoding (wave appears to
# accept only PCM data) or convert the file first.
wav = wave.open('wavFile.wav', 'r')
here is the documentation:
https://docs.python.org/2/library/wave.html
Let me know if this helps!

Python call script still failing

I've spent hours trying to run this script, but for some reason it simply cannot function properly. 'Sex' is now a global install, so it calls that fine, but something else seemingly fails. The thing that looks like it fails is the thing it's supposed to create, unfortunately. I even added a simple print statement after the def statement, and it wouldn't print, it just fails automatically. Any insight?
Code:
def objmask(inimgs, inwhts, thresh1='20.0', thresh2='2.0', tfdel=True,
            xceng=3001., yceng=3001., outdir='.', tmpdir='tmp'):
    """Run SExtractor ('sex') once per input image with an APERTURES check image.

    Only the part of the function shown in this snippet is documented.

    Args:
        inimgs: Sequence of image file names; 'sex' is run once per entry.
        inwhts: Sequence of weight-image file names, indexed in parallel
            with ``inimgs`` and passed as -WEIGHT_IMAGE.
        thresh1: Value passed as -DETECT_THRESH (converted with ``str``).
        thresh2: Value passed as -ANALYSIS_THRESH (converted with ``str``).
        tfdel: Not used in the part of the function shown here.
        xceng: Not used in the part of the function shown here.
        yceng: Not used in the part of the function shown here.
        outdir: Output directory; created when it is not '.' and is missing.
        tmpdir: Scratch directory; created when it is missing.
    """
    print("c")
    # initial detection of main galaxy with SExtractor for re-centering purposes
    if outdir != '.':
        if not os.path.exists(outdir):
            os.makedirs(outdir)
    print(inimgs)
    if not os.path.exists(tmpdir):
        os.makedirs(tmpdir)
    for c in range(np.size(inimgs)):
        print('Creating Aperture Run: %s' % c)
        # NOTE(review): the catalog is written to
        # /home/vidur/se_files/tmp<c>.cat, while the reported IOError is for
        # './se_files/_tmp_seobj<c>.cat' - verify that the code reading the
        # catalog uses the same path.
        command = ['sex', inimgs[c], '-c', '/home/vidur/se_files/gccg.sex',
                   '-CATALOG_NAME', '/home/vidur/se_files/tmp'+str(c)+'.cat',
                   '-PARAMETERS_NAME', '/home/vidur/se_files/gccg_ell.param',
                   '-FILTER_NAME', '/home/vidur/se_files/gccg.conv',
                   '-STARNNW_NAME', '/home/vidur/se_files/gccg.nnw',
                   '-CHECKIMAGE_TYPE', 'APERTURES',
                   '-VERBOSE_TYPE', 'QUIET',
                   '-DETECT_THRESH', str(thresh1),
                   '-ANALYSIS_THRESH', str(thresh2),
                   '-WEIGHT_IMAGE', inwhts[c]]
        # No shell=True: with a list and shell=True on POSIX only the first
        # item ('sex') is run as the command and the remaining items go to
        # the shell itself, so SExtractor would start without any arguments.
        status = subprocess.call(command)
        if status != 0:
            # Report the failure instead of silently continuing.
            print('sex exited with status %s for %s' % (status, inimgs[c]))
Error:
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
/home/vidur/se_files/<ipython-input-2-bbc58f9e134a> in <module>()
----> 1 fetch_swarp2.objmask(['sciPHOTOf105w0.fits'],['whtPHOTOf105w0.fits'])
/home/vidur/se_files/fetch_swarp2.pyc in objmask(inimgs, inwhts, thresh1, thresh2, tfdel, xceng, yceng, outdir, tmpdir)
116 secat=asciitable.read('./se_files/_tmp_seobj'+str(c)+'.cat',
117 names=['flux','ferr','xmin','ymin','xmax','ymax',
--> 118 'xc','yc','cxx','cyy','cxy'])
119 robj = np.sqrt((secat['xc']-xceng)**2.0+(secat['yc']-yceng)**2.0)
120 rmin = (robj==np.min(robj))
/usr/local/lib/python2.7/dist-packages/asciitable-0.8.0-py2.7.egg/asciitable/ui.pyc in read(table, numpy, guess, **kwargs)
129 guess = _GUESS
130 if guess:
--> 131 dat = _guess(table, new_kwargs)
132 else:
133 reader = get_reader(**new_kwargs)
/usr/local/lib/python2.7/dist-packages/asciitable-0.8.0-py2.7.egg/asciitable/ui.pyc in _guess(table, read_kwargs)
173 try:
174 reader = get_reader(**guess_kwargs)
--> 175 dat = reader.read(table)
176 # When guessing impose additional requirements on column names and number of cols
177 bads = [" ", ",", "|", "\t", "'", '"']
/usr/local/lib/python2.7/dist-packages/asciitable-0.8.0-py2.7.egg/asciitable/core.pyc in read(self, table)
839 self.header.data = self.data
840
--> 841 self.lines = self.inputter.get_lines(table)
842 self.data.get_data_lines(self.lines)
843 self.header.get_cols(self.lines)
/usr/local/lib/python2.7/dist-packages/asciitable-0.8.0-py2.7.egg/asciitable/core.pyc in get_lines(self, table)
155 table = table.read()
156 elif '\n' not in table and '\r' not in table + '':
--> 157 table = open(table, 'r').read()
158 lines = table.splitlines()
159 except TypeError:
IOError: [Errno 2] No such file or directory: './se_files/_tmp_seobj0.cat'
The most relevant portion of the error seems to be the end.
P.S. I run Ubuntu 12.04 32-bit

Categories