Writing data to an existing excel sheet using openpyxl - python

I'm quite new to coding in general.
What I want to achieve is a script that runs through a list of employers in Excel and generates a new hour-sheet every week. By generating I mean: for every employer, copy an empty hour-sheet, rename it, and change the week number and employer name in the newly made copy.
I didn't start with a loop, because I first wanted to write the part that changes the employer name and week number. I've already searched the internet for answers, but I can't get the code to work; I keep getting error messages.
So here is my code so far:
import os
import shutil
import time
from openpyxl import load_workbook
# Build the "<year>_<week>" tag that goes into this week's file names.
from time import strftime
year = time.strftime("%Y")
# %W is 0 for the days before the year's first Monday; the script adds 1 to it.
week = str(int(time.strftime("%W")) + 1)
year_week = "_".join([year, week])
# Copy the blank hour-sheet to a new per-employer file for this week.
employer = "Adam"
hsheets_dir = "C:\\test\\"
old_file_name = "blanco.xlsx"
new_file_name = "{}_{}.xlsx".format(employer, year_week)
dest_filename = hsheets_dir + new_file_name
shutil.copy2(hsheets_dir + old_file_name, dest_filename)
#change employer name and weeknumber
def insert_xlsx(dest, empl, wk):
    """Write the employer name and week number into an hour-sheet workbook.

    Opens the workbook at ``dest``, sets cell B1 of the "Auto" sheet to
    ``empl`` and cell B2 to ``wk``, then saves the workbook back to ``dest``.
    """
    print(dest)
    wb = load_workbook(filename=dest)
    # Look the sheet up by name; indexing the workbook replaces the
    # deprecated get_sheet_by_name() call.
    ws = wb["Auto"]
    ws.cell(row=1, column=2).value = empl
    ws.cell(row=2, column=2).value = wk
    wb.save(dest)


# Pass `week`; the name `week_str` used here originally is never defined.
insert_xlsx(dest_filename, employer, week)
And here is the error message i keep getting:
Traceback (most recent call last):
File "G:\ALL\Urenverantwoording\Wekelijks\Genereer_weekstaten.py", line 46, in <module>
insert_xlsx(dest_filename, employer, week)
File "G:\ALL\Urenverantwoording\Wekelijks\Genereer_weekstaten.py", line 44, in insert_xlsx
wb.save(dest)
File "C:\Python34\lib\site-packages\openpyxl\workbook\workbook.py", line 298, in save
save_workbook(self, filename)
File "C:\Python34\lib\site-packages\openpyxl\writer\excel.py", line 198, in save_workbook
writer.save(filename, as_template=as_template)
File "C:\Python34\lib\site-packages\openpyxl\writer\excel.py", line 181, in save
self.write_data(archive, as_template=as_template)
File "C:\Python34\lib\site-packages\openpyxl\writer\excel.py", line 87, in write_data
self._write_worksheets(archive)
File "C:\Python34\lib\site-packages\openpyxl\writer\excel.py", line 114, in _write_worksheets
write_worksheet(sheet, self.workbook.shared_strings,
File "C:\Python34\lib\site-packages\openpyxl\writer\worksheet.py", line 302, in write_worksheet
xf.write(comments)
File "C:\Python34\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "C:\Python34\lib\site-packages\openpyxl\xml\xmlfile.py", line 51, in element
self._write_element(el)
File "C:\Python34\lib\site-packages\openpyxl\xml\xmlfile.py", line 78, in _write_element
xml = tostring(element)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 1126, in tostring
short_empty_elements=short_empty_elements)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 778, in write
short_empty_elements=short_empty_elements)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 943, in _serialize_xml
short_empty_elements=short_empty_elements)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 943, in _serialize_xml
short_empty_elements=short_empty_elements)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 935, in _serialize_xml
v = _escape_attrib(v)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 1093, in _escape_attrib
_raise_serialization_error(text)
File "C:\Python34\lib\xml\etree\ElementTree.py", line 1059, in _raise_serialization_error
"cannot serialize %r (type %s)" % (text, type(text).__name__)
TypeError: cannot serialize 3 (type int)
Can someone point me in the right direction?
Many thanks

I think based on your responses then that the problem lies with your existing hour-sheet Excel spreadsheet:
Try starting with a copy of your existing spreadsheet and removing all of the entries. Hopefully this too will work.
If this fails, start with a new blank spreadsheet.
Bit by bit copy the existing data and repeat your script.
By doing this you might be able to isolate the feature which is not compatible with openpyxl.
Alternatively, you might be able to write the whole thing from your Python script, and skip trying to modify a semi-filled in one. This would then be 100% compatible.

Related

Python Pandas xlsxwriter failing to close

I am building automation for a multi-tabbed Excel document. When I try to close the document I get the error below (full traceback, minus the personal details at the top); the resulting xlsx file is corrupted and I cannot open it. Unfortunately I haven't found any clues to go on. I am using xlsxwriter functions to set row and column formatting; from what I've found this could be the issue, but I haven't been able to track it down. Any thoughts on possible solutions?
writer.close()
File "/opt/homebrew/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 1480, in close
self._save()
File "/opt/homebrew/lib/python3.10/site-packages/pandas/io/excel/_xlsxwriter.py", line 244, in _save
self.book.close()
File "/opt/homebrew/lib/python3.10/site-packages/xlsxwriter/workbook.py", line 324, in close
self._store_workbook()
File "/opt/homebrew/lib/python3.10/site-packages/xlsxwriter/workbook.py", line 709, in _store_workbook
xml_files = packager._create_package()
File "/opt/homebrew/lib/python3.10/site-packages/xlsxwriter/packager.py", line 137, in _create_package
self._write_worksheet_files()
File "/opt/homebrew/lib/python3.10/site-packages/xlsxwriter/packager.py", line 193, in _write_worksheet_files
worksheet._assemble_xml_file()
File "/opt/homebrew/lib/python3.10/site-packages/xlsxwriter/worksheet.py", line 4221, in _assemble_xml_file
self._write_cols()
File "/opt/homebrew/lib/python3.10/site-packages/xlsxwriter/worksheet.py", line 5807, in _write_cols
self._write_col_info(self.colinfo[col])
File "/opt/homebrew/lib/python3.10/site-packages/xlsxwriter/worksheet.py", line 5836, in _write_col_info
if width > 0:
TypeError: '>' not supported between instances of 'Format' and 'int'

File doesnt exist error before user inputs file name

I am working with streamlit in python to produce a tool that takes a user's input of a csv filename, and then carries out cleaning/tabulating of the data within the file.
I have encountered an issue where before the user has entered their filename, my streamlit site shows a "FileNotFoundError: [Errno 2] No such file or directory:"
This is expected because the user has not entered their filename yet - however once filename is entered the code runs smoothly. I am hoping to overcome this issue but as a relative newcomer to Python I am quite unsure how!
Please see code snippet below
# Ask the user for the CSV base name (without extension) and load that file.
autocall_gbp_file = str(st.text_input("Please type in your Autocall File Name (GBP)"))
# The directory has to be a quoted string literal; unquoted it is a syntax error.
filepath = "M:/Desktop/AutomationProject/"
express_gbp = pd.read_csv(filepath + autocall_gbp_file + ".csv")
st.write('Saved!')
The exact error I get before any user input has been taken is:
FileNotFoundError: [Errno 2] No such file or directory:
'M:/Desktop/AutomationProject/.csv'
Traceback:
File "C:\Users\adavie18\.conda\envs\projectenv\lib\site-
packages\streamlit\scriptrunner\script_runner.py", line 475, in
_run_script
exec(code, module.__dict__)
File "M:\Desktop\AutomationProject\AutocallApp.py", line 179, in
<module>
express_gbp = pd.read_csv(filepath+autocall_gbp_file+".csv")
File "C:\Users\adavie18\.conda\envs\projectenv\lib\site-
packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\adavie18\.conda\envs\projectenv\lib\site-
packages\pandas\io\parsers\readers.py", line 680, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\Users\adavie18\.conda\envs\projectenv\lib\site-
packages\pandas\io\parsers\readers.py", line 575, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Users\adavie18\.conda\envs\projectenv\lib\site-
packages\pandas\io\parsers\readers.py", line 933, in __init__
self._engine = self._make_engine(f, self.engine)
File "C:\Users\adavie18\.conda\envs\projectenv\lib\site-
packages\pandas\io\parsers\readers.py", line 1217, in _make_engine
self.handles = get_handle( # type: ignore[call-overload]
File "C:\Users\adavie18\.conda\envs\projectenv\lib\site-
packages\pandas\io\common.py", line 789, in get_handle
handle = open(
Thanks in advance to anyone who can offer a suggestion!
The general pattern for both Streamlit and Python in general is to test for the value existing:
# Only read the CSV once the user has actually typed a file name.
if autocall_gbp_file:
    express_gbp = pd.read_csv(filepath + autocall_gbp_file + ".csv")
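When the Streamlit app runs before a user inputs something, the value of autocall_gbp_file is an empty string (st.text_input returns "" until something is typed, which is why the error shows a path ending in '/.csv'). An empty string is falsy, so by writing if autocall_gbp_file:, you're only running the pandas read_csv after someone has entered a value.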
Separately, you're better off developing this with st.file_uploader than using text_input, as the Streamlit app doesn't necessarily have access to the user filesystem and same drive mapping as the machine you are developing on. By using st.file_uploader, you're literally providing the actual file, not a reference to where it might be located.

loop through and load a zipped folder of yaml files

I have a zipped folder containing 15 000 yaml files. I'd like to iterate through the folder using yaml.safe_load so that each file is in a dictionary format and I can extract information from each file that I need. I've written some code so far using zipfile.ZipFile and yaml.safe_load but it only works for the first file in the zipped folder. Would anyone please mind taking a look and explaining what I'm misunderstanding please?
# Open the archive with a context manager so it is closed when the loop ends.
with zipfile.ZipFile("D:/export.zip") as zip_file:
    files = zip_file.namelist()
    print(files)
    # Only the first 10 entries for now; slicing also copes with fewer than 10.
    for name in files[:10]:
        with zip_file.open(name) as yamlfile:
            yamlreader = yaml.safe_load(yamlfile)
        print(yamlreader["identifier"])
for now I'm just iterating through 10 files to make life easier. Eventually I'd like to do the whole 15 000. "identifier" is a key in the yaml file.
This is the error:
10.5281/zenodo.1014773
Traceback (most recent call last):
File "C:/Users/estho/PycharmProjects/GSOC3/testing_dataextraction.py", line 20, in <module>
yamlreader = yaml.safe_load(yamlfile)
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\__init__.py", line 162, in safe_load
return load(stream, SafeLoader)
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\__init__.py", line 114, in load
return loader.get_single_data()
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\constructor.py", line 41, in get_single_data
node = self.get_single_node()
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\composer.py", line 36, in get_single_node
document = self.compose_document()
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\composer.py", line 55, in compose_document
node = self.compose_node(None, None)
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\composer.py", line 84, in compose_node
node = self.compose_mapping_node(anchor)
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\composer.py", line 127, in compose_mapping_node
while not self.check_event(MappingEndEvent):
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\parser.py", line 98, in check_event
self.current_event = self.state()
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\parser.py", line 428, in parse_block_mapping_key
if self.check_token(KeyToken):
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\scanner.py", line 116, in check_token
self.fetch_more_tokens()
File "C:\Users\estho\PycharmProjects\GSOC3\lib\site-packages\yaml\scanner.py", line 260, in fetch_more_tokens
self.get_mark())
yaml.scanner.ScannerError: while scanning for the next token
found character '\t' that cannot start any token
in "yamlfile_10_5281_zenodo_1745362.yaml", line 4, column 1
Thank you.
It seems to me that the file "yamlfile_10_5281_zenodo_1745362.yaml" contains a bad token: the error reports a tab character ('\t' is how Python displays a tab) at line 4, column 1. YAML does not allow tab characters for indentation, so the scanner cannot start a token there. Try running it without this file, or replace the tab in that file with spaces.

Pyexcel doesn't merge .xls files

I'm trying to merge multiple .xls files into a single workbook, where each file is inserted into a sheet, named with the .xls filename.
While surfing the web, I've seen the documentation of Pyexcel and a specific module which, as written here, could do the job easily.
Here's the code.
from pyexcel.cookbook import merge_all_to_a_book
import glob
# Raw string so the backslash in the Windows path is not read as an escape.
merge_all_to_a_book(glob.glob(r"Dir\*.xls"), "output.xls")
As expected, it doesn't work. Here's the console output.
File "..\Desktop\scripts\provaimport.py", line 48, in <module>
merge_all_to_a_book(glob.glob("C:\Users\Tesisti\Desktop\forpythonscript\*.xls"),"output.xls")
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel\cookbook.py", line 148, in merge_all_to_a_book
merged.save_as(outfilename)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel\internal\meta.py", line 339, in save_as
return save_book(self, file_name=filename, **keywords)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel\internal\core.py", line 51, in save_book
return _save_any(a_source, book)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel\internal\core.py", line 55, in _save_any
a_source.write_data(instance)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel\plugins\sources\file_output.py", line 38, in write_data
**self._keywords)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel\plugins\renderers\excel.py", line 30, in render_book_to_file
save_data(file_name, book.to_dict(), **keywords)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel_io\io.py", line 119, in save_data
**keywords)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel_io\io.py", line 141, in store_data
writer.write(data)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel_io\book.py", line 58, in __exit__
self.close()
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\pyexcel_xls\xlsw.py", line 86, in close
self.work_book.save(self._file_alike_object)
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\xlwt\Workbook.py", line 710, in save
doc.save(filename_or_stream, self.get_biff_data())
File "C:\Users\Tesisti\Anaconda2\lib\site-packages\xlwt\Workbook.py", line 680, in get_biff_data
self.__worksheets[self.__active_sheet].selected = True
Any idea on how to fix?
It seems to me that glob.glob("Dir\*.xls") returned an empty list of files. Hence pyexcel's plugin pyexcel-xls fails when it is asked to create an empty file.
The solution I would currently recommend is to take the latest pyexcel-xls and wrap merge_all_to_a_book in a try-except statement that catches the empty-file case.

Pandas sometimes writes empty or damaged files

I've been using pandas for a while and I think it is a great tool. I made a program to generate some Excel files from data collected by the user. The end user has been testing and using it for 6 months; it never failed until yesterday, when it generated a damaged Excel file. When I opened it with a text editor, it was totally blank. The code to generate this file is this:
# Write listaTotal (without its index) to the workbook at `direccion`
# using the xlsxwriter engine.
escritor = pandas.ExcelWriter(direccion, engine='xlsxwriter')
listaTotal.to_excel(escritor, index = False)
# NOTE(review): save() is presumably the step that writes the file to disk - confirm for the pandas version in use.
escritor.save()
and:
# Write self.listaFact to sheet 'Hoja1' of `direccion` + '.xlsx', starting at
# row 1, column 0 and omitting the index, using the xlsxwriter engine.
escritor = pandas.ExcelWriter(direccion + '.xlsx', engine='xlsxwriter')
self.listaFact.to_excel(escritor, index = False, startrow = 1, startcol = 0, sheet_name = 'Hoja1')
escritor.save()
The second code fragment also uses some format options for the 'xlsxwriter', an example here:
# Cell format: 9pt 'Sans Serif 12cpi' font, with a border and text wrapping.
# NOTE(review): the name `format` shadows the Python builtin of the same name.
format = workbook.add_format()
format.set_font_size(9)
format.set_font_name('Sans Serif 12cpi')
format.set_border()
format.set_text_wrap()
This error happened twice; about 1 month ago and yesterday. I can't duplicate the error, I don't know what happened. And also the traceback is here, it shows the problem when the program reads the file, but this file was generated by the code posted before:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Python27\lib\lib-tk\Tkinter.py", line 1532, in __call__
return self.func(*args)
File "C:\Users\WINNER\Documents\Visual Studio 2013\Projects\PythonApplication4\PythonApplication4\PythonApplication4.py", line 792, in botonGenerarPedido
self.generarPedido()
File "C:\Users\WINNER\Documents\Visual Studio 2013\Projects\PythonApplication4\PythonApplication4\PythonApplication4.py", line 904, in generarPedido
self.generarVentasDia()
File "C:\Users\WINNER\Documents\Visual Studio 2013\Projects\PythonApplication4\PythonApplication4\PythonApplication4.py", line 927, in generarVentasDia
listaTotal = pandas.io.excel.read_excel(direccion)
File "C:\Python27\lib\site-packages\pandas\io\excel.py", line 151, in read_excel
return ExcelFile(io, engine=engine).parse(sheetname=sheetname, **kwds)
File "C:\Python27\lib\site-packages\pandas\io\excel.py", line 188, in __init__
self.book = xlrd.open_workbook(io)
File "C:\Python27\lib\site-packages\xlrd\__init__.py", line 435, in open_workbook
ragged_rows=ragged_rows,
File "C:\Python27\lib\site-packages\xlrd\book.py", line 91, in open_workbook_xls
biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
File "C:\Python27\lib\site-packages\xlrd\book.py", line 1230, in getbof
bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
File "C:\Python27\lib\site-packages\xlrd\book.py", line 1224, in bof_error
raise XLRDError('Unsupported format, or corrupt file: ' + msg)
XLRDError: Unsupported format, or corrupt file: Expected BOF record; found '\x00\x00\x00\x00\x00\x00\x00\x00'

Categories