Reading a messy excel file with python

Reading a messy excel file with python - python

I have been looking for solutions to this problem on Stack Exchange and beyond, and so far I haven't been able to find one.
I'm sure someone has encountered this problem before: I'm writing a python script which will extract and repurpose some data from an excel file - the catch being that the excel file is rife with irregular formatting and extraneous data. So, before I can get down to the table of data I need:
I have to go through tables like this:
My plan is to use some kind of regex or string recognition to know where to split the file so I can get to what I need. But the problem I'm having right now is that pandas freaks out whenever I try to run read_excel on this file.
In [4]: df = pd.read_excel(open('data.xlsx','rb'), sheetname=0)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-21f5fee2b08d> in <module>()
----> 1 df = pd.read_excel(open('data.xlsx','rb'), sheetname=0)
/Users/Gus/anaconda2/lib/python2.7/site-packages/pandas/io/excel.pyc in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, engine, squeeze, **kwds)
168 """
169 if not isinstance(io, ExcelFile):
--> 170 io = ExcelFile(io, engine=engine)
171
172 return io._parse_excel(
/Users/Gus/anaconda2/lib/python2.7/site-packages/pandas/io/excel.pyc in __init__(self, io, **kwds)
223 # N.B. xlrd.Book has a read attribute too
224 data = io.read()
--> 225 self.book = xlrd.open_workbook(file_contents=data)
226 elif isinstance(io, compat.string_types):
227 self.book = xlrd.open_workbook(io)
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/__init__.pyc in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
420 formatting_info=formatting_info,
421 on_demand=on_demand,
--> 422 ragged_rows=ragged_rows,
423 )
424 return bk
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/xlsx.pyc in open_workbook_2007_xml(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)
831 x12sheet = X12Sheet(sheet, logfile, verbosity)
832 heading = "Sheet %r (sheetx=%d) from %r" % (sheet.name, sheetx, fname)
--> 833 x12sheet.process_stream(zflo, heading)
834 del zflo
835
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/xlsx.pyc in own_process_stream(self, stream, heading)
551 self.do_dimension(elem)
552 elif elem.tag == U_SSML12 + "mergeCell":
--> 553 self.do_merge_cell(elem)
554 self.finish_off()
555
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/xlsx.pyc in do_merge_cell(self, elem)
607 ref = elem.get('ref')
608 if ref:
--> 609 first_cell_ref, last_cell_ref = ref.split(':')
610 first_rowx, first_colx = cell_name_to_rowx_colx(first_cell_ref)
611 last_rowx, last_colx = cell_name_to_rowx_colx(last_cell_ref)
ValueError: need more than 1 value to unpack
The whole point of my writing this program is so that I don't have to go in to every single one of these files and delete information by hand. But how can I automate this process if python won't even accept the file? I'm hoping someone here will have encountered a similar problem before. What was your solution?

Related

ValueError: Invalid file path or buffer object type

I've been using mplsoccer library and statsbombpy libraries for a while now with success.
Recently, I've tried to use it again with this code (not fully reproducible due to it being behind a paid api).
!pip install mplsoccer
import pandas as pd
import requests
from mplsoccer.statsbomb import read_event
username = creds['user']
password = creds['passwd']
auth = requests.auth.HTTPBasicAuth(username, password)
URL = 'https://data.statsbombservices.com/api/v5/events/18241'
response = requests.get(URL, auth = auth)
df_dict = read_event(response)
and I'm now starting to get the ValueError of invalid file path or buffer type. I contacted the owner of mplsoccer and asked him about it, and he said it wasn't a reproducible error for him, but it looks like my pandas is having trouble reading it.
The response is returning exactly what it should; the call just fails with the error below.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-64-9be43ec4f519> in <module>
6 URL = 'https://data.statsbombservices.com/api/v5/events/7430'
7 response = requests.get(URL, auth=auth)
----> 8 df_dict = read_event(response)
E:\py\lib\site-packages\mplsoccer\statsbomb.py in read_event(path_or_buf, related_event_df, shot_freeze_frame_df, tactics_lineup_df, warn)
120 match_id = int(path_or_buf.url.split('/')[-1].split('.')[0])
121 else:
--> 122 df = pd.read_json(path_or_buf, encoding='utf-8')
123 match_id = int(os.path.basename(path_or_buf)[:-5])
124
E:\py\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
205 else:
206 kwargs[new_arg_name] = new_arg_value
--> 207 return func(*args, **kwargs)
208
209 return cast(F, wrapper)
E:\py\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
E:\py\lib\site-packages\pandas\io\json\_json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, encoding_errors, lines, chunksize, compression, nrows, storage_options)
588 convert_axes = True
589
--> 590 json_reader = JsonReader(
591 path_or_buf,
592 orient=orient,
E:\py\lib\site-packages\pandas\io\json\_json.py in __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression, nrows, storage_options, encoding_errors)
673 raise ValueError("nrows can only be passed if lines=True")
674
--> 675 data = self._get_data_from_filepath(filepath_or_buffer)
676 self.data = self._preprocess_data(data)
677
E:\py\lib\site-packages\pandas\io\json\_json.py in _get_data_from_filepath(self, filepath_or_buffer)
710 or file_exists(filepath_or_buffer)
711 ):
--> 712 self.handles = get_handle(
713 filepath_or_buffer,
714 "r",
E:\py\lib\site-packages\pandas\io\common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
606
607 # open URLs
--> 608 ioargs = _get_filepath_or_buffer(
609 path_or_buf,
610 encoding=encoding,
E:\py\lib\site-packages\pandas\io\common.py in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
393 if not is_file_like(filepath_or_buffer):
394 msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}"
--> 395 raise ValueError(msg)
396
397 return IOArgs(
ValueError: Invalid file path or buffer object type: <class 'requests_cache.models.response.CachedResponse'>
hoping someone can help me see exactly where pandas is struggling and what I can do to fix it? Thanks

Unsupported format, or corrupt file: Expected BOF record; found b'\n\n\n\n\n\n<!'

What am I doing wrong? I tried to parse an Excel file from my GitHub, but I get an error: Unsupported format, or corrupt file: Expected BOF record; found b'\n\n\n\n\n\n<!. I ran the same code in Spyder on my laptop and in Google Colab and got the same sad result. I am a beginner at GitHub; maybe I've done something wrong with my .xlsx and it is being read incorrectly?
import pandas as pd
import requests as rq
import io
from io import BytesIO
put_k_ses = 'https://github.com/valeriigamaley/Kosh-Agach-SPS-MLDS/blob/024eb349c40174edbcd55e09e70f7fbc685c8ca6/GenInsolKoshAgachSPS.xlsx'
data1 = rq.get(put_k_ses).content
dannye_gener = pd.read_excel(io.BytesIO(data1))
print (dannye_gener)
The error looks like this:
XLRDError Traceback (most recent call last)
<ipython-input-8-85891c30428a> in <module>()
3 # путь к файлу с данными по инсоляции и генерации
4 data1 = rq.get(put_k_ses).content
----> 5 dannye_gener = pd.read_excel(io.BytesIO(data1))
6 print (dannye_gener)
9 frames
/usr/local/lib/python3.7/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
294 )
295 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
/usr/local/lib/python3.7/dist-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols)
302
303 if not isinstance(io, ExcelFile):
--> 304 io = ExcelFile(io, engine=engine)
305 elif engine and engine != io.engine:
306 raise ValueError(
/usr/local/lib/python3.7/dist-packages/pandas/io/excel/_base.py in __init__(self, path_or_buffer, engine)
865 self._io = stringify_path(path_or_buffer)
866
--> 867 self._reader = self._engines[engine](self._io)
868
869 def __fspath__(self):
/usr/local/lib/python3.7/dist-packages/pandas/io/excel/_xlrd.py in __init__(self, filepath_or_buffer)
20 err_msg = "Install xlrd >= 1.0.0 for Excel support"
21 import_optional_dependency("xlrd", extra=err_msg)
---> 22 super().__init__(filepath_or_buffer)
23
24 #property
/usr/local/lib/python3.7/dist-packages/pandas/io/excel/_base.py in __init__(self, filepath_or_buffer)
349 # N.B. xlrd.Book has a read attribute too
350 filepath_or_buffer.seek(0)
--> 351 self.book = self.load_workbook(filepath_or_buffer)
352 elif isinstance(filepath_or_buffer, str):
353 self.book = self.load_workbook(filepath_or_buffer)
/usr/local/lib/python3.7/dist-packages/pandas/io/excel/_xlrd.py in load_workbook(self, filepath_or_buffer)
33 if hasattr(filepath_or_buffer, "read"):
34 data = filepath_or_buffer.read()
---> 35 return open_workbook(file_contents=data)
36 else:
37 return open_workbook(filepath_or_buffer)
/usr/local/lib/python3.7/dist-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
160 formatting_info=formatting_info,
161 on_demand=on_demand,
--> 162 ragged_rows=ragged_rows,
163 )
164 return bk
/usr/local/lib/python3.7/dist-packages/xlrd/book.py in open_workbook_xls(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
89 t1 = time.clock()
90 bk.load_time_stage_1 = t1 - t0
---> 91 biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
92 if not biff_version:
93 raise XLRDError("Can't determine file's BIFF version")
/usr/local/lib/python3.7/dist-packages/xlrd/book.py in getbof(self, rqd_stream)
1269 bof_error('Expected BOF record; met end of file')
1270 if opcode not in bofcodes:
-> 1271 bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
1272 length = self.get2bytes()
1273 if length == MY_EOF:
/usr/local/lib/python3.7/dist-packages/xlrd/book.py in bof_error(msg)
1263 if DEBUG: print("reqd: 0x%04x" % rqd_stream, file=self.logfile)
1264 def bof_error(msg):
-> 1265 raise XLRDError('Unsupported format, or corrupt file: ' + msg)
1266 savpos = self._position
1267 opcode = self.get2bytes()
XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'\n\n\n\n\n\n<!'

Since you're passing in a memory stream, you'll need to pass in an engine:
If io is not a buffer or path, engine must be set to identify io.
(the docs)
dannye_gener = pd.read_excel(io.BytesIO(data1), engine="openpyxl")
Since you do have a byte buffer (data1), though, you could just do
dannye_gener = pd.read_excel(data1)

read_excel error in Pandas ('ElementTree' object has no attribute 'getiterator')

I have tried absolutely everything. I am a total beginner. So would really appreciate help.
This is the code I am trying to run.
import pandas as pd
filepath = r'/Users/vignesh/Desktop/Python/test2.xlsx'
df = pd.read_excel(filepath)
print(df)
And this keeps throwing the error below in JupyterLab. Does someone know how I can fix it? I have installed and updated xlrd and openpyxl. I watched some videos on YouTube to see how others are doing it, and they seem to be getting by effortlessly with these same lines of code.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-3-fa10276656c9> in <module>
----> 1 pd.read_excel('test2.xlsx')
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
294 )
295 warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
--> 296 return func(*args, **kwargs)
297
298 return wrapper
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols)
302
303 if not isinstance(io, ExcelFile):
--> 304 io = ExcelFile(io, engine=engine)
305 elif engine and engine != io.engine:
306 raise ValueError(
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, path_or_buffer, engine)
865 self._io = stringify_path(path_or_buffer)
866
--> 867 self._reader = self._engines[engine](self._io)
868
869 def __fspath__(self):
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/excel/_xlrd.py in __init__(self, filepath_or_buffer)
20 err_msg = "Install xlrd >= 1.0.0 for Excel support"
21 import_optional_dependency("xlrd", extra=err_msg)
---> 22 super().__init__(filepath_or_buffer)
23
24 #property
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, filepath_or_buffer)
351 self.book = self.load_workbook(filepath_or_buffer)
352 elif isinstance(filepath_or_buffer, str):
--> 353 self.book = self.load_workbook(filepath_or_buffer)
354 elif isinstance(filepath_or_buffer, bytes):
355 self.book = self.load_workbook(BytesIO(filepath_or_buffer))
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/excel/_xlrd.py in load_workbook(self, filepath_or_buffer)
35 return open_workbook(file_contents=data)
36 else:
---> 37 return open_workbook(filepath_or_buffer)
38
39 #property
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
128 if 'xl/workbook.xml' in component_names:
129 from . import xlsx
--> 130 bk = xlsx.open_workbook_2007_xml(
131 zf,
132 component_names,
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/xlrd/xlsx.py in open_workbook_2007_xml(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)
810 del zflo
811 zflo = zf.open(component_names['xl/workbook.xml'])
--> 812 x12book.process_stream(zflo, 'Workbook')
813 del zflo
814 props_name = 'docprops/core.xml'
/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/xlrd/xlsx.py in process_stream(self, stream, heading)
264 self.tree = ET.parse(stream)
265 getmethod = self.tag2meth.get
--> 266 for elem in self.tree.iter() if Element_has_iter else self.tree.getiterator():
267 if self.verbosity >= 3:
268 self.dump_elem(elem)
AttributeError: 'ElementTree' object has no attribute 'getiterator'

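You're using Python 3.9 with xlrd. This error occurs because the getiterator() method was removed from xml.etree.ElementTree in Python 3.9, and the xlrd version shown in the traceback still falls back to calling it. (NB: when the engine is not specified, pandas defaults to xlrd, as per the pandas documentation.)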
You can either use a version of Python < 3.9 or use a different engine to parse your excel file. Like this:
import pandas as pd
filepath = r'/Users/vignesh/Desktop/Python/test2.xlsx'
df = pd.read_excel(filepath, engine='openpyxl')
print(df)

AssertionError when calling pandas read_excel on xls file using xlrd engine

I'm trying to read in an excel file (.xls) via pandas into a data frame as follows:
df = pd.read_excel(
filename, sheet_name='Sheet1', nrows=6)
Unfortunately, I get an AssertionError. However, if I open the file in Excel, click save, and then rerun the code, it works perfectly fine. I've not changed any data, just opened the file in Excel and saved it. Has anyone come across this issue before?
This is the assertion error I'm getting:
AssertionError Traceback (most recent call last)
<ipython-input-153-58dcba1b45c3> in <module>
1 df = pd.read_excel(
----> 2 filename, sheet_name='Sheet1', nrows=6)
~\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
206 else:
207 kwargs[new_arg_name] = new_arg_value
--> 208 return func(*args, **kwargs)
209
210 return wrapper
~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skip_footer, skipfooter, convert_float, mangle_dupe_cols, **kwds)
308
309 if not isinstance(io, ExcelFile):
--> 310 io = ExcelFile(io, engine=engine)
311 elif engine and engine != io.engine:
312 raise ValueError(
~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py in __init__(self, io, engine)
817 self._io = _stringify_path(io)
818
--> 819 self._reader = self._engines[engine](self._io)
820
821 def __fspath__(self):
~\Anaconda3\lib\site-packages\pandas\io\excel\_xlrd.py in __init__(self, filepath_or_buffer)
19 err_msg = "Install xlrd >= 1.0.0 for Excel support"
20 import_optional_dependency("xlrd", extra=err_msg)
---> 21 super().__init__(filepath_or_buffer)
22
23 #property
~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py in __init__(self, filepath_or_buffer)
357 self.book = self.load_workbook(filepath_or_buffer)
358 elif isinstance(filepath_or_buffer, str):
--> 359 self.book = self.load_workbook(filepath_or_buffer)
360 else:
361 raise ValueError(
~\Anaconda3\lib\site-packages\pandas\io\excel\_xlrd.py in load_workbook(self, filepath_or_buffer)
34 return open_workbook(file_contents=data)
35 else:
---> 36 return open_workbook(filepath_or_buffer)
37
38 #property
~\Anaconda3\lib\site-packages\xlrd\__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
155 formatting_info=formatting_info,
156 on_demand=on_demand,
--> 157 ragged_rows=ragged_rows,
158 )
159 return bk
~\Anaconda3\lib\site-packages\xlrd\book.py in open_workbook_xls(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
118 bk._sheet_list = [None for sh in bk._sheet_names]
119 if not on_demand:
--> 120 bk.get_sheets()
121 bk.nsheets = len(bk._sheet_list)
122 if biff_version == 45 and bk.nsheets > 1:
~\Anaconda3\lib\site-packages\xlrd\book.py in get_sheets(self)
721 for sheetno in xrange(len(self._sheet_names)):
722 if DEBUG: print("GET_SHEETS: sheetno =", sheetno, self._sheet_names, self._sh_abs_posn, file=self.logfile)
--> 723 self.get_sheet(sheetno)
724
725 def fake_globals_get_sheet(self): # for BIFF 4.0 and earlier
~\Anaconda3\lib\site-packages\xlrd\book.py in get_sheet(self, sh_number, update_pos)
712 sh_number,
713 )
--> 714 sh.read(self)
715 self._sheet_list[sh_number] = sh
716 return sh
~\Anaconda3\lib\site-packages\xlrd\sheet.py in read(self, bk)
1107 saved_obj_id = None
1108 elif rc == XL_NOTE:
-> 1109 self.handle_note(data, txos)
1110 elif rc == XL_FEAT11:
1111 self.handle_feat11(data)
~\Anaconda3\lib\site-packages\xlrd\sheet.py in handle_note(self, data, txos)
1985 # string length).
1986 # Issue 4 on github: Google Spreadsheet doesn't write the undefined byte.
-> 1987 assert (data_len - endpos) in (0, 1)
1988 if OBJ_MSO_DEBUG:
1989 o.dump(self.logfile, header="=== Note ===", footer= " ")
AssertionError:

This often happens if you have a date column in your data that hasn't been set to 'Date' format. Set Date columns to 'Date' in excel and the error will disappear.

How to fix "XLRDError: ZIP file contents not a known type of workbook"

I have written this code and it raises the error shown below. Please help me solve this problem.
I have already installed xlrd,
and I installed it again using pip install xlrd.
import numpy as np
import pandas as pd
import os
datas=pd.read_excel('transactions.xlsx')
datas
and the error looks like this:
XLRDError Traceback (most recent call last)
<ipython-input-61-f7361e459a83> in <module>
2 import pandas as pd
3 import os
----> 4 datas=pd.read_excel('transactions.xlsx')
5 datas
~\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
186 else:
187 kwargs[new_arg_name] = new_arg_value
--> 188 return func(*args, **kwargs)
189 return wrapper
190 return _deprecate_kwarg
~\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
186 else:
187 kwargs[new_arg_name] = new_arg_value
--> 188 return func(*args, **kwargs)
189 return wrapper
190 return _deprecate_kwarg
~\Anaconda3\lib\site-packages\pandas\io\excel.py in read_excel(io, sheet_name, header, names, index_col, parse_cols, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skip_footer, skipfooter, convert_float, mangle_dupe_cols, **kwds)
348
349 if not isinstance(io, ExcelFile):
--> 350 io = ExcelFile(io, engine=engine)
351
352 return io.parse(
~\Anaconda3\lib\site-packages\pandas\io\excel.py in __init__(self, io, engine)
651 self._io = _stringify_path(io)
652
--> 653 self._reader = self._engines[engine](self._io)
654
655 def __fspath__(self):
~\Anaconda3\lib\site-packages\pandas\io\excel.py in __init__(self, filepath_or_buffer)
422 self.book = xlrd.open_workbook(file_contents=data)
423 elif isinstance(filepath_or_buffer, compat.string_types):
--> 424 self.book = xlrd.open_workbook(filepath_or_buffer)
425 else:
426 raise ValueError('Must explicitly set engine if not passing in'
~\Anaconda3\lib\site-packages\xlrd\__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
143 if 'content.xml' in component_names:
144 raise XLRDError('Openoffice.org ODS file; not supported')
--> 145 raise XLRDError('ZIP file contents not a known type of workbook')
146
147 from . import book
XLRDError: ZIP file contents not a known type of workbook

Answer for users facing the same error:
import numpy as np
import pandas as pd
import os
datas=pd.read_excel('transactions.xlsx')
datas
This should work, unless there is something wrong with the Excel file. Try it with other files that have the same extension.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Reading a messy excel file with python - python

Related

ValueError: Invalid file path or buffer object type

Unsupported format, or corrupt file: Expected BOF record; found b'\n\n\n\n\n\n<!'

read_excel error in Pandas ('ElementTree' object has no attribute 'getiterator')

AssertionError when calling pandas read_excel on xls file using xlrd engine

How to fix "XLRDError: ZIP file contents not a known type of workbook"

Categories

Resources