I would like to load a txt file with genfromtxt(). The txt file is already in C:\.
stock=np.genfromtxt('c:\09012017.txt',delimiter=' ',dtype=str,skip_header=1)
C:\Anaconda3\lib\site-packages\numpy\lib\npyio.py in genfromtxt(fname, dtype, comments, delimiter, skip_header, skip_footer, converters, missing_values, filling_values, usecols, names, excludelist, deletechars, replace_space, autostrip, case_sensitive, defaultfmt, unpack, usemask, loose, invalid_raise, max_rows)
1549 fhd = iter(np.lib._datasource.open(fname, 'rbU'))
1550 else:
-> 1551 fhd = iter(np.lib._datasource.open(fname, 'rb'))
1552 own_fhd = True
1553 else:
C:\Anaconda3\lib\site-packages\numpy\lib\_datasource.py in open(path, mode, destpath)
149
150 ds = DataSource(destpath)
--> 151 return ds.open(path, mode)
152
153
C:\Anaconda3\lib\site-packages\numpy\lib\_datasource.py in open(self, path, mode)
492
493 # NOTE: _findfile will fail on a new file opened for writing.
--> 494 found = self._findfile(path)
495 if found:
496 _fname, ext = self._splitzipext(found)
C:\Anaconda3\lib\site-packages\numpy\lib\_datasource.py in _findfile(self, path)
335
336 for name in filelist:
--> 337 if self.exists(name):
338 if self._isurl(name):
339 name = self._cache(name)
C:\Anaconda3\lib\site-packages\numpy\lib\_datasource.py in exists(self, path)
440
441 # Test local path
--> 442 if os.path.exists(path):
443 return True
444
C:\Anaconda3\lib\genericpath.py in exists(path)
17 """Test whether a path exists. Returns False for broken symbolic links"""
18 try:
---> 19 os.stat(path)
20 except OSError:
21 return False
ValueError: stat: embedded null character in path
It looks like something is wrong with the path, but I am 100% sure the txt file is in the root of drive C:.
Please give me some help. Thanks.
You have to use either a forward slash or a double backslash:
stock=np.genfromtxt('c:/09012017.txt',delimiter=' ',dtype=str,skip_header=1)
or
stock=np.genfromtxt('c:\\09012017.txt',delimiter=' ',dtype=str,skip_header=1)
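If you use just one backslash, it is interpreted as the start of an escape sequence, which is not what you want here: in 'c:\09012017.txt' the \0 is read as a null character, which is exactly what the "embedded null character in path" error is complaining about.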
If you pass just the filename and not the absolute path, the file has to be located in the working directory of the Python process; this is the working directory of the shell that started your Python process.
Related
I am trying to read an Excel file from SharePoint into Python. My first error message said that I should define an engine manually, so I set engine = 'openpyxl', and now the following error message comes up: File is not a zip file
Previous Q&As often discussed whether the Excel file is a real Excel file or just a text file with a fake .xlsx extension.
The Excel file was created using Microsoft Excel and it's stored in a shared OneDrive folder (Team - SharePoint). Does that affect the error message?
How can I solve this?
Many thanks in advance!
My Code:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
url_sp = 'https://company.sharepoint.com/teams/TeamE'
username_sp = 'MyUsername'
password_sp = 'MyPassword'
folder_url_sp = '/Shared%20Documents/02%20Team%20IAP/06_Da-An/Data/E/Edate.xlsx'
#Authentication
ctx_auth = AuthenticationContext(url_sp)
if ctx_auth.acquire_token_for_user(username_sp, password_sp):
ctx = ClientContext(url_sp, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print('Authentication sucessfull')
else:
print(ctx_auth.get_last_error())
import io
response = File.open_binary(ctx,folder_url_sp)
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0)
data = pd.read_excel(bytes_file_obj,sheet_name = None, engine = 'openpyxl')
The Error:
BadZipFile Traceback (most recent call last)
Cell In[29], line 32
29 bytes_file_obj.write(response.content)
30 bytes_file_obj.seek(0)
---> 32 data = pd.read_excel(bytes_file_obj, sheet_name= None, engine = 'openpyxl')
File ~\Anaconda3\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py:457, in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
455 if not isinstance(io, ExcelFile):
456 should_close = True
--> 457 io = ExcelFile(io, storage_options=storage_options, engine=engine)
458 elif engine and engine != io.engine:
459 raise ValueError(
460 "Engine should not be specified when passing "
461 "an ExcelFile - ExcelFile already has the engine set"
462 )
File ~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py:1419, in ExcelFile.__init__(self, path_or_buffer, engine, storage_options)
1416 self.engine = engine
1417 self.storage_options = storage_options
-> 1419 self._reader = self._engines[engine](self._io, storage_options=storage_options)
File ~\Anaconda3\lib\site-packages\pandas\io\excel\_openpyxl.py:525, in OpenpyxlReader.__init__(self, filepath_or_buffer, storage_options)
514 """
515 Reader using openpyxl engine.
516
(...)
522 passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``)
523 """
524 import_optional_dependency("openpyxl")
--> 525 super().__init__(filepath_or_buffer, storage_options=storage_options)
File ~\Anaconda3\lib\site-packages\pandas\io\excel\_base.py:518, in BaseExcelReader.__init__(self, filepath_or_buffer, storage_options)
516 self.handles.handle.seek(0)
517 try:
--> 518 self.book = self.load_workbook(self.handles.handle)
519 except Exception:
520 self.close()
File ~\Anaconda3\lib\site-packages\pandas\io\excel\_openpyxl.py:536, in OpenpyxlReader.load_workbook(self, filepath_or_buffer)
533 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
534 from openpyxl import load_workbook
--> 536 return load_workbook(
537 filepath_or_buffer, read_only=True, data_only=True, keep_links=False
538 )
File ~\Anaconda3\lib\site-packages\openpyxl\reader\excel.py:315, in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
288 def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
289 data_only=False, keep_links=True):
290 """Open the given filename and return the workbook
291
292 :param filename: the path to open or a file-like object
(...)
313
314 """
--> 315 reader = ExcelReader(filename, read_only, keep_vba,
316 data_only, keep_links)
317 reader.read()
318 return reader.wb
File ~\Anaconda3\lib\site-packages\openpyxl\reader\excel.py:124, in ExcelReader.__init__(self, fn, read_only, keep_vba, data_only, keep_links)
122 def __init__(self, fn, read_only=False, keep_vba=KEEP_VBA,
123 data_only=False, keep_links=True):
--> 124 self.archive = _validate_archive(fn)
125 self.valid_files = self.archive.namelist()
126 self.read_only = read_only
File ~\Anaconda3\lib\site-packages\openpyxl\reader\excel.py:96, in _validate_archive(filename)
89 msg = ('openpyxl does not support %s file format, '
90 'please check you can open '
91 'it with Excel first. '
92 'Supported formats are: %s') % (file_format,
93 ','.join(SUPPORTED_FORMATS))
94 raise InvalidFileException(msg)
---> 96 archive = ZipFile(filename, 'r')
97 return archive
File ~\Anaconda3\lib\zipfile.py:1266, in ZipFile.__init__(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps)
1264 try:
1265 if mode == 'r':
-> 1266 self._RealGetContents()
1267 elif mode in ('w', 'x'):
1268 # set the modified flag so central directory gets written
1269 # even if no files are added to the archive
1270 self._didModify = True
File ~\Anaconda3\lib\zipfile.py:1333, in ZipFile._RealGetContents(self)
1331 raise BadZipFile("File is not a zip file")
1332 if not endrec:
-> 1333 raise BadZipFile("File is not a zip file")
1334 if self.debug > 1:
1335 print(endrec)
BadZipFile: File is not a zip file
```
1. I was trying to write Python code to get the contents of all files in each subfolder and create an index entry for each file's contents. The contents of each file can be read successfully. However, when I run the code, it always shows the error message "Exception: This file is already closed".
2. Here is the code for building the index. Could someone explain to me why this happens?
The traceback:
<ipython-input-49-38a47b2f8c0c> in <module>
39 print(searcher)
40
---> 41 writers.commit(optimize=True)
42
43 # from whoosh.query import *
~/.local/lib/python3.8/site-packages/whoosh/writing.py in commit(self, mergetype, optimize, merge)
928 else:
929 # Close segment files
--> 930 self._close_segment()
931 # Write TOC
932 self._commit_toc(finalsegments)
~/.local/lib/python3.8/site-packages/whoosh/writing.py in _close_segment(self)
841 def _close_segment(self):
842 if not self.perdocwriter.is_closed:
--> 843 self.perdocwriter.close()
844 if not self.fieldwriter.is_closed:
845 self.fieldwriter.close()
~/.local/lib/python3.8/site-packages/whoosh/codec/whoosh3.py in close(self)
265 for writer in self._colwriters.values():
266 writer.finish(self._doccount)
--> 267 self._cols.save_as_files(self._storage, self._column_filename)
268
269 # If vectors were written, close the vector writers
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in save_as_files(self, storage, name_fn)
295
296 def save_as_files(self, storage, name_fn):
--> 297 for name, blocks in self._readback():
298 f = storage.create_file(name_fn(name))
299 for block in blocks():
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in _readback(self)
276
277 yield (name, gen)
--> 278 temp.close()
279 self._tempstorage.delete_file(self._tempname)
280
~/.local/lib/python3.8/site-packages/whoosh/filedb/structfile.py in close(self)
121
122 if self.is_closed:
--> 123 raise Exception("This file is already closed")
124 if self.onclose:
125 self.onclose(self)
Exception: This file is already closed
import os
import codecs
import whoosh
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT,textdata=TEXT(stored=True))
ix = create_in("folder", schema)
filelist = []
for root, dirs, files in os.walk("./test_result"):
for file in files:
#append the file name to the list
filelist.append(os.path.join(root,file))
#print all the file names
writer = ix.writer()
i = 0
for name in filelist:
i = i +1
with codecs.open (name, "r",encoding='utf-8',
errors='ignore') as myfile:
text=myfile.read()
# print ("adding document "+name)
writer.add_document(title="document "+name, path="folder",content=text,textdata=text)
myfile.close()
print(text)
searcher = ix.searcher()
print(searcher)
writers.commit(optimize=True)
The with statement handles resource management, including closing the file. You can read more about it here.
This code:
f = open(file, "w")
f.write("blablabla")
f.close()
is equivalent to this:
with open(file, "w") as f:
    f.write("blablabla")
This exception is the result of trying to close a file that has already been closed implicitly by the with statement.
You only need to delete this line:
myfile.close()
EDIT:
I just explained the error in the code, but didn't notice the update in the comments. Please update the question itself with the mentioned line deleted.
On a side note, I see you used writers.commit() instead of writer.commit(). Please make sure it's not a typo, and update your question if your code still doesn't work.
I'm trying to load a text file as an array in python by entering this code:
from numpy import loadtxt
values = loadtxt("values.txt", float)
mean = sum(values)/len(values)
print(mean)
but when I run the program I get:
OSError Traceback (most recent call last)
<ipython-input-10-4b9a39f8b17f> in <module>
1 from numpy import loadtxt
----> 2 values = loadtxt("values.txt", float)
3 mean = sum(values)/len(values)
4 print(mean)
~\Anaconda3\lib\site-packages\numpy\lib\npyio.py in loadtxt(fname, dtype, comments, delimiter, converters, skiprows, usecols, unpack, ndmin, encoding, max_rows)
960 fname = os_fspath(fname)
961 if _is_string_like(fname):
--> 962 fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
963 fencoding = getattr(fh, 'encoding', 'latin1')
964 fh = iter(fh)
~\Anaconda3\lib\site-packages\numpy\lib\_datasource.py in open(path, mode, destpath, encoding, newline)
264
265 ds = DataSource(destpath)
--> 266 return ds.open(path, mode, encoding=encoding, newline=newline)
267
268
~\Anaconda3\lib\site-packages\numpy\lib\_datasource.py in open(self, path, mode, encoding, newline)
622 encoding=encoding, newline=newline)
623 else:
--> 624 raise IOError("%s not found." % path)
625
626
OSError: values.txt not found.
I have the values.txt file saved in my documents folder. Do I need to save it in some specific folder so Python can find it?
You can either use the absolute path, or keep loadtxt("values.txt", float), but then your file must be in the current working directory, which is usually the same folder as your script or Jupyter notebook.
I tried to install the Python package music21 and am having a problem running it in Windows. Basically, when I tried to run the simple command they give as an example
converter.parse("tinynotation: 3/4 c4 d8 f g16 a g f#").show()
I got an error
SubConverterException: Cannot find a path to the 'mscore' file at C:\Program Files (x86)\MuseScore 2\MuseScore.exe -- download MuseScore
The reason is that MuseScore.exe is no longer stored in the folder "MuseScore 2" but in a subfolder called "bin". So the path needs to be set to "C:\Program Files (x86)\MuseScore 2\bin\MuseScore.exe" in order to access MuseScore.
How do I change this?
Full Error
SubConverterException Traceback (most recent call last)
<ipython-input-8-46c66c71749d> in <module>()
----> 1 converter.parse("tinynotation: 3/4 c4 d8 f g16 a g f#").show()
C:\Users\MrNoName\Anaconda3\lib\site-packages\music21\stream\__init__.py in show(self, *args, **kwargs)
255 if self.isSorted is False and self.autoSort:
256 self.sort()
--> 257 return super(Stream, self).show(*args, **kwargs)
258
259 #---------------------------------------------------------------------------
C:\Users\MrNoName\Anaconda3\lib\site-packages\music21\base.py in show(self, fmt, app, **keywords)
2586 app=app,
2587 subformats=subformats,
-> 2588 **keywords)
2589
2590 #--------------------------------------------------------------------------
C:\Users\MrNoName\Anaconda3\lib\site-packages\music21\converter\subConverters.py in show(self, obj, fmt, app, subformats, **keywords)
312
313 if 'Opus' not in obj.classes:
--> 314 fp = helperSubConverter.write(obj, helperFormat, subformats=helperSubformats)
315
316 defaults.title = savedDefaultTitle
C:\Users\MrNoName\Anaconda3\lib\site-packages\music21\converter\subConverters.py in write(self, obj, fmt, fp, subformats, **keywords)
808
809 if subformats is not None and 'png' in subformats:
--> 810 fp = self.runThroughMusescore(fp, **keywords)
811 return fp
812
C:\Users\MrNoName\Anaconda3\lib\site-packages\music21\converter\subConverters.py in runThroughMusescore(self, fp, **keywords)
756 raise SubConverterException(
757 "Cannot find a path to the 'mscore' file at " +
--> 758 "%s -- download MuseScore" % musescorePath)
759
760 fpOut = fp[0:len(fp) - 3]
SubConverterException: Cannot find a path to the 'mscore' file at C:\Program Files (x86)\MuseScore 2\MuseScore.exe -- download MuseScore
Do this right after importing music21:
environment.set('musescoreDirectPNGPath', 'C:\\Program Files (x86)\\MuseScore 2\\bin\\MuseScore.exe')
For MuseScore 3
us = environment.UserSettings()
us['musicxmlPath'] = 'C:\\Program Files\\MuseScore 3\\bin\\MuseScore3.exe'
us['musescoreDirectPNGPath'] = 'C:\\Program Files\\MuseScore 3\\bin\\MuseScore3.exe'
us['musicxmlPath']
If it still does not work, try opening environment.py in a text editor (Sublime Text or similar); it is located at
C:\Users\YOU\AppData\Local\Programs\Python\Python39\Lib\site-packages\music21\environment.py
then change
'%PROGRAMFILES%\MuseScore 3\MuseScore.exe'
for
'%PROGRAMFILES%\MuseScore 3\bin\MuseScore.exe'
I have been looking for solutions to this problem on Stack Exchange and beyond, and so far I haven't been able to find one.
I'm sure someone has encountered this problem before: I'm writing a python script which will extract and repurpose some data from an excel file - the catch being that the excel file is rife with irregular formatting and extraneous data. So, before I can get down to the table of data I need:
I have to go through tables like this:
My plan is to use some kind of regex or string recognition to know where to split the file so I can get to what I need. But the problem I'm having right now is that pandas freaks out whenever I try to run read_excel on this file.
In [4]: df = pd.read_excel(open('data.xlsx','rb'), sheetname=0)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-21f5fee2b08d> in <module>()
----> 1 df = pd.read_excel(open('data.xlsx','rb'), sheetname=0)
/Users/Gus/anaconda2/lib/python2.7/site-packages/pandas/io/excel.pyc in read_excel(io, sheetname, header, skiprows, skip_footer, index_col, names, parse_cols, parse_dates, date_parser, na_values, thousands, convert_float, has_index_names, converters, engine, squeeze, **kwds)
168 """
169 if not isinstance(io, ExcelFile):
--> 170 io = ExcelFile(io, engine=engine)
171
172 return io._parse_excel(
/Users/Gus/anaconda2/lib/python2.7/site-packages/pandas/io/excel.pyc in __init__(self, io, **kwds)
223 # N.B. xlrd.Book has a read attribute too
224 data = io.read()
--> 225 self.book = xlrd.open_workbook(file_contents=data)
226 elif isinstance(io, compat.string_types):
227 self.book = xlrd.open_workbook(io)
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/__init__.pyc in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
420 formatting_info=formatting_info,
421 on_demand=on_demand,
--> 422 ragged_rows=ragged_rows,
423 )
424 return bk
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/xlsx.pyc in open_workbook_2007_xml(zf, component_names, logfile, verbosity, use_mmap, formatting_info, on_demand, ragged_rows)
831 x12sheet = X12Sheet(sheet, logfile, verbosity)
832 heading = "Sheet %r (sheetx=%d) from %r" % (sheet.name, sheetx, fname)
--> 833 x12sheet.process_stream(zflo, heading)
834 del zflo
835
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/xlsx.pyc in own_process_stream(self, stream, heading)
551 self.do_dimension(elem)
552 elif elem.tag == U_SSML12 + "mergeCell":
--> 553 self.do_merge_cell(elem)
554 self.finish_off()
555
/Users/Gus/anaconda2/lib/python2.7/site-packages/xlrd/xlsx.pyc in do_merge_cell(self, elem)
607 ref = elem.get('ref')
608 if ref:
--> 609 first_cell_ref, last_cell_ref = ref.split(':')
610 first_rowx, first_colx = cell_name_to_rowx_colx(first_cell_ref)
611 last_rowx, last_colx = cell_name_to_rowx_colx(last_cell_ref)
ValueError: need more than 1 value to unpack
The whole point of my writing this program is so that I don't have to go in to every single one of these files and delete information by hand. But how can I automate this process if python won't even accept the file? I'm hoping someone here will have encountered a similar problem before. What was your solution?