Error when opening some gdb files with fiona and geopandas - python

I am trying to open NYC LION Geodatabase files for 2010, 2011, and 2012.
I successfully opened the 2012 and 2011 geodatabases with geopandas, but I was unable to open the 2010 version.
I've tried using fiona directly, but I kept getting a similar error.
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
import requests
from zipfile import ZipFile as zzip
import fiona
sys.path.append(os.path.realpath('..'))
path = r"https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/nyc_lion10aav.zip"
r = requests.get(path)
# open method to open a file on your system and write the contents
with open("../input_data/nyc_lion10aav.zip", "wb") as file:
file.write(r.content)
# opening the zip file in READ mode
with zzip("../input_data/nyc_lion10aav.zip", 'r') as file:
# printing all the contents of the zip file
#file.printdir()
path = "../input_data/nyc_lion10aav"
os.mkdir(path)
# extracting all the files
#rint('Extracting all the files now...')
file.extractall(path)
print('Done!')
fp = r"../input_data/nyc_lion10aav/lion/lion.gdb"
lion_gdf = gpd.read_file(fp, driver='OpenFileGDB', layer='lion')
fp = r"../input_data/nyc_lion10aav/lion/lion.gdb"
file = fiona.open(fp, driver='OpenFileGDB', layer='lion')
Notebook
I expected it to go through like the geodatabases from 2011 and 2012 when I ran it in the notebook. I've been searching here and on fiona's GitHub issues to see if others have had a similar problem and whether there is a solution. But I am fairly new to these libraries, so I don't understand the traceback well enough to figure out what went wrong.
---------------------------------------------------------------------------
CPLE_OpenFailedError Traceback (most recent call last)
fiona/_shim.pyx in fiona._shim.gdal_open_vector()
fiona/_err.pyx in fiona._err.exc_wrap_pointer()
CPLE_OpenFailedError: ../input_data/nyc_lion10aav/lion/lion.gdb: Permission denied
During handling of the above exception, another exception occurred:
DriverError Traceback (most recent call last)
<ipython-input-14-f49f8c92c671> in <module>
1 fp = r"../input_data/nyc_lion10aav/lion/lion.gdb"
----> 2 lion_gdf = gpd.read_file(fp, driver='OpenFileGDB', layer='lion')
~\AppData\Local\Continuum\anaconda3\envs\geo\lib\site-packages\geopandas\io\file.py in read_file(filename, bbox, **kwargs)
75
76 with fiona_env():
---> 77 with reader(path_or_bytes, **kwargs) as features:
78
79 # In a future Fiona release the crs attribute of features will
~\AppData\Local\Continuum\anaconda3\envs\geo\lib\site-packages\fiona\env.py in wrapper(*args, **kwargs)
394 def wrapper(*args, **kwargs):
395 if local._env:
--> 396 return f(*args, **kwargs)
397 else:
398 if isinstance(args[0], str):
~\AppData\Local\Continuum\anaconda3\envs\geo\lib\site-packages\fiona\__init__.py in open(fp, mode, driver, schema, crs, encoding, layer, vfs, enabled_drivers, crs_wkt, **kwargs)
251 if mode in ('a', 'r'):
252 c = Collection(path, mode, driver=driver, encoding=encoding,
--> 253 layer=layer, enabled_drivers=enabled_drivers, **kwargs)
254 elif mode == 'w':
255 if schema:
~\AppData\Local\Continuum\anaconda3\envs\geo\lib\site-packages\fiona\collection.py in __init__(self, path, mode, driver, schema, crs, encoding, layer, vsi, archive, enabled_drivers, crs_wkt, ignore_fields, ignore_geometry, **kwargs)
157 if self.mode == 'r':
158 self.session = Session()
--> 159 self.session.start(self, **kwargs)
160 elif self.mode in ('a', 'w'):
161 self.session = WritingSession()
fiona/ogrext.pyx in fiona.ogrext.Session.start()
fiona/_shim.pyx in fiona._shim.gdal_open_vector()
DriverError: ../input_data/nyc_lion10aav/lion/lion.gdb: Permission denied

Related

Python lzma unable to load joblib

I have a scikit learn pipeline that I serialize using:
with lzma.open('outputs/baseModel_LR.joblib',"wb") as f:
dill.dump(pipeline, f)
When I try to open the file and load the pipeline using:
with lzma.open('outputs/baseModel_LR.joblib',"rb") as f:
model = dill.load(f)
it gives error:
---------------------------------------------------------------------------
EOFError Traceback (most recent call last)
somePath/notebooks/test.ipynb Cell 5 in <cell line: 1>()
1 with lzma.open('outputs/baseModel_LR.joblib',"rb") as f:
----> 2 model = dill.load(f)
3 model
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dill/_dill.py:373, in load(file, ignore, **kwds)
367 def load(file, ignore=None, **kwds):
368 """
369 Unpickle an object from a file.
370
371 See :func:`loads` for keyword arguments.
372 """
--> 373 return Unpickler(file, ignore=ignore, **kwds).load()
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dill/_dill.py:646, in Unpickler.load(self)
645 def load(self): #NOTE: if settings change, need to update attributes
--> 646 obj = StockUnpickler.load(self)
647 if type(obj).__module__ == getattr(_main_module, '__name__', '__main__'):
648 if not self._ignore:
649 # point obj class to main
File /anaconda/envs/azureml_py38/lib/python3.8/lzma.py:200, in LZMAFile.read(self, size)
194 """Read up to size uncompressed bytes from the file.
...
100 "end-of-stream marker was reached")
101 else:
102 rawblock = b""
**EOFError: Compressed file ended before the end-of-stream marker was reached**
Has anyone faced this problem and solved it? I use lzma because otherwise the joblib file is 27 GB, whereas with lzma it's just 20 MB.

Errno 2 No such file or directory:

I am using Jupyter Notebook (with Python 3.8, both from anaconda3) and following this post. Cells 84 and 85 result in the traceback below:
FileNotFoundError Traceback (most recent call last)
<ipython-input-15-9cdebd0bb247> in <module>
2
3
----> 4 create_wordcloud(tw_list["text"].values)
<ipython-input-14-524a73dcd1e0> in create_wordcloud(text)
2
3 def create_wordcloud(text):
----> 4 mask = np.array(Image.open("cloud.png"))
5 stopwords = set(STOPWORDS)
6 wc = WordCloud(background_color="white",
~/opt/anaconda3/lib/python3.8/site-packages/PIL/Image.py in open(fp, mode, formats)
2889
2890 if filename:
-> 2891 fp = builtins.open(filename, "rb")
2892 exclusive_fp = True
2893
FileNotFoundError: [Errno 2] No such file or directory: 'cloud.png'
Following this, I found advice (the link evades me, but it's somewhere on this site) to change `from PIL import Image` to `import PIL.Image` in cell 2 and add
from IPython.display import Image
Image(filename='cloud.png')
This still results in a similar, but longer, traceback:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-16-8c5d56ae9874> in <module>
1 #Creating wordcloud for all tweets
2 from IPython.display import Image
----> 3 Image(filename='cloud.png')
4
5 create_wordcloud(tw_list["text"].values)
~/opt/anaconda3/lib/python3.8/site-packages/IPython/core/display.py in
__init__(self, data, url, filename, format, embed, width, height, retina,
unconfined, metadata)
1222 self.retina = retina
1223 self.unconfined = unconfined
-> 1224 super(Image, self).__init__(data=data, url=url, filename=filename,
1225 metadata=metadata)
1226
~/opt/anaconda3/lib/python3.8/site-packages/IPython/core/display.py in
__init__(self, data, url, filename, metadata)
628 self.metadata = {}
629
--> 630 self.reload()
631 self._check_data()
632
~/opt/anaconda3/lib/python3.8/site-packages/IPython/core/display.py in
reload(self)
1254 """Reload the raw data from file or URL."""
1255 if self.embed:
-> 1256 super(Image,self).reload()
1257 if self.retina:
1258 self._retina_shape()
~/opt/anaconda3/lib/python3.8/site-packages/IPython/core/display.py in
reload(self)
653 """Reload the raw data from file or URL."""
654 if self.filename is not None:
--> 655 with open(self.filename, self._read_flags) as f:
656 self.data = f.read()
657 elif self.url is not None:
FileNotFoundError: [Errno 2] No such file or directory: 'cloud.png'
That evidently is not the right solution. I am a little out of my depth here and would be grateful for any help.
That means the file does not exist in the directory the code is run from. You must download their 'cloud.png' and put it in the same directory as the Jupyter notebook file.
https://github.com/ChilesheChanda/TwitterSentimentAnalysis/blob/master/cloud.png

Read shapefile from HDFS with geopandas

I have a shapefile on my HDFS and I would like to import it in my Jupyter Notebook with geopandas (version 0.8.1).
I tried the standard read_file() method, but it does not recognize the HDFS directory; instead, I believe it searches my local file system, because when I ran a test with a local directory it read the shapefile correctly.
This is the code I used:
import geopandas as gpd
shp = gpd.read_file('hdfs://hdfsha/my_hdfs_directory/my_shapefile.shp')
and the error I obtained:
---------------------------------------------------------------------------
CPLE_OpenFailedError Traceback (most recent call last)
fiona/_shim.pyx in fiona._shim.gdal_open_vector()
fiona/_err.pyx in fiona._err.exc_wrap_pointer()
CPLE_OpenFailedError: hdfs://hdfsha/my_hdfs_directory/my_shapefile.shp: No such file or directory
During handling of the above exception, another exception occurred:
DriverError Traceback (most recent call last)
<ipython-input-17-3118e740e4a9> in <module>
----> 2 shp = gpd.read_file('hdfs://hdfsha/my_hdfs_directory/my_shapefile.shp')
3 print(shp.shape)
4 shp.head(3)
/opt/venv/geocoding/lib/python3.6/site-packages/geopandas/io/file.py in _read_file(filename, bbox, mask, rows, **kwargs)
94
95 with fiona_env():
---> 96 with reader(path_or_bytes, **kwargs) as features:
97
98 # In a future Fiona release the crs attribute of features will
/opt/venv/geocoding/lib/python3.6/site-packages/fiona/env.py in wrapper(*args, **kwargs)
398 def wrapper(*args, **kwargs):
399 if local._env:
--> 400 return f(*args, **kwargs)
401 else:
402 if isinstance(args[0], str):
/opt/venv/geocoding/lib/python3.6/site-packages/fiona/__init__.py in open(fp, mode, driver, schema, crs, encoding, layer, vfs, enabled_drivers, crs_wkt, **kwargs)
255 if mode in ('a', 'r'):
256 c = Collection(path, mode, driver=driver, encoding=encoding,
--> 257 layer=layer, enabled_drivers=enabled_drivers, **kwargs)
258 elif mode == 'w':
259 if schema:
/opt/venv/geocoding/lib/python3.6/site-packages/fiona/collection.py in __init__(self, path, mode, driver, schema, crs, encoding, layer, vsi, archive, enabled_drivers, crs_wkt, ignore_fields, ignore_geometry, **kwargs)
160 if self.mode == 'r':
161 self.session = Session()
--> 162 self.session.start(self, **kwargs)
163 elif self.mode in ('a', 'w'):
164 self.session = WritingSession()
fiona/ogrext.pyx in fiona.ogrext.Session.start()
fiona/_shim.pyx in fiona._shim.gdal_open_vector()
DriverError: hdfs://hdfsha/my_hdfs_directory/my_shapefile.shp: No such file or directory
So, I was wondering whether it is actually possible to read a shapefile, stored in HDFS, with geopandas. If yes, how?
If someone is still looking for an answer to this question, I managed to find a workaround.
First of all, you need a .zip file which contains all the data related to your shapefile (.shp, .shx, .dbf, ...). Then, we use pyarrow to establish a connection to HDFS and fiona to read the zipped shapefile.
Package versions I'm using:
pyarrow==2.0.0
fiona==1.8.18
The code:
# import packages
import pandas as pd
import geopandas as gpd
import fiona
import pyarrow
# establish a connection to HDFS
fs = pyarrow.hdfs.connect()
# read zipped shapefile
with fiona.io.ZipMemoryFile(fs.open('hdfs://my_hdfs_directory/my_zipped_shapefile.zip')) as z:
with z.open('my_shp_file_within_zip.shp') as collection:
gdf = gpd.GeoDataFrame.from_features(collection)
print(gdf.shape)

QST: error while using pickle to load the files

I am getting the below error while using pickle to load the files on kaggle.
It has worked for everyone, but it is not working for me. The file path is correct.
Thank you for your help.
My code:
%%time
import pickle
#using one of the validation sets composed by tito
cv2_train = pd.read_pickle("../input/riiid-cross-validation-files/cv2_train.pickle")['row_id']
cv2_valid = pd.read_pickle("../input/riiid-cross-validation-files/cv2_valid.pickle")['row_id']
Error:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<timed exec> in <module>
/opt/conda/lib/python3.7/site-packages/pandas/io/pickle.py in read_pickle(filepath_or_buffer, compression)
167 if not isinstance(fp_or_buf, str) and compression == "infer":
168 compression = None
--> 169 f, fh = get_handle(fp_or_buf, "rb", compression=compression, is_text=False)
170
171 # 1) try standard library Pickle
/opt/conda/lib/python3.7/site-packages/pandas/io/common.py in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors)
497 else:
498 # Binary mode
--> 499 f = open(path_or_buf, mode)
500 handles.append(f)
501
FileNotFoundError: [Errno 2] No such file or directory: '../input/riiid-cross-validation-files/cv2_train.pickle'

Errno 13 - Permission denied from Jupyter Notebook on Windows 10

I am trying to import downloaded MNIST data to Jupyter Notebook, but when I try to run the code, it says that it doesn't have permissions.
How do I solve the problem?
pip install python-mnist
from mnist import MNIST
mndata = MNIST('C:\\Users\\username\\path\\to\\the\\samples')
images, labels = mndata.load_training()
---------------------------------------------------------------------------
PermissionError Traceback (most recent call last)
<ipython-input-2-1df33381d649> in <module>
3 mndata = MNIST('C:\\Users\\username\\path\\to\\the\\samples')
4
----> 5 images, labels = mndata.load_training()
D:\Anaconda\lib\site-packages\mnist\loader.py in load_training(self)
124 def load_training(self):
125 ims, labels = self.load(os.path.join(self.path, self.train_img_fname),
--> 126 os.path.join(self.path, self.train_lbl_fname))
127
128 self.train_images = self.process_images(ims)
D:\Anaconda\lib\site-packages\mnist\loader.py in load(self, path_img, path_lbl, batch)
245 '(start_point, batch_size)')
246
--> 247 with self.opener(path_lbl, 'rb') as file:
248 magic, size = struct.unpack(">II", file.read(8))
249 if magic != 2049:
D:\Anaconda\lib\site-packages\mnist\loader.py in opener(self, path_fn, *args, **kwargs)
237 return gzip.open(path_fn + '.gz', *args, **kwargs)
238 else:
--> 239 return open(path_fn, *args, **kwargs)
240
241 def load(self, path_img, path_lbl, batch=None):
PermissionError: [Errno 13] Permission denied: 'C:\\Users\\username\\path\\to\\the\\samples\\train-labels-idx1-ubyte'

Categories