Error while saving Optuna study to Google Drive from Colab

I can save an arbitrary file to my Drive from Colab like this:
with open("gdrive/My Drive/chapter_classification/output/hello.txt", 'w') as f:
    f.write('hello')
That works fine, but when I follow the official Optuna documentation and use this code:
direction = 'minimize'
name = 'opt1'
study = optuna.create_study(
    sampler=optuna.samplers.TPESampler(),
    direction=direction,
    study_name=name,
    storage=f"gdrive/My Drive/chapter_classification/output/sqlite:///{name}.db",
    load_if_exists=True,
)
study.optimize(tune, n_trials=1000)
it throws the following error:
ArgumentError Traceback (most recent call last)
<ipython-input-177-f32da2c0f69a> in <module>()
2 direction = 'minimize'
3 name = 'opt1'
----> 4 study = optuna.create_study(sampler=optuna.samplers.TPESampler(),direction=direction,study_name=name, storage="gdrive/My Drive/chapter_classification/output/sqlite:///opt1.db",load_if_exists=True)
5 study.optimize(tune, n_trials=1000)
6 frames
/usr/local/lib/python3.7/dist-packages/optuna/study/study.py in create_study(storage, sampler, pruner, study_name, direction, load_if_exists, directions)
1134 ]
1135
-> 1136 storage = storages.get_storage(storage)
1137 try:
1138 study_id = storage.create_new_study(study_name)
/usr/local/lib/python3.7/dist-packages/optuna/storages/__init__.py in get_storage(storage)
29 return RedisStorage(storage)
30 else:
---> 31 return _CachedStorage(RDBStorage(storage))
32 elif isinstance(storage, RDBStorage):
33 return _CachedStorage(storage)
/usr/local/lib/python3.7/dist-packages/optuna/storages/_rdb/storage.py in __init__(self, url, engine_kwargs, skip_compatibility_check, heartbeat_interval, grace_period, failed_trial_callback)
173
174 try:
--> 175 self.engine = create_engine(self.url, **self.engine_kwargs)
176 except ImportError as e:
177 raise ImportError(
<string> in create_engine(url, **kwargs)
/usr/local/lib/python3.7/dist-packages/sqlalchemy/util/deprecations.py in warned(fn, *args, **kwargs)
307 stacklevel=3,
308 )
--> 309 return fn(*args, **kwargs)
310
311 doc = fn.__doc__ is not None and fn.__doc__ or ""
/usr/local/lib/python3.7/dist-packages/sqlalchemy/engine/create.py in create_engine(url, **kwargs)
528
529 # create url.URL object
--> 530 u = _url.make_url(url)
531
532 u, plugins, kwargs = u._instantiate_plugins(kwargs)
/usr/local/lib/python3.7/dist-packages/sqlalchemy/engine/url.py in make_url(name_or_url)
713
714 if isinstance(name_or_url, util.string_types):
--> 715 return _parse_rfc1738_args(name_or_url)
716 else:
717 return name_or_url
/usr/local/lib/python3.7/dist-packages/sqlalchemy/engine/url.py in _parse_rfc1738_args(name)
775 else:
776 raise exc.ArgumentError(
--> 777 "Could not parse rfc1738 URL from string '%s'" % name
778 )
779
ArgumentError: Could not parse rfc1738 URL from string 'gdrive/My Drive/chapter_classification/output/sqlite:///opt1.db'

According to the official documentation of create_study:
When a database URL is passed, Optuna internally uses SQLAlchemy to handle the database. Please refer to SQLAlchemy's document for further details. If you want to specify non-default options to SQLAlchemy Engine, you can instantiate RDBStorage with your desired options and pass it to the storage argument instead of a URL.
And when you visit the SQLAlchemy documentation, you find that a database URL must follow the RFC 1738 form dialect:///path, i.e. the sqlite:/// scheme must come first, followed by the file path.
So all you have to do is change
storage=f"gdrive/My Drive/chapter_classification/output/sqlite:///{name}.db"
so that the scheme precedes the path:
storage = f"sqlite:///gdrive/My Drive/chapter_classification/output/{name}.db"

Related

py2neo Issue: ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt://localhost:7687')

I am trying to replicate this example on neo4j desktop:
https://stellargraph.readthedocs.io/en/stable/demos/connector/neo4j/load-cora-into-neo4j.html
I am able to reproduce everything until I get to the following lines:
import py2neo
default_host = os.environ.get("STELLARGRAPH_NEO4J_HOST")
# Create the Neo4j Graph database object; the arguments can be edited to specify location and authentication
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
I have tried the following attempts to create the neo4j database object:
#1
default_host = os.environ.get("StellarGraph")
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
#2
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True)
#3
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True, name="StellarGraph")
However, each time I attempt this, it results in some variation of this error:
IndexError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:806, in ConnectionPool.acquire(self, force_reset, can_overfill)
804 try:
805 # Plan A: select a free connection from the pool
--> 806 cx = self._free_list.popleft()
807 except IndexError:
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:62, in Wire.open(cls, address, timeout, keep_alive, on_broken)
61 try:
---> 62 s.connect(address)
63 except (IOError, OSError) as error:
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
WireError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:355, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
354 try:
--> 355 wire = cls._connect(profile, on_broken=on_broken)
356 protocol_version = cls._handshake(wire)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:369, in Bolt._connect(cls, profile, on_broken)
368 log.debug("[#%04X] C: (Dialing <%s>)", 0, profile.address)
--> 369 wire = Wire.open(profile.address, keep_alive=True, on_broken=on_broken)
370 local_port = wire.local_address.port_number
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:64, in Wire.open(cls, address, timeout, keep_alive, on_broken)
63 except (IOError, OSError) as error:
---> 64 raise_from(WireError("Cannot connect to %r" % (address,)), error)
65 return cls(s, on_broken=on_broken)
File <string>:3, in raise_from(value, from_value)
WireError: Cannot connect to IPv4Address(('localhost', 7687))
The above exception was the direct cause of the following exception:
ConnectionUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 2>()
1 uri = 'bolt://localhost:7687'
----> 2 graph = Graph(uri, auth=("neo4j", "mypass"), port= 7687, secure=True, name= "StellarGraph")
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:288, in Graph.__init__(self, profile, name, **settings)
287 def __init__(self, profile=None, name=None, **settings):
--> 288 self.service = GraphService(profile, **settings)
289 self.__name__ = name
290 self.schema = Schema(self)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:119, in GraphService.__init__(self, profile, **settings)
116 if connector_settings["init_size"] is None and not profile.routing:
117 # Ensures credentials are checked on construction
118 connector_settings["init_size"] = 1
--> 119 self._connector = Connector(profile, **connector_settings)
120 self._graphs = {}
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:960, in Connector.__init__(self, profile, user_agent, init_size, max_size, max_age, routing_refresh_ttl)
958 else:
959 self._router = None
--> 960 self._add_pools(*self._initial_routers)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:982, in Connector._add_pools(self, *profiles)
980 continue
981 log.debug("Adding connection pool for profile %r", profile)
--> 982 pool = ConnectionPool.open(
983 profile,
984 user_agent=self._user_agent,
985 init_size=self._init_size,
986 max_size=self._max_size,
987 max_age=self._max_age,
988 on_broken=self._on_broken)
989 self._pools[profile] = pool
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in ConnectionPool.open(cls, profile, user_agent, init_size, max_size, max_age, on_broken)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in <listcomp>(.0)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:813, in ConnectionPool.acquire(self, force_reset, can_overfill)
807 except IndexError:
808 if self._has_capacity() or can_overfill:
809 # Plan B: if the pool isn't full, open
810 # a new connection. This may raise a
811 # ConnectionUnavailable exception, which
812 # should bubble up to the caller.
--> 813 cx = self._connect()
814 if cx.supports_multi():
815 self._supports_multi = True
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:764, in ConnectionPool._connect(self)
761 def _connect(self):
762 """ Open and return a new connection.
763 """
--> 764 cx = Connection.open(self.profile, user_agent=self.user_agent,
765 on_release=lambda c: self.release(c),
766 on_broken=lambda msg: self.__on_broken(msg))
767 self._server_agent = cx.server_agent
768 return cx
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:174, in Connection.open(cls, profile, user_agent, on_release, on_broken)
172 if profile.protocol == "bolt":
173 from py2neo.client.bolt import Bolt
--> 174 return Bolt.open(profile, user_agent=user_agent,
175 on_release=on_release, on_broken=on_broken)
176 elif profile.protocol == "http":
177 from py2neo.client.http import HTTP
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:364, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
362 return bolt
363 except (TypeError, WireError) as error:
--> 364 raise_from(ConnectionUnavailable("Cannot open connection to %r" % profile), error)
File <string>:3, in raise_from(value, from_value)
ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt+s://localhost:7687')
I have also tried variations on this fix, but got the same error:
ISSUE IN CONNECTING py2neo v4 to my neo4j server
I appreciate any help resolving this issue. Thanks!
I was able to resolve this with the following syntax:
graph = Graph('neo4j://localhost:7687', user="neo4j", password="999")
However, I am now having an issue with the following block:
empty_db_query = """
MATCH(n) DETACH
DELETE(n)
"""
tx = graph.begin(autocommit=True)
tx.evaluate(empty_db_query)
In newer versions of py2neo, graph.begin takes readonly=False instead of autocommit=True, but in any case, I now get this error:
ServiceUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 6>()
1 empty_db_query = """
2 MATCH(n) DETACH
3 DELETE(n)
4 """
----> 6 tx = graph.begin(readonly=False)
7 tx.evaluate(empty_db_query)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:351, in Graph.begin(self, readonly)
340 def begin(self, readonly=False,
341 # after=None, metadata=None, timeout=None
342 ):
343 """ Begin a new :class:`~py2neo.Transaction`.
344
345 :param readonly: if :py:const:`True`, will begin a readonly
(...)
349 removed. Use the 'auto' method instead.*
350 """
--> 351 return Transaction(self, autocommit=False, readonly=readonly,
352 # after, metadata, timeout
353 )
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:915, in Transaction.__init__(self, graph, autocommit, readonly)
913 self._ref = None
914 else:
--> 915 self._ref = self._connector.begin(self.graph.name, readonly=readonly,
916 # after, metadata, timeout
917 )
918 self._readonly = readonly
919 self._closed = False
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1357, in Connector.begin(self, graph_name, readonly)
1345 def begin(self, graph_name, readonly=False,
1346 # after=None, metadata=None, timeout=None
1347 ):
1348 """ Begin a new explicit transaction.
1349
1350 :param graph_name:
(...)
1355 :raises Failure: if the server signals a failure condition
1356 """
-> 1357 cx = self._acquire(graph_name)
1358 try:
1359 return cx.begin(graph_name, readonly=readonly,
1360 # after=after, metadata=metadata, timeout=timeout
1361 )
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1111, in Connector._acquire(self, graph_name, readonly)
1109 return self._acquire_ro(graph_name)
1110 else:
-> 1111 return self._acquire_rw(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1203, in Connector._acquire_rw(self, graph_name)
1199 # TODO: exit immediately if the server/cluster is in readonly mode
1201 while True:
-> 1203 ro_profiles, rw_profiles = self._get_profiles(graph_name, readonly=False)
1204 if rw_profiles:
1205 # There is at least one writer, so collect the pools
1206 # for those writers. In all implementations to date,
1207 # a Neo4j cluster will only ever contain at most one
1208 # writer (per database). But this algorithm should
1209 # still survive if that changes.
1210 pools = [pool for profile, pool in list(self._pools.items())
1211 if profile in rw_profiles]
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1016, in Connector._get_profiles(self, graph_name, readonly)
1014 rt.wait_until_updated()
1015 else:
-> 1016 self.refresh_routing_table(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1064, in Connector.refresh_routing_table(self, graph_name)
1062 cx.release()
1063 else:
-> 1064 raise ServiceUnavailable("Cannot connect to any known routers")
1065 finally:
1066 rt.set_not_updating()
ServiceUnavailable: Cannot connect to any known routers
Appreciate any help in resolving this. Thank you!
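For reference, a minimal sketch that sidesteps both problems, assuming a single local, non-clustered Neo4j instance without TLS (credentials are placeholders): the plain bolt:// scheme avoids the cluster-routing lookup behind "Cannot connect to any known routers", and graph.run executes the query in an auto-commit transaction instead of graph.begin.
from py2neo import Graph

# bolt:// (not neo4j://) skips routing-table lookups on a single instance,
# and omitting secure=True avoids the bolt+s:// TLS upgrade from the first traceback.
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))

# Auto-commit execution; no explicit transaction object needed.
graph.run("MATCH (n) DETACH DELETE n")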

Trying to download dataset, code doesn't work in Jupyter notebook but it does work in PyCharm

I'm trying to download the MNIST dataset from openml, using the openml library.
I tried using Jupyter notebooks because I don't want to download the same dataset every time.
Problem is, after running the following code, I get an error:
from openml.datasets import get_dataset
mnist = get_dataset(554)
x, y, p, q = mnist.get_data(
dataset_format="dataframe", target=mnist.default_target_attribute
)
I'm pasting the whole error message below; the problem occurs when I try to assign the result of .get_data to x, y, p and q.
The environment I'm running this on is called Oceanic.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:491, in OpenMLDataset._cache_compressed_file_from_file(self, data_file)
490 try:
--> 491 data = pd.read_parquet(data_file)
492 except Exception as e:
File ~\anaconda3\envs\Oceanic\lib\site-packages\pandas\io\parquet.py:493, in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, **kwargs)
491 impl = get_engine(engine)
--> 493 return impl.read(
494 path,
495 columns=columns,
496 storage_options=storage_options,
497 use_nullable_dtypes=use_nullable_dtypes,
498 **kwargs,
499 )
File ~\anaconda3\envs\Oceanic\lib\site-packages\pandas\io\parquet.py:240, in PyArrowImpl.read(self, path, columns, use_nullable_dtypes, storage_options, **kwargs)
239 try:
--> 240 result = self.api.parquet.read_table(
241 path_or_handle, columns=columns, **kwargs
242 ).to_pandas(**to_pandas_kwargs)
243 if manager == "array":
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\parquet.py:1731, in read_table(source, columns, use_threads, metadata, use_pandas_metadata, memory_map, read_dictionary, filesystem, filters, buffer_size, partitioning, use_legacy_dataset, ignore_prefixes)
1727 dataset = ParquetFile(
1728 source, metadata=metadata, read_dictionary=read_dictionary,
1729 memory_map=memory_map, buffer_size=buffer_size)
-> 1731 return dataset.read(columns=columns, use_threads=use_threads,
1732 use_pandas_metadata=use_pandas_metadata)
1734 if ignore_prefixes is not None:
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\parquet.py:1608, in _ParquetDatasetV2.read(self, columns, use_threads, use_pandas_metadata)
1606 use_threads = False
-> 1608 table = self._dataset.to_table(
1609 columns=columns, filter=self._filter_expression,
1610 use_threads=use_threads
1611 )
1613 # if use_pandas_metadata, restore the pandas metadata (which gets
1614 # lost if doing a specific `columns` selection in to_table)
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\_dataset.pyx:458, in pyarrow._dataset.Dataset.to_table()
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\_dataset.pyx:2889, in pyarrow._dataset.Scanner.to_table()
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\error.pxi:141, in pyarrow.lib.pyarrow_internal_check_status()
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\error.pxi:112, in pyarrow.lib.check_status()
OSError: NotImplemented: Support for codec 'snappy' not built
The above exception was the direct cause of the following exception:
Exception Traceback (most recent call last)
Input In [10], in <cell line: 1>()
----> 1 x, y, p, q = mnist.get_data(
2 dataset_format="dataframe", target=mnist.default_target_attribute
3 )
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:698, in OpenMLDataset.get_data(self, target, include_row_id, include_ignore_attribute, dataset_format)
658 def get_data(
659 self,
660 target: Optional[Union[List[str], str]] = None,
(...)
668 List[str],
669 ]:
670 """ Returns dataset content as dataframes or sparse matrices.
671
672 Parameters
(...)
696 List of attribute names.
697 """
--> 698 data, categorical, attribute_names = self._load_data()
700 to_exclude = []
701 if not include_row_id and self.row_id_attribute is not None:
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:531, in OpenMLDataset._load_data(self)
528 self._download_data()
530 file_to_load = self.data_file if self.parquet_file is None else self.parquet_file
--> 531 return self._cache_compressed_file_from_file(file_to_load)
533 # helper variable to help identify where errors occur
534 fpath = self.data_feather_file if self.cache_format == "feather" else self.data_pickle_file
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:493, in OpenMLDataset._cache_compressed_file_from_file(self, data_file)
491 data = pd.read_parquet(data_file)
492 except Exception as e:
--> 493 raise Exception(f"File: {data_file}") from e
495 categorical = [data[c].dtype.name == "category" for c in data.columns]
496 attribute_names = list(data.columns)
Exception: File: C:\Users\Irving\.openml\org\openml\www\datasets\554\dataset.pq
Now, I've run the same code in PyCharm and it works just fine: I can assign the dataframes correctly and display them. I've got no idea why this isn't working in Jupyter, and I would like to know why, because I would prefer to work with Jupyter notebooks.
Any help is appreciated, thanks in advance.
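The decisive line in the traceback is OSError: NotImplemented: Support for codec 'snappy' not built, which suggests the pyarrow build in the Oceanic environment lacks snappy support, while the interpreter PyCharm uses has a working build. A small diagnostic sketch you could run inside the Jupyter kernel (an isolation step under that assumption, not a guaranteed fix):
import sys
import pyarrow as pa

# Confirm which interpreter the Jupyter kernel actually uses;
# it may differ from the one PyCharm is configured with.
print(sys.executable)
print(pa.__version__)

# pyarrow reports whether the snappy codec was compiled in.
print(pa.Codec.is_available("snappy"))
If the last line prints False, reinstalling pyarrow into that environment (for example from conda-forge) is a common remedy.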

OSError while calling Detectron2LayoutModel

After successfully installing Layout Parser on Windows, I get the OSError below.
Code Used:
model = lp.Detectron2LayoutModel(
    config_path="lp://PubLayNet/mask_rcnn_X_101_32x8d_FPN_3x/config",
    extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.8],
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
)
I'm using Layout Parser to extract content from an image, but when I try to load the model, it fails with the error below.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_178664\3254664512.py in <module>
1 model = lp.Detectron2LayoutModel(config_path="lp://PubLayNet/mask_rcnn_X_101_32x8d_FPN_3x/config",
2 extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.8],
----> 3 label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"})
4 # Load the deep layout model from the layoutparser API
5 # For all the supported model, please check the Model
~\Anaconda3\envs\layout\lib\site-packages\layoutparser\models\detectron2\layoutmodel.py in __init__(self, config_path, model_path, label_map, extra_config, enforce_cpu, device)
89 config_path, model_path, allow_empty_path=True
90 )
---> 91 config_path = PathManager.get_local_path(config_path)
92
93 if label_map is None:
~\Anaconda3\envs\layout\lib\site-packages\iopath\common\file_io.py in get_local_path(self, path, force, **kwargs)
1195 handler = self.__get_path_handler(path) # type: ignore
1196 try:
-> 1197 bret = handler._get_local_path(path, force=force, **kwargs)
1198 except TypeError:
1199 bret = handler._get_local_path(path, **kwargs)
~\Anaconda3\envs\layout\lib\site-packages\layoutparser\models\detectron2\catalog.py in _get_local_path(self, path, **kwargs)
134 else:
135 raise ValueError(f"Unknown data_type {data_type}")
--> 136 return PathManager.get_local_path(model_url, **kwargs)
137
138 def _open(self, path, mode="r", **kwargs):
~\Anaconda3\envs\layout\lib\site-packages\iopath\common\file_io.py in get_local_path(self, path, force, **kwargs)
1195 handler = self.__get_path_handler(path) # type: ignore
1196 try:
-> 1197 bret = handler._get_local_path(path, force=force, **kwargs)
1198 except TypeError:
1199 bret = handler._get_local_path(path, **kwargs)
~\Anaconda3\envs\layout\lib\site-packages\iopath\common\file_io.py in _get_local_path(self, path, force, cache_dir, **kwargs)
792
793 cached = os.path.join(dirname, filename)
--> 794 with file_lock(cached):
795 if not os.path.isfile(cached):
796 logger.info("Downloading {} ...".format(path))
~\Anaconda3\envs\layout\lib\site-packages\portalocker\utils.py in __enter__(self)
155
156 def __enter__(self):
--> 157 return self.acquire()
158
159 def __exit__(self,
~\Anaconda3\envs\layout\lib\site-packages\portalocker\utils.py in acquire(self, timeout, check_interval, fail_when_locked)
237
238 # Get a new filehandler
--> 239 fh = self._get_fh()
240
241 def try_close(): # pragma: no cover
~\Anaconda3\envs\layout\lib\site-packages\portalocker\utils.py in _get_fh(self)
287 def _get_fh(self) -> typing.IO:
288 '''Get a new filehandle'''
--> 289 return open(self.filename, self.mode, **self.file_open_kwargs)
290
291 def _get_lock(self, fh: typing.IO) -> typing.IO:
OSError: [Errno 22] Invalid argument: 'C:\\Users\\vchinna/.torch/iopath_cache\\s/nau5ut6zgthunil\\config.yaml?dl=1.lock'
I'm not sure whether it is some kind of lock issue. Please help.
I got a similar error and worked around it manually on Windows. Using your case as the example: OSError: [Errno 22] Invalid argument: 'C:\Users\vchinna/.torch/iopath_cache\s/nau5ut6zgthunil\config.yaml?dl=1.lock'
Please follow this process:
Navigate to C:\Users\vchinna/.torch/iopath_cache\s/nau5ut6zgthunil\config.yaml
Open that config.yaml file.
Scroll down to WEIGHTS: https://www.dropbox.com/s/h7th27jfv19rxiy/model_final.pth?dl=1 (it should be around line 265).
Copy that link and paste it into your browser; a file named model_final.pth will be downloaded. Copy this file to your desired folder.
Now replace the path: WEIGHTS: your_desired_folder/model_final.pth
Save it and run the code; it works!
There is also a small iopath workaround you may need to apply first (if you have not already); see the GitHub issue:
https://github.com/Layout-Parser/layout-parser/issues/15
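Once you have local copies of config.yaml and the downloaded weights, a sketch of loading the model directly from them (the paths are hypothetical; per the traceback, Detectron2LayoutModel also accepts a model_path, which bypasses the iopath download cache that produced the invalid ?dl=1.lock filename):
import layoutparser as lp

# Local copies obtained manually as described above (placeholder paths).
config_file = r"C:\models\publaynet\config.yaml"
weights_file = r"C:\models\publaynet\model_final.pth"

model = lp.Detectron2LayoutModel(
    config_path=config_file,
    model_path=weights_file,  # skip the remote download and cache locking
    extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.8],
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
)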

Writing from pandas to parquet

I'm trying to export and convert my data to a parquet file. The data is SBA data from Kaggle that we've transformed a bit, and I want to convert it to parquet to load onto an HDFS server.
Data link:
https://www.kaggle.com/mirbektoktogaraev/should-this-loan-be-approved-or-denied
I'm trying to use the code:
sba.to_parquet('sba.parquet.gzip', compression='gzip', partition_cols='State')
but get the error:
---------------------------------------------------------------------------
ArrowInvalid Traceback (most recent call last)
<ipython-input-39-377ee6551e44> in <module>
----> 1 sba.to_parquet('sba.parquet.gzip', compression = 'gzip', partition_cols= 'State')
/opt/conda/lib/python3.8/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
197 else:
198 kwargs[new_arg_name] = new_arg_value
--> 199 return func(*args, **kwargs)
200
201 return cast(F, wrapper)
/opt/conda/lib/python3.8/site-packages/pandas/core/frame.py in to_parquet(self, path, engine, compression, index, partition_cols, storage_options, **kwargs)
2453 from pandas.io.parquet import to_parquet
2454
-> 2455 return to_parquet(
2456 self,
2457 path,
/opt/conda/lib/python3.8/site-packages/pandas/io/parquet.py in to_parquet(df, path, engine, compression, index, storage_options, partition_cols, **kwargs)
388 path_or_buf: FilePathOrBuffer = io.BytesIO() if path is None else path
389
--> 390 impl.write(
391 df,
392 path_or_buf,
/opt/conda/lib/python3.8/site-packages/pandas/io/parquet.py in write(self, df, path, compression, index, storage_options, partition_cols, **kwargs)
150 from_pandas_kwargs["preserve_index"] = index
151
--> 152 table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
153
154 path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle(
/opt/conda/lib/python3.8/site-packages/pyarrow/table.pxi in pyarrow.lib.Table.from_pandas()
/opt/conda/lib/python3.8/site-packages/pyarrow/pandas_compat.py in dataframe_to_arrays(df, schema, preserve_index, nthreads, columns, safe)
600 for i, maybe_fut in enumerate(arrays):
601 if isinstance(maybe_fut, futures.Future):
--> 602 arrays[i] = maybe_fut.result()
603
604 types = [x.type for x in arrays]
/opt/conda/lib/python3.8/concurrent/futures/_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
--> 432 return self.__get_result()
433
434 self._condition.wait(timeout)
/opt/conda/lib/python3.8/concurrent/futures/_base.py in __get_result(self)
386 def __get_result(self):
387 if self._exception:
--> 388 raise self._exception
389 else:
390 return self._result
/opt/conda/lib/python3.8/concurrent/futures/thread.py in run(self)
55
56 try:
---> 57 result = self.fn(*self.args, **self.kwargs)
58 except BaseException as exc:
59 self.future.set_exception(exc)
/opt/conda/lib/python3.8/site-packages/pyarrow/pandas_compat.py in convert_column(col, field)
572 e.args += ("Conversion failed for column {!s} with type {!s}"
573 .format(col.name, col.dtype),)
--> 574 raise e
575 if not field_nullable and result.null_count > 0:
576 raise ValueError("Field {} was non-nullable but pandas column "
/opt/conda/lib/python3.8/site-packages/pyarrow/pandas_compat.py in convert_column(col, field)
566
567 try:
--> 568 result = pa.array(col, type=type_, from_pandas=True, safe=safe)
569 except (pa.ArrowInvalid,
570 pa.ArrowNotImplementedError,
/opt/conda/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib.array()
/opt/conda/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._ndarray_to_array()
/opt/conda/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowInvalid: ('Could not convert 2004 with type str: tried to convert to int', 'Conversion failed for column ApprovalFY with type object')
Any help would be amazing.
@Micah Kornfield is correct. Here is a more specific answer.
If you look at your data, more specifically at rows 688127 and 688128, you find the following:
df.loc[688127, 'ApprovalFY']
2004
vs
df.loc[688128, 'ApprovalFY']
'2004'
This kind of mixed data causes an issue when writing a parquet file. I am not an expert on the parquet format, but as I understand it, parquet files record the type of each column in order to store the data more efficiently. Therefore, if you have two different types of data in the same column, you will receive this error. A lot of people run into this kind of issue when they save their data to CSV, then read the CSV file back and concatenate that data with new data they get from an API, etc.
Every time you save your data in CSV format it is converted to text, and when you read it back a value can change from 2004 to '2004'.
Back to the original question: it is a good idea to perform some data type checking before saving your data as parquet.
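For example, a minimal sketch of normalizing the mixed column before writing, assuming ApprovalFY should be numeric (if the column also contains non-numeric tokens, converting everything with .astype(str) is the alternative):
import pandas as pd

# ApprovalFY mixes int (2004) and str ('2004'); coerce to a single dtype
# so pyarrow can infer one Arrow type for the column.
sba['ApprovalFY'] = pd.to_numeric(sba['ApprovalFY'])

sba.to_parquet('sba.parquet.gzip', compression='gzip', partition_cols=['State'])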

(Memory) Problem when accessing Dask type arrays

I need to load several months of meteorological data for analysis, but the data is stored in files that each cover only one day, so I need to access many files at once.
I am following some instructions I was given, which told me to set up a memory-limited Dask cluster on my computer.
from datetime import datetime, timedelta
import dask.array as da
from dask.distributed import Client, LocalCluster
import xarray
try:
    client
except NameError:
    client = Client(n_workers=1, threads_per_worker=4, memory_limit='2GB')
else:
    print("Client already exists")
After this, I create a list dates covering roughly three months starting 1 June 2019, which is needed to build files, the links to the meteorological data.
dates=[datetime(2019,6,1) + timedelta(days=i) for i in range(3*30)]
files= [date.strftime('http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/%Y/%m/wrf_arw_det_history_d03_%Y%m%d_0000.nc4') for date in dates]
My issue starts when I try to open all of that data at once with:
multi = xarray.open_mfdataset(files, preprocess= lambda a : a.isel(time=slice(0,24)))
It raises the error:
KeyError Traceback (most recent call last)
~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in _acquire_with_cache_info(self, needs_lock)
197 try:
--> 198 file = self._cache[self._key]
199 except KeyError:
~\Nueva carpeta\lib\site-packages\xarray\backends\lru_cache.py in __getitem__(self, key)
52 with self._lock:
---> 53 value = self._cache[key]
54 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/2019/06/wrf_arw_det_history_d03_20190626_0000.nc4',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-19-c3d0f4a8cc26> in <module>
----> 1 multi = xarray.open_mfdataset(files, preprocess= lambda a : a.isel(time=slice(0,24)))
~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in open_mfdataset(paths, chunks, concat_dim, compat, preprocess, engine, lock, data_vars, coords, combine, autoclose, parallel, join, attrs_file, **kwargs)
916 getattr_ = getattr
917
--> 918 datasets = [open_(p, **open_kwargs) for p in paths]
919 file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
920 if preprocess is not None:
~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in <listcomp>(.0)
916 getattr_ = getattr
917
--> 918 datasets = [open_(p, **open_kwargs) for p in paths]
919 file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
920 if preprocess is not None:
~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs, use_cftime, decode_timedelta)
507 if engine == "netcdf4":
508 store = backends.NetCDF4DataStore.open(
--> 509 filename_or_obj, group=group, lock=lock, **backend_kwargs
510 )
511 elif engine == "scipy":
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
356 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
357 )
--> 358 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
359
360 def _acquire(self, needs_lock=True):
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in __init__(self, manager, group, mode, lock, autoclose)
312 self._group = group
313 self._mode = mode
--> 314 self.format = self.ds.data_model
315 self._filename = self.ds.filepath()
316 self.is_remote = is_remote_uri(self._filename)
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in ds(self)
365 #property
366 def ds(self):
--> 367 return self._acquire()
368
369 def open_store_variable(self, name, var):
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in _acquire(self, needs_lock)
359
360 def _acquire(self, needs_lock=True):
--> 361 with self._manager.acquire_context(needs_lock) as root:
362 ds = _nc4_require_group(root, self._group, self._mode)
363 return ds
~\Nueva carpeta\lib\contextlib.py in __enter__(self)
110 del self.args, self.kwds, self.func
111 try:
--> 112 return next(self.gen)
113 except StopIteration:
114 raise RuntimeError("generator didn't yield") from None
~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in acquire_context(self, needs_lock)
184 def acquire_context(self, needs_lock=True):
185 """Context manager for acquiring a file."""
--> 186 file, cached = self._acquire_with_cache_info(needs_lock)
187 try:
188 yield file
~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in _acquire_with_cache_info(self, needs_lock)
202 kwargs = kwargs.copy()
203 kwargs["mode"] = self._mode
--> 204 file = self._opener(*self._args, **kwargs)
205 if self._mode == "w":
206 # ensure file doesn't get overriden when opened again
netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Dataset.__init__()
netCDF4\_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()
OSError: [Errno -37] NetCDF: Write to read only: b'http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/2019/06/wrf_arw_det_history_d03_20190626_0000.nc4'
Does anyone know why this error occurs?
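As an isolation step (an assumption-based sketch, not a definitive fix): the traceback points at one specific remote file (the 2019-06-26 URL), so it can help to open the URLs one by one, find out which ones the THREDDS server fails to serve, and build the combined dataset from the rest.
import xarray

# Probe each remote dataset individually; collect the URLs that fail.
bad = []
for f in files:
    try:
        ds = xarray.open_dataset(f)
        ds.close()
    except OSError:
        bad.append(f)

print(len(bad), "problematic files:")
print(bad)

# Open only the files that respond.
good = [f for f in files if f not in bad]
multi = xarray.open_mfdataset(good, preprocess=lambda a: a.isel(time=slice(0, 24)))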
