py2neo Issue: ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt://localhost:7687')

I am trying to replicate this example on neo4j desktop:
https://stellargraph.readthedocs.io/en/stable/demos/connector/neo4j/load-cora-into-neo4j.html
I am able to reproduce everything until I get to the following lines:
import os
import py2neo
default_host = os.environ.get("STELLARGRAPH_NEO4J_HOST")
# Create the Neo4j Graph database object; the arguments can be edited to specify location and authentication
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
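For context, the tutorial reads the host from the STELLARGRAPH_NEO4J_HOST environment variable; a minimal sketch of setting it before the lines above, where "localhost" is my assumed value for a local Neo4j Desktop instance:
import os
# assumption: the desktop instance is local; the tutorial only fixes the variable name
os.environ["STELLARGRAPH_NEO4J_HOST"] = "localhost"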
I have tried the following attempts to create the neo4j database object:
#1
default_host = os.environ.get("StellarGraph")
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
#2
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True)
#3
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True, name="StellarGraph")
However, each time I attempt this, it results in some variation of this error:
IndexError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:806, in ConnectionPool.acquire(self, force_reset, can_overfill)
804 try:
805 # Plan A: select a free connection from the pool
--> 806 cx = self._free_list.popleft()
807 except IndexError:
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:62, in Wire.open(cls, address, timeout, keep_alive, on_broken)
61 try:
---> 62 s.connect(address)
63 except (IOError, OSError) as error:
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
WireError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:355, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
354 try:
--> 355 wire = cls._connect(profile, on_broken=on_broken)
356 protocol_version = cls._handshake(wire)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:369, in Bolt._connect(cls, profile, on_broken)
368 log.debug("[#%04X] C: (Dialing <%s>)", 0, profile.address)
--> 369 wire = Wire.open(profile.address, keep_alive=True, on_broken=on_broken)
370 local_port = wire.local_address.port_number
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:64, in Wire.open(cls, address, timeout, keep_alive, on_broken)
63 except (IOError, OSError) as error:
---> 64 raise_from(WireError("Cannot connect to %r" % (address,)), error)
65 return cls(s, on_broken=on_broken)
File <string>:3, in raise_from(value, from_value)
WireError: Cannot connect to IPv4Address(('localhost', 7687))
The above exception was the direct cause of the following exception:
ConnectionUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 2>()
1 uri = 'bolt://localhost:7687'
----> 2 graph = Graph(uri, auth=("neo4j", "mypass"), port= 7687, secure=True, name= "StellarGraph")
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:288, in Graph.__init__(self, profile, name, **settings)
287 def __init__(self, profile=None, name=None, **settings):
--> 288 self.service = GraphService(profile, **settings)
289 self.__name__ = name
290 self.schema = Schema(self)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:119, in GraphService.__init__(self, profile, **settings)
116 if connector_settings["init_size"] is None and not profile.routing:
117 # Ensures credentials are checked on construction
118 connector_settings["init_size"] = 1
--> 119 self._connector = Connector(profile, **connector_settings)
120 self._graphs = {}
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:960, in Connector.__init__(self, profile, user_agent, init_size, max_size, max_age, routing_refresh_ttl)
958 else:
959 self._router = None
--> 960 self._add_pools(*self._initial_routers)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:982, in Connector._add_pools(self, *profiles)
980 continue
981 log.debug("Adding connection pool for profile %r", profile)
--> 982 pool = ConnectionPool.open(
983 profile,
984 user_agent=self._user_agent,
985 init_size=self._init_size,
986 max_size=self._max_size,
987 max_age=self._max_age,
988 on_broken=self._on_broken)
989 self._pools[profile] = pool
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in ConnectionPool.open(cls, profile, user_agent, init_size, max_size, max_age, on_broken)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in <listcomp>(.0)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:813, in ConnectionPool.acquire(self, force_reset, can_overfill)
807 except IndexError:
808 if self._has_capacity() or can_overfill:
809 # Plan B: if the pool isn't full, open
810 # a new connection. This may raise a
811 # ConnectionUnavailable exception, which
812 # should bubble up to the caller.
--> 813 cx = self._connect()
814 if cx.supports_multi():
815 self._supports_multi = True
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:764, in ConnectionPool._connect(self)
761 def _connect(self):
762 """ Open and return a new connection.
763 """
--> 764 cx = Connection.open(self.profile, user_agent=self.user_agent,
765 on_release=lambda c: self.release(c),
766 on_broken=lambda msg: self.__on_broken(msg))
767 self._server_agent = cx.server_agent
768 return cx
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:174, in Connection.open(cls, profile, user_agent, on_release, on_broken)
172 if profile.protocol == "bolt":
173 from py2neo.client.bolt import Bolt
--> 174 return Bolt.open(profile, user_agent=user_agent,
175 on_release=on_release, on_broken=on_broken)
176 elif profile.protocol == "http":
177 from py2neo.client.http import HTTP
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:364, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
362 return bolt
363 except (TypeError, WireError) as error:
--> 364 raise_from(ConnectionUnavailable("Cannot open connection to %r" % profile), error)
File <string>:3, in raise_from(value, from_value)
ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt+s://localhost:7687')
I have also tried variations on the fix from this related question, but got the same error:
ISSUE IN CONNECTING py2neo v4 to my neo4j server
I appreciate any help resolving this issue. Thanks!

I was able to resolve this with the following syntax:
graph = Graph('neo4j://localhost:7687', user="neo4j", password="999")
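For reference, a minimal end-to-end sketch of the working setup with a throwaway query to confirm the connection (credentials as above; my earlier secure=True attempts forced TLS, i.e. bolt+s://, which a default desktop install doesn't serve):
from py2neo import Graph

graph = Graph('neo4j://localhost:7687', user="neo4j", password="999")
print(graph.run("RETURN 1 AS ok").data())  # [{'ok': 1}] once the server is reachable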
However, I am now having an issue with the following block:
empty_db_query = """
MATCH(n) DETACH
DELETE(n)
"""
tx = graph.begin(autocommit=True)
tx.evaluate(empty_db_query)
In newer versions of py2neo, graph.begin takes readonly=False instead of autocommit=True, but in either case I now get this error:
ServiceUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 6>()
1 empty_db_query = """
2 MATCH(n) DETACH
3 DELETE(n)
4 """
----> 6 tx = graph.begin(readonly=False)
7 tx.evaluate(empty_db_query)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:351, in Graph.begin(self, readonly)
340 def begin(self, readonly=False,
341 # after=None, metadata=None, timeout=None
342 ):
343 """ Begin a new :class:`~py2neo.Transaction`.
344
345 :param readonly: if :py:const:`True`, will begin a readonly
(...)
349 removed. Use the 'auto' method instead.*
350 """
--> 351 return Transaction(self, autocommit=False, readonly=readonly,
352 # after, metadata, timeout
353 )
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:915, in Transaction.__init__(self, graph, autocommit, readonly)
913 self._ref = None
914 else:
--> 915 self._ref = self._connector.begin(self.graph.name, readonly=readonly,
916 # after, metadata, timeout
917 )
918 self._readonly = readonly
919 self._closed = False
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1357, in Connector.begin(self, graph_name, readonly)
1345 def begin(self, graph_name, readonly=False,
1346 # after=None, metadata=None, timeout=None
1347 ):
1348 """ Begin a new explicit transaction.
1349
1350 :param graph_name:
(...)
1355 :raises Failure: if the server signals a failure condition
1356 """
-> 1357 cx = self._acquire(graph_name)
1358 try:
1359 return cx.begin(graph_name, readonly=readonly,
1360 # after=after, metadata=metadata, timeout=timeout
1361 )
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1111, in Connector._acquire(self, graph_name, readonly)
1109 return self._acquire_ro(graph_name)
1110 else:
-> 1111 return self._acquire_rw(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1203, in Connector._acquire_rw(self, graph_name)
1199 # TODO: exit immediately if the server/cluster is in readonly mode
1201 while True:
-> 1203 ro_profiles, rw_profiles = self._get_profiles(graph_name, readonly=False)
1204 if rw_profiles:
1205 # There is at least one writer, so collect the pools
1206 # for those writers. In all implementations to date,
1207 # a Neo4j cluster will only ever contain at most one
1208 # writer (per database). But this algorithm should
1209 # still survive if that changes.
1210 pools = [pool for profile, pool in list(self._pools.items())
1211 if profile in rw_profiles]
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1016, in Connector._get_profiles(self, graph_name, readonly)
1014 rt.wait_until_updated()
1015 else:
-> 1016 self.refresh_routing_table(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1064, in Connector.refresh_routing_table(self, graph_name)
1062 cx.release()
1063 else:
-> 1064 raise ServiceUnavailable("Cannot connect to any known routers")
1065 finally:
1066 rt.set_not_updating()
ServiceUnavailable: Cannot connect to any known routers
Appreciate any help in resolving this. Thank you!
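For reference, a sketch of both query forms under the 2021.x py2neo API, assuming the connection itself succeeds (graph.commit(tx) replaces the deprecated tx.commit()):
empty_db_query = """
MATCH (n) DETACH DELETE n
"""
# auto-commit form
graph.run(empty_db_query)
# explicit transaction form
tx = graph.begin()            # no autocommit argument in 2021.x
tx.evaluate(empty_db_query)
graph.commit(tx)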

Related

Connection Refused Error on Colab when converting pandas series to numpy

I am doing a machine learning task. I keep getting a connection refused error on Colab when I convert my pandas series to a numpy array. I have to restart the runtime every time, because once this happens none of the cells work anymore and they all give the same error. It sometimes works when the series is short, but most of the time it fails with the error below.
X = train['finished_embeddings'].values
ConnectionRefusedError Traceback (most recent call last)
<ipython-input-21-fc36e96b715b> in <module>
----> 1 X = train['finished_embeddings'].values
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/generic.py in values(self)
637 """
638 warnings.warn("We recommend using `{}.to_numpy()` instead.".format(type(self).__name__))
--> 639 return self.to_numpy()
640
641 def to_csv(
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/generic.py in to_numpy(self)
574 array(['a', 'b', 'a'], dtype=object)
575 """
--> 576 return self.to_pandas().values
577
578 @property
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/series.py in to_pandas(self)
1540 Name: dogs, dtype: float64
1541 """
-> 1542 return self._to_internal_pandas().copy()
1543
1544 def to_list(self) -> List:
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/series.py in _to_internal_pandas(self)
6287 This method is for internal use only.
6288 """
-> 6289 return self._psdf._internal.to_pandas_frame[self.name]
6290
6291 def __repr__(self) -> str_type:
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/utils.py in wrapped_lazy_property(self)
578 def wrapped_lazy_property(self):
579 if not hasattr(self, attr_name):
--> 580 setattr(self, attr_name, fn(self))
581 return getattr(self, attr_name)
582
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/internal.py in to_pandas_frame(self)
1049 """Return as pandas DataFrame."""
1050 sdf = self.to_internal_spark_frame
-> 1051 pdf = sdf.toPandas()
1052 if len(pdf) == 0 and len(sdf.schema) > 0:
1053 pdf = pdf.astype(
/usr/local/lib/python3.7/dist-packages/pyspark/sql/pandas/conversion.py in toPandas(self)
65 import pandas as pd
66
---> 67 timezone = self.sql_ctx._conf.sessionLocalTimeZone()
68
69 if self.sql_ctx._conf.arrowPySparkEnabled():
/usr/local/lib/python3.7/dist-packages/pyspark/sql/context.py in _conf(self)
107 def _conf(self):
108 """Accessor for the JVM SQL-specific configurations"""
--> 109 return self.sparkSession._jsparkSession.sessionState().conf()
110
111 @classmethod
/usr/local/lib/python3.7/dist-packages/py4j/java_gateway.py in __call__(self, *args)
1318 proto.END_COMMAND_PART
1319
-> 1320 answer = self.gateway_client.send_command(command)
1321 return_value = get_return_value(
1322 answer, self.gateway_client, self.target_id, self.name)
/usr/local/lib/python3.7/dist-packages/py4j/java_gateway.py in send_command(self, command, retry, binary)
1034 if `binary` is `True`.
1035 """
-> 1036 connection = self._get_connection()
1037 try:
1038 response = connection.send_command(command)
/usr/local/lib/python3.7/dist-packages/py4j/clientserver.py in _get_connection(self)
279
280 if connection is None or connection.socket is None:
--> 281 connection = self._create_new_connection()
282 return connection
283
/usr/local/lib/python3.7/dist-packages/py4j/clientserver.py in _create_new_connection(self)
286 self.java_parameters, self.python_parameters,
287 self.gateway_property, self)
--> 288 connection.connect_to_java_server()
289 self.set_thread_connection(connection)
290 return connection
/usr/local/lib/python3.7/dist-packages/py4j/clientserver.py in connect_to_java_server(self)
400 self.socket = self.ssl_context.wrap_socket(
401 self.socket, server_hostname=self.java_address)
--> 402 self.socket.connect((self.java_address, self.java_port))
403 self.stream = self.socket.makefile("rb")
404 self.is_connected = True
ConnectionRefusedError: [Errno 111] Connection refused
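The pyspark paths in the traceback show that train is a pyspark.pandas object, so .values has to round-trip through the JVM gateway. A sketch that makes that round-trip explicit, using the names from the question and assuming the Spark session is still alive:
# .to_pandas() is the single call that talks to the JVM; everything after it is local
pdf = train['finished_embeddings'].to_pandas()
X = pdf.to_numpy()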

Mlflow "load_model" goes in deadlock

Trying to load a model from a past run in MLflow, in JupyterLab, never finishes. After waiting for hours, interrupting the run throws the state below.
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Input In [21], in <cell line: 2>()
1 logged_model = 'runs:/7f6932baef144fa69847ba11ef66f8e6/model/'
----> 2 loaded_model = mlflow.tensorflow.load_model(logged_model)
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/mlflow/tensorflow/__init__.py:397, in load_model(model_uri, dst_path)
360 def load_model(model_uri, dst_path=None):
361 """
362 Load an MLflow model that contains the TensorFlow flavor from the specified path.
363
(...)
395 for _, output_signature in signature_definition.outputs.items()]
396 """
--> 397 local_model_path = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
398 flavor_conf = _get_flavor_configuration(local_model_path, FLAVOR_NAME)
399 _add_code_from_conf_to_system_path(local_model_path, flavor_conf)
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/mlflow/tracking/artifact_utils.py:95, in _download_artifact_from_uri(artifact_uri, output_path)
92 parsed_uri = parsed_uri._replace(path=posixpath.dirname(parsed_uri.path))
93 root_uri = prefix + urllib.parse.urlunparse(parsed_uri)
---> 95 return get_artifact_repository(artifact_uri=root_uri).download_artifacts(
96 artifact_path=artifact_path, dst_path=output_path
97 )
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/mlflow/store/artifact/runs_artifact_repo.py:125, in RunsArtifactRepository.download_artifacts(self, artifact_path, dst_path)
110 def download_artifacts(self, artifact_path, dst_path=None):
111 """
112 Download an artifact file or directory to a local directory if applicable, and return a
113 local path for it.
(...)
123 :return: Absolute path of the local filesystem location containing the desired artifacts.
124 """
--> 125 return self.repo.download_artifacts(artifact_path, dst_path)
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/mlflow/store/artifact/artifact_repo.py:242, in ArtifactRepository.download_artifacts(self, artifact_path, dst_path)
240 # Check if the artifacts points to a directory
241 if self._is_directory(artifact_path):
--> 242 dst_local_path, inflight_downloads = async_download_artifact_dir(
243 src_artifact_dir_path=artifact_path, dst_local_dir_path=dst_path
244 )
245 else:
246 inflight_downloads = async_download_artifact(
247 src_artifact_path=artifact_path, dst_local_dir_path=dst_path
248 )
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/mlflow/store/artifact/artifact_repo.py:208, in ArtifactRepository.download_artifacts.<locals>.async_download_artifact_dir(src_artifact_dir_path, dst_local_dir_path)
206 for file_info in dir_content:
207 if file_info.is_dir:
--> 208 inflight_downloads += async_download_artifact_dir(
209 src_artifact_dir_path=file_info.path,
210 dst_local_dir_path=dst_local_dir_path,
211 )[2]
212 else:
213 inflight_downloads += async_download_artifact(
214 src_artifact_path=file_info.path,
215 dst_local_dir_path=dst_local_dir_path,
216 )
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/mlflow/store/artifact/artifact_repo.py:199, in ArtifactRepository.download_artifacts.<locals>.async_download_artifact_dir(src_artifact_dir_path, dst_local_dir_path)
195 local_dir = os.path.join(dst_local_dir_path, src_artifact_dir_path)
196 inflight_downloads = []
197 dir_content = [ # prevent infinite loop, sometimes the dir is recursively included
198 file_info
--> 199 for file_info in self.list_artifacts(src_artifact_dir_path)
200 if file_info.path != "." and file_info.path != src_artifact_dir_path
201 ]
202 if not dir_content: # empty dir
203 if not os.path.exists(local_dir):
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/mlflow/store/artifact/sftp_artifact_repo.py:94, in SFTPArtifactRepository.list_artifacts(self, path)
92 artifact_dir = self.path
93 list_dir = posixpath.join(artifact_dir, path) if path else artifact_dir
---> 94 if not self.sftp.isdir(list_dir):
95 return []
96 artifact_files = self.sftp.listdir(list_dir)
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/pysftp/__init__.py:652, in Connection.isdir(self, remotepath)
650 self._sftp_connect()
651 try:
--> 652 result = S_ISDIR(self._sftp.stat(remotepath).st_mode)
653 except IOError: # no such file
654 result = False
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/paramiko/sftp_client.py:493, in SFTPClient.stat(self, path)
491 path = self._adjust_cwd(path)
492 self._log(DEBUG, "stat({!r})".format(path))
--> 493 t, msg = self._request(CMD_STAT, path)
494 if t != CMD_ATTRS:
495 raise SFTPError("Expected attributes")
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/paramiko/sftp_client.py:822, in SFTPClient._request(self, t, *arg)
820 def _request(self, t, *arg):
821 num = self._async_request(type(None), t, *arg)
--> 822 return self._read_response(num)
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/paramiko/sftp_client.py:852, in SFTPClient._read_response(self, waitfor)
850 while True:
851 try:
--> 852 t, data = self._read_packet()
853 except EOFError as e:
854 raise SSHException("Server connection dropped: {}".format(e))
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/paramiko/sftp.py:201, in BaseSFTP._read_packet(self)
200 def _read_packet(self):
--> 201 x = self._read_all(4)
202 # most sftp servers won't accept packets larger than about 32k, so
203 # anything with the high byte set (> 16MB) is just garbage.
204 if byte_ord(x[0]):
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/paramiko/sftp.py:185, in BaseSFTP._read_all(self, n)
183 break
184 else:
--> 185 x = self.sock.recv(n)
187 if len(x) == 0:
188 raise EOFError()
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/paramiko/channel.py:699, in Channel.recv(self, nbytes)
686 """
687 Receive data from the channel. The return value is a string
688 representing the data received. The maximum amount of data to be
(...)
696 if no data is ready before the timeout set by `settimeout`.
697 """
698 try:
--> 699 out = self.in_buffer.read(nbytes, self.timeout)
700 except PipeTimeout:
701 raise socket.timeout()
File ~/.conda/envs/tensorflow/lib/python3.8/site-packages/paramiko/buffered_pipe.py:160, in BufferedPipe.read(self, nbytes, timeout)
158 while (len(self._buffer) == 0) and not self._closed:
159 then = time.time()
--> 160 self._cv.wait(timeout)
161 if timeout is not None:
162 timeout -= time.time() - then
File ~/.conda/envs/tensorflow/lib/python3.8/threading.py:302, in Condition.wait(self, timeout)
300 try: # restore state no matter what (e.g., KeyboardInterrupt)
301 if timeout is None:
--> 302 waiter.acquire()
303 gotit = True
304 else:
KeyboardInterrupt:
The mlflow tracking server is working properly for all the other operations. I am able to log params, metrics and artifacts, but I am not able to load a model or retrieve any of the artifacts.
Update:
Looks like a bug as per https://github.com/mlflow/mlflow/issues/5656.
Please upgrade mlflow; there is an issue with version 1.26.0:
pip install mlflow==1.27.0
(assuming you are also on the affected version)
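A quick sanity check that the environment actually picked up the fix before retrying the load (a sketch; the run URI is the one from the question):
from packaging.version import Version
import mlflow

# 1.26.0 has the artifact-download bug per the linked issue; 1.27.0 fixes it
assert Version(mlflow.__version__) >= Version("1.27.0"), mlflow.__version__
logged_model = 'runs:/7f6932baef144fa69847ba11ef66f8e6/model/'
loaded_model = mlflow.tensorflow.load_model(logged_model)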

Scattering data to dask cluster workers: unknown address scheme 'gateway'

I am following the code from the accepted answer to this SO question (the "Chunk then scatter" part), and I get a strange error while trying to scatter a pandas.DataFrame to the workers.
I am working in jupyter notebook if that matters.
I am not sure what this error means, it's quite cryptic, so any help would be greatly appreciated.
from dask_gateway import Gateway
import dask.dataframe as dd
import dask
gateway = Gateway()
options = gateway.cluster_options()
cluster = gateway.new_cluster(cluster_options=options)
cluster.scale(10)
client = cluster.get_client()
X_train = ... # build pandas.DataFrame
x = dd.from_pandas(X_train, npartitions=10)
x = x.persist(get=dask.threaded.get) # chunk locally
futures = client.scatter(dict(x.dask)) # scatter chunks
x.dask = x
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
/tmp/ipykernel_567/3586545525.py in <module>
1 x = dd.from_pandas(X_train, npartitions=10)
2 x = x.persist(get=dask.threaded.get) # chunk locally
----> 3 futures = client.scatter(dict(x.dask)) # scatter chunks
4 x.dask = x
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/client.py in scatter(self, data, workers, broadcast, direct, hash, timeout, asynchronous)
2182 else:
2183 local_worker = None
-> 2184 return self.sync(
2185 self._scatter,
2186 data,
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
866 return future
867 else:
--> 868 return sync(
869 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
870 )
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
330 if error[0]:
331 typ, exc, tb = error[0]
--> 332 raise exc.with_traceback(tb)
333 else:
334 return result[0]
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/utils.py in f()
313 if callback_timeout is not None:
314 future = asyncio.wait_for(future, callback_timeout)
--> 315 result[0] = yield future
316 except Exception:
317 error[0] = sys.exc_info()
/srv/conda/envs/notebook/lib/python3.9/site-packages/tornado/gen.py in run(self)
760
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/client.py in _scatter(self, data, workers, broadcast, direct, local_worker, timeout, hash)
2004 isinstance(k, (bytes, str)) for k in data
2005 ):
-> 2006 d = await self._scatter(keymap(stringify, data), workers, broadcast)
2007 return {k: d[stringify(k)] for k in data}
2008
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/client.py in _scatter(self, data, workers, broadcast, direct, local_worker, timeout, hash)
2073 )
2074 else:
-> 2075 await self.scheduler.scatter(
2076 data=data2,
2077 workers=workers,
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/core.py in send_recv_from_rpc(**kwargs)
893 name, comm.name = comm.name, "ConnectionPool." + key
894 try:
--> 895 result = await send_recv(comm=comm, op=key, **kwargs)
896 finally:
897 self.pool.reuse(self.addr, comm)
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/core.py in send_recv(comm, reply, serializers, deserializers, **kwargs)
686 if comm.deserialize:
687 typ, exc, tb = clean_exception(**response)
--> 688 raise exc.with_traceback(tb)
689 else:
690 raise Exception(response["exception_text"])
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/core.py in handle_comm()
528 result = asyncio.ensure_future(result)
529 self._ongoing_coroutines.add(result)
--> 530 result = await result
531 except (CommClosedError, CancelledError):
532 if self.status in (Status.running, Status.paused):
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/scheduler.py in scatter()
5795 assert isinstance(data, dict)
5796
-> 5797 keys, who_has, nbytes = await scatter_to_workers(
5798 nthreads, data, rpc=self.rpc, report=False
5799 )
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/utils_comm.py in scatter_to_workers()
143 rpcs = {addr: rpc(addr) for addr in d}
144 try:
--> 145 out = await All(
146 [
147 rpcs[address].update_data(
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/utils.py in All()
214 while not tasks.done():
215 try:
--> 216 result = await tasks.next()
217 except Exception:
218
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/core.py in send_recv_from_rpc()
893 name, comm.name = comm.name, "ConnectionPool." + key
894 try:
--> 895 result = await send_recv(comm=comm, op=key, **kwargs)
896 finally:
897 self.pool.reuse(self.addr, comm)
/srv/conda/envs/notebook/lib/python3.9/site-packages/distributed/core.py in send_recv()
688 raise exc.with_traceback(tb)
689 else:
--> 690 raise Exception(response["exception_text"])
691 return response
692
Exception: ValueError("unknown address scheme 'gateway' (known schemes: ['inproc', 'tcp', 'tls', 'ucx', 'ws', 'wss'])")
dd.from_pandas() does this "partitioning-then-scattering" internally, so you don't have to do it manually anymore. You can use the Dask DataFrame API directly on x, and the computation should automatically run on your cluster. :)
The answer you've linked is from 5 years ago and is now outdated, because Dask has matured a lot since then. For instance, x.dask now refers to a "high level graph" (a recently added feature) instead of a low-level graph. Dask Gateway uses its own URL scheme, and I'm guessing it's not able to interface with this older Dask syntax properly.
Also, note that mixing schedulers (as done in that answer) isn't recommended anymore.
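Concretely, a sketch of the modern pattern this describes, reusing client and X_train from the question (no manual scatter):
import dask.dataframe as dd

x = dd.from_pandas(X_train, npartitions=10)  # partition locally
x = x.persist()  # with a distributed client active, this ships the partitions to the workers
result = x.mean().compute()  # any DataFrame-API computation then runs on the cluster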

Snowflake InterfaceError: Failed to convert current row, cause: int too big to convert

I am trying to build a utility script which pulls data from Snowflake into a dataframe within a local Jupyter notebook. When I run it, I keep getting this error:
InterfaceError: (snowflake.connector.errors.InterfaceError) 252005: Failed to convert current row, cause: int too big to convert
Can someone guide me what I am doing wrong here? Any help is highly appreciated.
Code:
import os
import pandas as pd
from snowflake.sqlalchemy import URL
from sqlalchemy import create_engine, pool

def read_sql(path, file):
    """Reads SQL file from local env and returns query as string"""
    with open(os.path.join(path, file)) as sql_file:
        sql = sql_file.read()
    return sql

def db_con(sql,
           user,
           password,
           warehouse,
           database,
           schema):
    """Sets up db connection, runs query and returns pandas df"""
    engine = create_engine(
        URL(account='123',
            user=user,
            password=password,
            database=database,
            schema=schema,
            warehouse=warehouse,
            numpy=True),
        poolclass=pool.StaticPool
    )
    try:
        connection = engine.connect()
        connection.execute('Use Warehouse {warehouse};'.format(warehouse=warehouse))
        df = pd.read_sql(sql, engine)
    finally:
        connection.close()
        engine.dispose()
    return df
When I call this function, I get the error:
#Get Data
sql = '''select * from "d1"."public"."some_table" limit 1 '''
df = db_con(sql,
USERNAME,
PASSWORD,
WAREHOUSE,
DATABASE)
df.head()
Error:
Initialize Query
---------------------------------------------------------------------------
InterfaceError Traceback (most recent call last)
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in fetchall(self, result, dbapi_cursor)
972 try:
--> 973 rows = dbapi_cursor.fetchall()
974 result._soft_close()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchall(self)
990 while True:
--> 991 row = self.fetchone()
992 if row is None:
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchone(self)
956 try:
--> 957 return next(self._result)
958 except StopIteration:
src/snowflake/connector/arrow_result.pyx in snowflake.connector.arrow_result.ArrowResult.__next__()
src/snowflake/connector/arrow_iterator.pyx in snowflake.connector.arrow_iterator.PyArrowIterator.__next__()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in errorhandler_wrapper(connection, cursor, error_class, error_value)
257 cursor.messages.append((error_class, error_value))
--> 258 cursor.errorhandler(connection, cursor, error_class, error_value)
259 return
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in default_errorhandler(connection, cursor, error_class, error_value)
187 """
--> 188 raise error_class(
189 msg=error_value.get("msg"),
InterfaceError: 252005: Failed to convert current row, cause: int too big to convert
The above exception was the direct cause of the following exception:
InterfaceError Traceback (most recent call last)
<ipython-input-48-373f7ee62b3f> in <module>
2 sql = '''select * from "PRODUCTION"."SFDC"."CONTRACT" limit 1 '''
3
----> 4 df = db_con(sql,
5 USERNAME,
6 PASSWORD,
<ipython-input-45-0f30a2664209> in db_con(sql, user, password, warehouse, database, schema)
29 print('Initialize Query')
30 connection.execute('Use Warehouse {warehouse};'.format(warehouse=warehouse))
---> 31 df = pd.read_sql(sql,engine)
32 finally:
33 connection.close()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
519 )
520 else:
--> 521 return pandas_sql.read_query(
522 sql,
523 index_col=index_col,
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
1319 )
1320 else:
-> 1321 data = result.fetchall()
1322 frame = _wrap_result(
1323 data,
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/result.py in fetchall(self)
996 """A synonym for the :meth:`_engine.Result.all` method."""
997
--> 998 return self._allrows()
999
1000 def fetchone(self):
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/result.py in _allrows(self)
406 make_row = self._row_getter
407
--> 408 rows = self._fetchall_impl()
409 if make_row:
410 made_rows = [make_row(row) for row in rows]
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in _fetchall_impl(self)
1772
1773 def _fetchall_impl(self):
-> 1774 return self.cursor_strategy.fetchall(self, self.cursor)
1775
1776 def _fetchmany_impl(self, size=None):
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in fetchall(self, result, dbapi_cursor)
975 return rows
976 except BaseException as e:
--> 977 self.handle_exception(result, dbapi_cursor, e)
978
979
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in handle_exception(self, result, dbapi_cursor, err)
935
936 def handle_exception(self, result, dbapi_cursor, err):
--> 937 result.connection._handle_dbapi_exception(
938 err, None, None, dbapi_cursor, result.context
939 )
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1927 util.raise_(newraise, with_traceback=exc_info[2], from_=e)
1928 elif should_wrap:
-> 1929 util.raise_(
1930 sqlalchemy_exception, with_traceback=exc_info[2], from_=e
1931 )
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/util/compat.py in raise_(***failed resolving arguments***)
209
210 try:
--> 211 raise exception
212 finally:
213 # credit to
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in fetchall(self, result, dbapi_cursor)
971 def fetchall(self, result, dbapi_cursor):
972 try:
--> 973 rows = dbapi_cursor.fetchall()
974 result._soft_close()
975 return rows
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchall(self)
989 ret = []
990 while True:
--> 991 row = self.fetchone()
992 if row is None:
993 break
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchone(self)
955 self._prefetch_hook()
956 try:
--> 957 return next(self._result)
958 except StopIteration:
959 return None
src/snowflake/connector/arrow_result.pyx in snowflake.connector.arrow_result.ArrowResult.__next__()
src/snowflake/connector/arrow_iterator.pyx in snowflake.connector.arrow_iterator.PyArrowIterator.__next__()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in errorhandler_wrapper(connection, cursor, error_class, error_value)
256 if cursor is not None:
257 cursor.messages.append((error_class, error_value))
--> 258 cursor.errorhandler(connection, cursor, error_class, error_value)
259 return
260 elif connection is not None:
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in default_errorhandler(connection, cursor, error_class, error_value)
186 A Snowflake error.
187 """
--> 188 raise error_class(
189 msg=error_value.get("msg"),
190 errno=error_value.get("errno"),
InterfaceError: (snowflake.connector.errors.InterfaceError) 252005: Failed to convert current row, cause: int too big to convert
(Background on this error at: http://sqlalche.me/e/14/rvf5)

(Memory) Problem when accessing Dask type arrays

I need to load some meteorological data to analyze several months, but the data is stored in files that each cover only one day, so I need to access many files at once.
I am following some pre-given instructions that told me to create a memory-limited client on my computer.
from datetime import datetime, timedelta
import dask.array as da
from dask.distributed import Client, LocalCluster
import xarray
try:
client
except NameError:
client = Client(n_workers=1, threads_per_worker=4, memory_limit='2GB')
else:
print("Client already exists")
After this, I create a list dates that starts on 1 June 2019 and spans roughly three months, and that is needed in files to build the links to the meteorological data.
dates=[datetime(2019,6,1) + timedelta(days=i) for i in range(3*30)]
files= [date.strftime('http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/%Y/%m/wrf_arw_det_history_d03_%Y%m%d_0000.nc4') for date in dates]
My issue starts when I try to open all that data with
multi = xarray.open_mfdataset(files, preprocess= lambda a : a.isel(time=slice(0,24)))
It raises the error:
KeyError Traceback (most recent call last)
~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in _acquire_with_cache_info(self, needs_lock)
197 try:
--> 198 file = self._cache[self._key]
199 except KeyError:
~\Nueva carpeta\lib\site-packages\xarray\backends\lru_cache.py in __getitem__(self, key)
52 with self._lock:
---> 53 value = self._cache[key]
54 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/2019/06/wrf_arw_det_history_d03_20190626_0000.nc4',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-19-c3d0f4a8cc26> in <module>
----> 1 multi = xarray.open_mfdataset(files, preprocess= lambda a : a.isel(time=slice(0,24)))
~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in open_mfdataset(paths, chunks, concat_dim, compat, preprocess, engine, lock, data_vars, coords, combine, autoclose, parallel, join, attrs_file, **kwargs)
916 getattr_ = getattr
917
--> 918 datasets = [open_(p, **open_kwargs) for p in paths]
919 file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
920 if preprocess is not None:
~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in <listcomp>(.0)
916 getattr_ = getattr
917
--> 918 datasets = [open_(p, **open_kwargs) for p in paths]
919 file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
920 if preprocess is not None:
~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs, use_cftime, decode_timedelta)
507 if engine == "netcdf4":
508 store = backends.NetCDF4DataStore.open(
--> 509 filename_or_obj, group=group, lock=lock, **backend_kwargs
510 )
511 elif engine == "scipy":
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
356 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
357 )
--> 358 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
359
360 def _acquire(self, needs_lock=True):
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in __init__(self, manager, group, mode, lock, autoclose)
312 self._group = group
313 self._mode = mode
--> 314 self.format = self.ds.data_model
315 self._filename = self.ds.filepath()
316 self.is_remote = is_remote_uri(self._filename)
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in ds(self)
365 @property
366 def ds(self):
--> 367 return self._acquire()
368
369 def open_store_variable(self, name, var):
~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in _acquire(self, needs_lock)
359
360 def _acquire(self, needs_lock=True):
--> 361 with self._manager.acquire_context(needs_lock) as root:
362 ds = _nc4_require_group(root, self._group, self._mode)
363 return ds
~\Nueva carpeta\lib\contextlib.py in __enter__(self)
110 del self.args, self.kwds, self.func
111 try:
--> 112 return next(self.gen)
113 except StopIteration:
114 raise RuntimeError("generator didn't yield") from None
~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in acquire_context(self, needs_lock)
184 def acquire_context(self, needs_lock=True):
185 """Context manager for acquiring a file."""
--> 186 file, cached = self._acquire_with_cache_info(needs_lock)
187 try:
188 yield file
~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in _acquire_with_cache_info(self, needs_lock)
202 kwargs = kwargs.copy()
203 kwargs["mode"] = self._mode
--> 204 file = self._opener(*self._args, **kwargs)
205 if self._mode == "w":
206 # ensure file doesn't get overriden when opened again
netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Dataset.__init__()
netCDF4\_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()
OSError: [Errno -37] NetCDF: Write to read only: b'http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/2019/06/wrf_arw_det_history_d03_20190626_0000.nc4'
Does anyone know why this error occurs?
