Connection Refused Error on Colab when converting pandas series to numpy - python

I am doing a machine learning task. I keep getting a connection refused error on Colab when I convert my pandas series to a NumPy array. I have to restart the runtime every time, because once this happens none of the cells work anymore and they all give the same error. It sometimes works when the series is short, but most of the time it fails with this error:
X = train['finished_embeddings'].values
ConnectionRefusedError Traceback (most recent call last)
<ipython-input-21-fc36e96b715b> in <module>
----> 1 X = train['finished_embeddings'].values
12 frames
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/generic.py in values(self)
637 """
638 warnings.warn("We recommend using `{}.to_numpy()` instead.".format(type(self).__name__))
--> 639 return self.to_numpy()
640
641 def to_csv(
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/generic.py in to_numpy(self)
574 array(['a', 'b', 'a'], dtype=object)
575 """
--> 576 return self.to_pandas().values
577
578 @property
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/series.py in to_pandas(self)
1540 Name: dogs, dtype: float64
1541 """
-> 1542 return self._to_internal_pandas().copy()
1543
1544 def to_list(self) -> List:
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/series.py in _to_internal_pandas(self)
6287 This method is for internal use only.
6288 """
-> 6289 return self._psdf._internal.to_pandas_frame[self.name]
6290
6291 def __repr__(self) -> str_type:
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/utils.py in wrapped_lazy_property(self)
578 def wrapped_lazy_property(self):
579 if not hasattr(self, attr_name):
--> 580 setattr(self, attr_name, fn(self))
581 return getattr(self, attr_name)
582
/usr/local/lib/python3.7/dist-packages/pyspark/pandas/internal.py in to_pandas_frame(self)
1049 """Return as pandas DataFrame."""
1050 sdf = self.to_internal_spark_frame
-> 1051 pdf = sdf.toPandas()
1052 if len(pdf) == 0 and len(sdf.schema) > 0:
1053 pdf = pdf.astype(
/usr/local/lib/python3.7/dist-packages/pyspark/sql/pandas/conversion.py in toPandas(self)
65 import pandas as pd
66
---> 67 timezone = self.sql_ctx._conf.sessionLocalTimeZone()
68
69 if self.sql_ctx._conf.arrowPySparkEnabled():
/usr/local/lib/python3.7/dist-packages/pyspark/sql/context.py in _conf(self)
107 def _conf(self):
108 """Accessor for the JVM SQL-specific configurations"""
--> 109 return self.sparkSession._jsparkSession.sessionState().conf()
110
111 @classmethod
/usr/local/lib/python3.7/dist-packages/py4j/java_gateway.py in __call__(self, *args)
1318 proto.END_COMMAND_PART
1319
-> 1320 answer = self.gateway_client.send_command(command)
1321 return_value = get_return_value(
1322 answer, self.gateway_client, self.target_id, self.name)
/usr/local/lib/python3.7/dist-packages/py4j/java_gateway.py in send_command(self, command, retry, binary)
1034 if `binary` is `True`.
1035 """
-> 1036 connection = self._get_connection()
1037 try:
1038 response = connection.send_command(command)
/usr/local/lib/python3.7/dist-packages/py4j/clientserver.py in _get_connection(self)
279
280 if connection is None or connection.socket is None:
--> 281 connection = self._create_new_connection()
282 return connection
283
/usr/local/lib/python3.7/dist-packages/py4j/clientserver.py in _create_new_connection(self)
286 self.java_parameters, self.python_parameters,
287 self.gateway_property, self)
--> 288 connection.connect_to_java_server()
289 self.set_thread_connection(connection)
290 return connection
/usr/local/lib/python3.7/dist-packages/py4j/clientserver.py in connect_to_java_server(self)
400 self.socket = self.ssl_context.wrap_socket(
401 self.socket, server_hostname=self.java_address)
--> 402 self.socket.connect((self.java_address, self.java_port))
403 self.stream = self.socket.makefile("rb")
404 self.is_connected = True
ConnectionRefusedError: [Errno 111] Connection refused
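For context, the frames above show that train is a pyspark.pandas (pandas-on-Spark) DataFrame rather than a plain pandas one: .values warns, falls back to .to_numpy(), and ultimately collects the whole column to the driver through the py4j gateway, which is the socket refusing the connection once the Spark JVM side is unreachable. A minimal sketch of the same conversion spelled out explicitly (the column name comes from the question; train is assumed to be the same pandas-on-Spark frame):
# .values on a pandas-on-Spark Series is equivalent to calling .to_numpy(),
# which calls .to_pandas() and pulls the whole column to the driver via py4j;
# that round trip is what raises ConnectionRefusedError when the JVM is gone
X = train['finished_embeddings'].to_numpy()

# equivalently, materialise an intermediate pandas Series first and reuse it
pdf_col = train['finished_embeddings'].to_pandas()
X = pdf_col.values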

Related

FSSpec Error Handling in Python - Timeout Error

I am trying to get TerraClimate data from the Microsoft Planetary Computer and am facing a timeout error. Is there a possibility of increasing the timeout? Please find the code and the error below. I am using fsspec and xarray to download spatial data from the MS Planetary Computer portal.
import fsspec
import xarray as xr
store = fsspec.get_mapper(asset.href)
data = xr.open_zarr(store, **asset.extra_fields["xarray:open_kwargs"])
clipped_data = data.sel(time=slice('2015-01-01','2019-12-31'),lon=slice(min_lon,max_lon),lat=slice(max_lat,min_lat))
parsed_data = clipped_data[['tmax', 'tmin', 'ppt', 'soil']]
lat_list = parsed_data['lat'].values.tolist()
lon_list = parsed_data['lon'].values.tolist()
filename = "Soil_Moisture_sample.csv"
for (i, j) in zip(lat_list, lon_list):
    parsed_data[["soil", "tmax", "tmin", "ppt"]].sel(lon=i, lat=j, method="nearest").to_dataframe().to_csv(filename, mode='a', index=False, header=False)
I am getting the following error
TimeoutError Traceback (most recent call last)
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:53, in _runner(event, coro, result, timeout)
52 try:
---> 53 result[0] = await coro
54 except Exception as ex:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:423, in AsyncFileSystem._cat(self, path, recursive, on_error, batch_size, **kwargs)
422 if ex:
--> 423 raise ex
424 if (
425 len(paths) > 1
426 or isinstance(path, list)
427 or paths[0] != self._strip_protocol(path)
428 ):
File ~\Anaconda3\envs\satellite\lib\asyncio\tasks.py:455, in wait_for(fut, timeout, loop)
454 if timeout is None:
--> 455 return await fut
457 if timeout <= 0:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\implementations\http.py:221, in HTTPFileSystem._cat_file(self, url, start, end, **kwargs)
220 async with session.get(url, **kw) as r:
--> 221 out = await r.read()
222 self._raise_not_found_for_status(r, url)
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\client_reqrep.py:1036, in ClientResponse.read(self)
1035 try:
-> 1036 self._body = await self.content.read()
1037 for trace in self._traces:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:375, in StreamReader.read(self, n)
374 while True:
--> 375 block = await self.readany()
376 if not block:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:397, in StreamReader.readany(self)
396 while not self._buffer and not self._eof:
--> 397 await self._wait("readany")
399 return self._read_nowait(-1)
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\streams.py:304, in StreamReader._wait(self, func_name)
303 with self._timer:
--> 304 await waiter
305 else:
File ~\Anaconda3\envs\satellite\lib\site-packages\aiohttp\helpers.py:721, in TimerContext.__exit__(self, exc_type, exc_val, exc_tb)
720 if exc_type is asyncio.CancelledError and self._cancelled:
--> 721 raise asyncio.TimeoutError from None
722 return None
TimeoutError:
The above exception was the direct cause of the following exception:
FSTimeoutError Traceback (most recent call last)
Input In [62], in <cell line: 3>()
1 # Flood Region Point - Thiruvanthpuram
2 filename = "Soil_Moisture_sample.csv"
----> 3 parsed_data[["soil","tmax","tmin","ppt"]].sel(lon=8.520833, lat=76.4375, method="nearest").to_dataframe().to_csv(filename,mode='a',index=False, header=False)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5898, in Dataset.to_dataframe(self, dim_order)
5870 """Convert this dataset into a pandas.DataFrame.
5871
5872 Non-index variables in this dataset form the columns of the
(...)
5893
5894 """
5896 ordered_dims = self._normalize_dim_order(dim_order=dim_order)
-> 5898 return self._to_dataframe(ordered_dims=ordered_dims)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5862, in Dataset._to_dataframe(self, ordered_dims)
5860 def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
5861 columns = [k for k in self.variables if k not in self.dims]
-> 5862 data = [
5863 self._variables[k].set_dims(ordered_dims).values.reshape(-1)
5864 for k in columns
5865 ]
5866 index = self.coords.to_index([*ordered_dims])
5867 return pd.DataFrame(dict(zip(columns, data)), index=index)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\dataset.py:5863, in <listcomp>(.0)
5860 def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
5861 columns = [k for k in self.variables if k not in self.dims]
5862 data = [
-> 5863 self._variables[k].set_dims(ordered_dims).values.reshape(-1)
5864 for k in columns
5865 ]
5866 index = self.coords.to_index([*ordered_dims])
5867 return pd.DataFrame(dict(zip(columns, data)), index=index)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\variable.py:527, in Variable.values(self)
524 @property
525 def values(self):
526 """The variable's data as a numpy.ndarray"""
--> 527 return _as_array_or_item(self._data)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\variable.py:267, in _as_array_or_item(data)
253 def _as_array_or_item(data):
254 """Return the given values as a numpy array, or as an individual item if
255 it's a 0d datetime64 or timedelta64 array.
256
(...)
265 TODO: remove this (replace with np.asarray) once these issues are fixed
266 """
--> 267 data = np.asarray(data)
268 if data.ndim == 0:
269 if data.dtype.kind == "M":
File ~\AppData\Roaming\Python\Python38\site-packages\dask\array\core.py:1696, in Array.__array__(self, dtype, **kwargs)
1695 def __array__(self, dtype=None, **kwargs):
-> 1696 x = self.compute()
1697 if dtype and x.dtype != dtype:
1698 x = x.astype(dtype)
File ~\AppData\Roaming\Python\Python38\site-packages\dask\base.py:315, in DaskMethodsMixin.compute(self, **kwargs)
291 def compute(self, **kwargs):
292 """Compute this dask collection
293
294 This turns a lazy Dask collection into its in-memory equivalent.
(...)
313 dask.base.compute
314 """
--> 315 (result,) = compute(self, traverse=False, **kwargs)
316 return result
File ~\AppData\Roaming\Python\Python38\site-packages\dask\base.py:600, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
597 keys.append(x.__dask_keys__())
598 postcomputes.append(x.__dask_postcompute__())
--> 600 results = schedule(dsk, keys, **kwargs)
601 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
File ~\AppData\Roaming\Python\Python38\site-packages\dask\threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
86 elif isinstance(pool, multiprocessing.pool.Pool):
87 pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
90 pool.submit,
91 pool._max_workers,
92 dsk,
93 keys,
94 cache=cache,
95 get_id=_thread_get_id,
96 pack_exception=pack_exception,
97 **kwargs,
98 )
100 # Cleanup pools associated to dead threads
101 with pools_lock:
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
509 _execute_task(task, data) # Re-execute locally
510 else:
--> 511 raise_exception(exc, tb)
512 res, worker_id = loads(res_info)
513 state["cache"][key] = res
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:319, in reraise(exc, tb)
317 if exc.__traceback__ is not tb:
318 raise exc.with_traceback(tb)
--> 319 raise exc
File ~\AppData\Roaming\Python\Python38\site-packages\dask\local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
222 try:
223 task, data = loads(task_info)
--> 224 result = _execute_task(task, data)
225 id = get_id()
226 result = dumps((result, id))
File ~\AppData\Roaming\Python\Python38\site-packages\dask\core.py:119, in _execute_task(arg, cache, dsk)
115 func, args = arg[0], arg[1:]
116 # Note: Don't assign the subtask results to a variable. numpy detects
117 # temporaries by their reference count and can execute certain
118 # operations in-place.
--> 119 return func(*(_execute_task(a, cache) for a in args))
120 elif not ishashable(arg):
121 return arg
File ~\AppData\Roaming\Python\Python38\site-packages\dask\array\core.py:128, in getter(a, b, asarray, lock)
123 # Below we special-case `np.matrix` to force a conversion to
124 # `np.ndarray` and preserve original Dask behavior for `getter`,
125 # as for all purposes `np.matrix` is array-like and thus
126 # `is_arraylike` evaluates to `True` in that case.
127 if asarray and (not is_arraylike(c) or isinstance(c, np.matrix)):
--> 128 c = np.asarray(c)
129 finally:
130 if lock:
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:459, in ImplicitToExplicitIndexingAdapter.__array__(self, dtype)
458 def __array__(self, dtype=None):
--> 459 return np.asarray(self.array, dtype=dtype)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:623, in CopyOnWriteArray.__array__(self, dtype)
622 def __array__(self, dtype=None):
--> 623 return np.asarray(self.array, dtype=dtype)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\core\indexing.py:524, in LazilyIndexedArray.__array__(self, dtype)
522 def __array__(self, dtype=None):
523 array = as_indexable(self.array)
--> 524 return np.asarray(array[self.key], dtype=None)
File ~\Anaconda3\envs\satellite\lib\site-packages\xarray\backends\zarr.py:76, in ZarrArrayWrapper.__getitem__(self, key)
74 array = self.get_array()
75 if isinstance(key, indexing.BasicIndexer):
---> 76 return array[key.tuple]
77 elif isinstance(key, indexing.VectorizedIndexer):
78 return array.vindex[
79 indexing._arrayize_vectorized_indexer(key, self.shape).tuple
80 ]
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:788, in Array.__getitem__(self, selection)
786 result = self.vindex[selection]
787 else:
--> 788 result = self.get_basic_selection(pure_selection, fields=fields)
789 return result
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:914, in Array.get_basic_selection(self, selection, out, fields)
911 return self._get_basic_selection_zd(selection=selection, out=out,
912 fields=fields)
913 else:
--> 914 return self._get_basic_selection_nd(selection=selection, out=out,
915 fields=fields)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:957, in Array._get_basic_selection_nd(self, selection, out, fields)
951 def _get_basic_selection_nd(self, selection, out=None, fields=None):
952 # implementation of basic selection for array with at least one dimension
953
954 # setup indexer
955 indexer = BasicIndexer(selection, self)
--> 957 return self._get_selection(indexer=indexer, out=out, fields=fields)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:1247, in Array._get_selection(self, indexer, out, fields)
1241 if not hasattr(self.chunk_store, "getitems") or \
1242 any(map(lambda x: x == 0, self.shape)):
1243 # sequentially get one key at a time from storage
1244 for chunk_coords, chunk_selection, out_selection in indexer:
1245
1246 # load chunk selection into output array
-> 1247 self._chunk_getitem(chunk_coords, chunk_selection, out, out_selection,
1248 drop_axes=indexer.drop_axes, fields=fields)
1249 else:
1250 # allow storage to get multiple items at once
1251 lchunk_coords, lchunk_selection, lout_selection = zip(*indexer)
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\core.py:1939, in Array._chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection, drop_axes, fields)
1935 ckey = self._chunk_key(chunk_coords)
1937 try:
1938 # obtain compressed data for chunk
-> 1939 cdata = self.chunk_store[ckey]
1941 except KeyError:
1942 # chunk not initialized
1943 if self._fill_value is not None:
File ~\Anaconda3\envs\satellite\lib\site-packages\zarr\storage.py:717, in KVStore.__getitem__(self, key)
716 def __getitem__(self, key):
--> 717 return self._mutable_mapping[key]
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\mapping.py:137, in FSMap.__getitem__(self, key, default)
135 k = self._key_to_str(key)
136 try:
--> 137 result = self.fs.cat(k)
138 except self.missing_exceptions:
139 if default is not None:
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:111, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
108 @functools.wraps(func)
109 def wrapper(*args, **kwargs):
110 self = obj or args[0]
--> 111 return sync(self.loop, func, *args, **kwargs)
File ~\Anaconda3\envs\satellite\lib\site-packages\fsspec\asyn.py:94, in sync(loop, func, timeout, *args, **kwargs)
91 return_result = result[0]
92 if isinstance(return_result, asyncio.TimeoutError):
93 # suppress asyncio.TimeoutError, raise FSTimeoutError
---> 94 raise FSTimeoutError from return_result
95 elif isinstance(return_result, BaseException):
96 raise return_result
FSTimeoutError:
In the line:
store = fsspec.get_mapper(asset.href)
You can pass extra arguments to the fsspec backend, in this case HTTP; see fsspec.implementations.http.HTTPFileSystem. Here, client_kwargs is passed through to aiohttp.ClientSession and can include an optional timeout argument. Your call may look something like:
from aiohttp import ClientTimeout
store = fsspec.get_mapper(asset.href, client_kwargs={"timeout": ClientTimeout(total=5000, connect=1000)})
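Applied to the code in the question, the whole setup would look roughly like this (a sketch: asset is the Planetary Computer STAC asset from the original snippet, and the timeout values from the answer above are kept, in seconds):
import fsspec
import xarray as xr
from aiohttp import ClientTimeout

# client_kwargs is forwarded by HTTPFileSystem to aiohttp.ClientSession,
# so the ClientTimeout below governs the HTTP requests made while reading chunks
store = fsspec.get_mapper(
    asset.href,
    client_kwargs={"timeout": ClientTimeout(total=5000, connect=1000)},
)
data = xr.open_zarr(store, **asset.extra_fields["xarray:open_kwargs"])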

py2neo Issue: ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt://localhost:7687')

I am trying to replicate this example in Neo4j Desktop:
https://stellargraph.readthedocs.io/en/stable/demos/connector/neo4j/load-cora-into-neo4j.html
I am able to reproduce everything until I get to the following lines:
import os
import py2neo

default_host = os.environ.get("STELLARGRAPH_NEO4J_HOST")
# Create the Neo4j Graph database object; the arguments can be edited to specify location and authentication
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
I have made the following attempts to create the Neo4j Graph database object:
#1
default_host = os.environ.get("StellarGraph")
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
#2
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port= 7687, secure=True)
#3
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port= 7687, secure=True, name= "StellarGraph")
However, each time I attempt this, it results in some variation of this error:
IndexError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:806, in ConnectionPool.acquire(self, force_reset, can_overfill)
804 try:
805 # Plan A: select a free connection from the pool
--> 806 cx = self._free_list.popleft()
807 except IndexError:
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:62, in Wire.open(cls, address, timeout, keep_alive, on_broken)
61 try:
---> 62 s.connect(address)
63 except (IOError, OSError) as error:
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
WireError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:355, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
354 try:
--> 355 wire = cls._connect(profile, on_broken=on_broken)
356 protocol_version = cls._handshake(wire)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:369, in Bolt._connect(cls, profile, on_broken)
368 log.debug("[#%04X] C: (Dialing <%s>)", 0, profile.address)
--> 369 wire = Wire.open(profile.address, keep_alive=True, on_broken=on_broken)
370 local_port = wire.local_address.port_number
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:64, in Wire.open(cls, address, timeout, keep_alive, on_broken)
63 except (IOError, OSError) as error:
---> 64 raise_from(WireError("Cannot connect to %r" % (address,)), error)
65 return cls(s, on_broken=on_broken)
File <string>:3, in raise_from(value, from_value)
WireError: Cannot connect to IPv4Address(('localhost', 7687))
The above exception was the direct cause of the following exception:
ConnectionUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 2>()
1 uri = 'bolt://localhost:7687'
----> 2 graph = Graph(uri, auth=("neo4j", "mypass"), port= 7687, secure=True, name= "StellarGraph")
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:288, in Graph.__init__(self, profile, name, **settings)
287 def __init__(self, profile=None, name=None, **settings):
--> 288 self.service = GraphService(profile, **settings)
289 self.__name__ = name
290 self.schema = Schema(self)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:119, in GraphService.__init__(self, profile, **settings)
116 if connector_settings["init_size"] is None and not profile.routing:
117 # Ensures credentials are checked on construction
118 connector_settings["init_size"] = 1
--> 119 self._connector = Connector(profile, **connector_settings)
120 self._graphs = {}
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:960, in Connector.__init__(self, profile, user_agent, init_size, max_size, max_age, routing_refresh_ttl)
958 else:
959 self._router = None
--> 960 self._add_pools(*self._initial_routers)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:982, in Connector._add_pools(self, *profiles)
980 continue
981 log.debug("Adding connection pool for profile %r", profile)
--> 982 pool = ConnectionPool.open(
983 profile,
984 user_agent=self._user_agent,
985 init_size=self._init_size,
986 max_size=self._max_size,
987 max_age=self._max_age,
988 on_broken=self._on_broken)
989 self._pools[profile] = pool
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in ConnectionPool.open(cls, profile, user_agent, init_size, max_size, max_age, on_broken)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in <listcomp>(.0)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:813, in ConnectionPool.acquire(self, force_reset, can_overfill)
807 except IndexError:
808 if self._has_capacity() or can_overfill:
809 # Plan B: if the pool isn't full, open
810 # a new connection. This may raise a
811 # ConnectionUnavailable exception, which
812 # should bubble up to the caller.
--> 813 cx = self._connect()
814 if cx.supports_multi():
815 self._supports_multi = True
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:764, in ConnectionPool._connect(self)
761 def _connect(self):
762 """ Open and return a new connection.
763 """
--> 764 cx = Connection.open(self.profile, user_agent=self.user_agent,
765 on_release=lambda c: self.release(c),
766 on_broken=lambda msg: self.__on_broken(msg))
767 self._server_agent = cx.server_agent
768 return cx
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:174, in Connection.open(cls, profile, user_agent, on_release, on_broken)
172 if profile.protocol == "bolt":
173 from py2neo.client.bolt import Bolt
--> 174 return Bolt.open(profile, user_agent=user_agent,
175 on_release=on_release, on_broken=on_broken)
176 elif profile.protocol == "http":
177 from py2neo.client.http import HTTP
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:364, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
362 return bolt
363 except (TypeError, WireError) as error:
--> 364 raise_from(ConnectionUnavailable("Cannot open connection to %r" % profile), error)
File <string>:3, in raise_from(value, from_value)
ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt+s://localhost:7687')
I have also tried variations on this fix, but got the same error:
ISSUE IN CONNECTING py2neo v4 to my neo4j server
I appreciate any help resolving this issue. Thanks!
I was able to resolve this with the following syntax:
graph = Graph('neo4j://localhost:7687', user="neo4j", password="999")
However, I am now having an issue with the following block:
empty_db_query = """
MATCH(n) DETACH
DELETE(n)
"""
tx = graph.begin(autocommit=True)
tx.evaluate(empty_db_query)
For newer versions of py2neo, graph.begin() takes readonly=False instead of autocommit=True, but in any case, I now get this error:
ServiceUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 6>()
1 empty_db_query = """
2 MATCH(n) DETACH
3 DELETE(n)
4 """
----> 6 tx = graph.begin(readonly=False)
7 tx.evaluate(empty_db_query)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:351, in Graph.begin(self, readonly)
340 def begin(self, readonly=False,
341 # after=None, metadata=None, timeout=None
342 ):
343 """ Begin a new :class:`~py2neo.Transaction`.
344
345 :param readonly: if :py:const:`True`, will begin a readonly
(...)
349 removed. Use the 'auto' method instead.*
350 """
--> 351 return Transaction(self, autocommit=False, readonly=readonly,
352 # after, metadata, timeout
353 )
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:915, in Transaction.__init__(self, graph, autocommit, readonly)
913 self._ref = None
914 else:
--> 915 self._ref = self._connector.begin(self.graph.name, readonly=readonly,
916 # after, metadata, timeout
917 )
918 self._readonly = readonly
919 self._closed = False
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1357, in Connector.begin(self, graph_name, readonly)
1345 def begin(self, graph_name, readonly=False,
1346 # after=None, metadata=None, timeout=None
1347 ):
1348 """ Begin a new explicit transaction.
1349
1350 :param graph_name:
(...)
1355 :raises Failure: if the server signals a failure condition
1356 """
-> 1357 cx = self._acquire(graph_name)
1358 try:
1359 return cx.begin(graph_name, readonly=readonly,
1360 # after=after, metadata=metadata, timeout=timeout
1361 )
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1111, in Connector._acquire(self, graph_name, readonly)
1109 return self._acquire_ro(graph_name)
1110 else:
-> 1111 return self._acquire_rw(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1203, in Connector._acquire_rw(self, graph_name)
1199 # TODO: exit immediately if the server/cluster is in readonly mode
1201 while True:
-> 1203 ro_profiles, rw_profiles = self._get_profiles(graph_name, readonly=False)
1204 if rw_profiles:
1205 # There is at least one writer, so collect the pools
1206 # for those writers. In all implementations to date,
1207 # a Neo4j cluster will only ever contain at most one
1208 # writer (per database). But this algorithm should
1209 # still survive if that changes.
1210 pools = [pool for profile, pool in list(self._pools.items())
1211 if profile in rw_profiles]
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1016, in Connector._get_profiles(self, graph_name, readonly)
1014 rt.wait_until_updated()
1015 else:
-> 1016 self.refresh_routing_table(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1064, in Connector.refresh_routing_table(self, graph_name)
1062 cx.release()
1063 else:
-> 1064 raise ServiceUnavailable("Cannot connect to any known routers")
1065 finally:
1066 rt.set_not_updating()
ServiceUnavailable: Cannot connect to any known routers
Appreciate any help in resolving this. Thank you!
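Since the newer py2neo API came up above, here is a minimal sketch of how that delete query is usually issued against py2neo 2021.x, assuming a local single instance and the credentials from the earlier resolution (placeholders, not a verified fix for the routing error):
from py2neo import Graph

# a plain bolt:// URI connects directly to a single instance; the neo4j:// scheme
# asks the driver to discover cluster routers, which is what the
# "Cannot connect to any known routers" error above complains about
graph = Graph("bolt://localhost:7687", user="neo4j", password="999")

empty_db_query = "MATCH (n) DETACH DELETE n"

# explicit transaction: begin() takes readonly, and the commit goes through the Graph
tx = graph.begin(readonly=False)
tx.evaluate(empty_db_query)
graph.commit(tx)

# or run it as a single auto-commit call
graph.run(empty_db_query)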

Got 504 Deadline Exceeded in Jupyter Notebook (Python) with BigQuery

I am trying to get the result of a Google BigQuery query into a pandas dataframe (in a Jupyter notebook).
But every time I try to run the query I get a DeadlineExceeded: 504 Deadline Exceeded.
This happens not only for queries in my own BQ project but also for other projects.
I have tried a lot of options to run the query, like those described here: https://cloud.google.com/bigquery/docs/bigquery-storage-python-pandas
Does anyone have an idea how to fix this?
Query:
%load_ext google.cloud.bigquery
%%bigquery tax_forms --use_bqstorage_api
SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`
---------------------------------------------------------------------------
_MultiThreadedRendezvous Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in error_remapped_callable(*args, **kwargs)
149 prefetch_first = getattr(callable_, "_prefetch_first_result_", True)
--> 150 return _StreamingResponseIterator(result, prefetch_first_result=prefetch_first)
151 except grpc.RpcError as exc:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in __init__(self, wrapped, prefetch_first_result)
72 if prefetch_first_result:
---> 73 self._stored_first_result = six.next(self._wrapped)
74 except TypeError:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\grpc\_channel.py in __next__(self)
415 def __next__(self):
--> 416 return self._next()
417
~\AppData\Local\Continuum\anaconda3\lib\site-packages\grpc\_channel.py in _next(self)
705 elif self._state.code is not None:
--> 706 raise self
707
_MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.DEADLINE_EXCEEDED
details = "Deadline Exceeded"
debug_error_string = "{"created":"@1597838569.388000000","description":"Error received from peer ipv4:172.217.168.202:443","file":"src/core/lib/surface/call.cc","file_line":1062,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>
The above exception was the direct cause of the following exception:
DeadlineExceeded Traceback (most recent call last)
<ipython-input-2-4fdaec7219df> in <module>
----> 1 get_ipython().run_cell_magic('bigquery', 'tax_forms --use_bqstorage_api', 'SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`\n')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2357 with self.builtin_trap:
2358 args = (magic_arg_s, cell)
-> 2359 result = fn(*args, **kwargs)
2360 return result
2361
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\magics.py in _cell_magic(line, query)
589 )
590 else:
--> 591 result = query_job.to_dataframe(bqstorage_client=bqstorage_client)
592
593 if args.destination_var:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\job.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object)
3381 progress_bar_type=progress_bar_type,
3382 create_bqstorage_client=create_bqstorage_client,
-> 3383 date_as_object=date_as_object,
3384 )
3385
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object)
1726 progress_bar_type=progress_bar_type,
1727 bqstorage_client=bqstorage_client,
-> 1728 create_bqstorage_client=create_bqstorage_client,
1729 )
1730
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in to_arrow(self, progress_bar_type, bqstorage_client, create_bqstorage_client)
1544 record_batches = []
1545 for record_batch in self._to_arrow_iterable(
-> 1546 bqstorage_client=bqstorage_client
1547 ):
1548 record_batches.append(record_batch)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\table.py in _to_page_iterable(self, bqstorage_download, tabledata_list_download, bqstorage_client)
1433 ):
1434 if bqstorage_client is not None:
-> 1435 for item in bqstorage_download():
1436 yield item
1437 return
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\_pandas_helpers.py in _download_table_bqstorage(project_id, table, bqstorage_client, preserve_order, selected_fields, page_to_item)
723 # Call result() on any finished threads to raise any
724 # exceptions encountered.
--> 725 future.result()
726
727 try:
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
426 raise CancelledError()
427 elif self._state == FINISHED:
--> 428 return self.__get_result()
429
430 self._condition.wait(timeout)
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
~\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\thread.py in run(self)
55
56 try:
---> 57 result = self.fn(*self.args, **self.kwargs)
58 except BaseException as exc:
59 self.future.set_exception(exc)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery\_pandas_helpers.py in _download_table_bqstorage_stream(download_state, bqstorage_client, session, stream, worker_queue, page_to_item)
591 rowstream = bqstorage_client.read_rows(position).rows(session)
592 else:
--> 593 rowstream = bqstorage_client.read_rows(stream.name).rows(session)
594
595 for page in rowstream.pages:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery_storage_v1\client.py in read_rows(self, name, offset, retry, timeout, metadata)
120 retry=retry,
121 timeout=timeout,
--> 122 metadata=metadata,
123 )
124 return reader.ReadRowsStream(
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\cloud\bigquery_storage_v1\gapic\big_query_read_client.py in read_rows(self, read_stream, offset, retry, timeout, metadata)
370
371 return self._inner_api_calls["read_rows"](
--> 372 request, retry=retry, timeout=timeout, metadata=metadata
373 )
374
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\gapic_v1\method.py in __call__(self, *args, **kwargs)
143 kwargs["metadata"] = metadata
144
--> 145 return wrapped_func(*args, **kwargs)
146
147
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.py in retry_wrapped_func(*args, **kwargs)
284 sleep_generator,
285 self._deadline,
--> 286 on_error=on_error,
287 )
288
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error)
182 for sleep in sleep_generator:
183 try:
--> 184 return target()
185
186 # pylint: disable=broad-except
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\timeout.py in func_with_timeout(*args, **kwargs)
212 """Wrapped function that adds timeout."""
213 kwargs["timeout"] = next(timeouts)
--> 214 return func(*args, **kwargs)
215
216 return func_with_timeout
~\AppData\Local\Continuum\anaconda3\lib\site-packages\google\api_core\grpc_helpers.py in error_remapped_callable(*args, **kwargs)
150 return _StreamingResponseIterator(result, prefetch_first_result=prefetch_first)
151 except grpc.RpcError as exc:
--> 152 six.raise_from(exceptions.from_grpc_error(exc), exc)
153
154 return error_remapped_callable
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in raise_from(value, from_value)
DeadlineExceeded: 504 Deadline Exceeded
Let me know if you need to know more. Thanks in advance.
Rutger
It turned out to be a conflict between a conda package and a pip package.
I resolved it by reinstalling all the packages.
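For reference, the explicit-client equivalent of the %%bigquery cell magic in the question looks roughly like this (a sketch; it assumes the google-cloud-bigquery package is installed and that the project and credentials are already configured, just as the magic requires):
from google.cloud import bigquery

client = bigquery.Client()

query = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`"

# create_bqstorage_client=True mirrors --use_bqstorage_api: the result rows are
# streamed through the BigQuery Storage API rather than the slower REST endpoint
tax_forms = client.query(query).to_dataframe(create_bqstorage_client=True)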

OSError: invalid argument while converting a Spark SQL DataFrame to a pandas DataFrame in PySpark

I loaded a CSV file using the following code:
from pyspark import SparkContext
from pyspark.sql import *

sc = SparkContext(master='local[1]')
sq = SQLContext(sc)  # a SQLContext is needed so that sq.read.csv below works
df = sq.read.csv(file_path, header='true', inferSchema='true')
But when I tried to convert this Spark dataframe to a pandas dataframe using the following code:
pdf = df.toPandas()
I got the following error:
OSError Traceback (most recent call last)
<ipython-input-27-cf3578af3a8d> in <module>()
----> 1 a = df.toPandas()
D:\softwares\anaconda\lib\site-packages\pyspark\sql\dataframe.py in toPandas(self)
1964 raise RuntimeError("%s\n%s" % (_exception_message(e), msg))
1965 else:
-> 1966 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
1967
1968 dtype = {}
D:\softwares\anaconda\lib\site-packages\pyspark\sql\dataframe.py in collect(self)
465 with SCCallSiteSync(self._sc) as css:
466 port = self._jdf.collectToPython()
--> 467 return list(_load_from_socket(port, BatchedSerializer(PickleSerializer())))
468
469 @ignore_unicode_prefix
D:\softwares\anaconda\lib\site-packages\pyspark\serializers.py in load_stream(self, stream)
143 while True:
144 try:
--> 145 yield self._read_with_length(stream)
146 except EOFError:
147 return
D:\softwares\anaconda\lib\site-packages\pyspark\serializers.py in _read_with_length(self, stream)
168 if len(obj) < length:
169 raise EOFError
--> 170 return self.loads(obj)
171
172 def dumps(self, obj):
D:\softwares\anaconda\lib\site-packages\pyspark\serializers.py in loads(self, obj, encoding)
557 if sys.version >= '3':
558 def loads(self, obj, encoding="bytes"):
--> 559 return pickle.loads(obj, encoding=encoding)
560 else:
561 def loads(self, obj, encoding=None):
D:\softwares\anaconda\lib\site-packages\pyspark\sql\types.py in <lambda>(*a)
1426 # This is used to unpickle a Row from JVM
1427 def _create_row_inbound_converter(dataType):
-> 1428 return lambda *a: dataType.fromInternal(a)
1429
1430
D:\softwares\anaconda\lib\site-packages\pyspark\sql\types.py in fromInternal(self, obj)
628 # Only calling fromInternal function for fields that need conversion
629 values = [f.fromInternal(v) if c else v
--> 630 for f, v, c in zip(self.fields, obj, self._needConversion)]
631 else:
632 values = obj
D:\softwares\anaconda\lib\site-packages\pyspark\sql\types.py in <listcomp>(.0)
628 # Only calling fromInternal function for fields that need conversion
629 values = [f.fromInternal(v) if c else v
--> 630 for f, v, c in zip(self.fields, obj, self._needConversion)]
631 else:
632 values = obj
D:\softwares\anaconda\lib\site-packages\pyspark\sql\types.py in fromInternal(self, obj)
440
441 def fromInternal(self, obj):
--> 442 return self.dataType.fromInternal(obj)
443
444 def typeName(self):
D:\softwares\anaconda\lib\site-packages\pyspark\sql\types.py in fromInternal(self, ts)
198 if ts is not None:
199 # using int to avoid precision loss in float
--> 200 return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
201
202
OSError: [Errno 22] Invalid argument
Can anyone help me figure out how to solve this error?
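The traceback shows the failure inside datetime.datetime.fromtimestamp() while Spark converts a column it inferred as a timestamp back to Python datetimes. Not a confirmed fix, but a quick way to narrow it down is to check the inferred schema and, if necessary, re-read the file without inference (a sketch reusing the names from the question):
# see which columns Spark inferred as timestamp types
df.printSchema()

# re-reading without schema inference keeps every column as a string, which skips
# the fromtimestamp() conversion that raises OSError [Errno 22] in the traceback
df_str = sq.read.csv(file_path, header='true', inferSchema='false')
pdf = df_str.toPandas()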

Pandas duplicate datetimeindex entries lead to odd exception

Let's take the following contrived example where I create a DataFrame and then make a DatetimeIndex using a column with duplicate entries. I then place this DataFrame into a Panel and then attempt to iterate over the major axis.
import pandas as pd
import datetime as dt
from collections import OrderedDict

a = [1371215933513120, 1371215933513121, 1371215933513122, 1371215933513122]
b = [1, 2, 3, 4]
df = pd.DataFrame({'a': a, 'b': b, 'c': [dt.datetime.fromtimestamp(t/1000000.) for t in a]})
df.index = pd.DatetimeIndex(df['c'])
d = OrderedDict()
d['x'] = df
p = pd.Panel(d)
for y in p.major_axis:
    print y
    print p.major_xs(y)
This leads to the following output:
2013-06-14 15:18:53.513120
x
a 1371215933513120
b 1
c 2013-06-14 15:18:53.513120
2013-06-14 15:18:53.513121
x
a 1371215933513121
b 2
c 2013-06-14 15:18:53.513121
2013-06-14 15:18:53.513122
Followed by a rather cryptic (to me) error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-35-045aaae5a074> in <module>()
13 for y in p.major_axis:
14 print y
---> 15 print p.major_xs(y)
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/frame.py in __str__(self)
667 if py3compat.PY3:
668 return self.__unicode__()
--> 669 return self.__bytes__()
670
671 def __bytes__(self):
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/frame.py in __bytes__(self)
677 """
678 encoding = com.get_option("display.encoding")
--> 679 return self.__unicode__().encode(encoding, 'replace')
680
681 def __unicode__(self):
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/frame.py in __unicode__(self)
692 # This needs to compute the entire repr
693 # so don't do it unless rownum is bounded
--> 694 fits_horizontal = self._repr_fits_horizontal_()
695
696 if fits_vertical and fits_horizontal:
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/frame.py in _repr_fits_horizontal_(self)
652 d=d.iloc[:min(max_rows, height,len(d))]
653
--> 654 d.to_string(buf=buf)
655 value = buf.getvalue()
656 repr_width = max([len(l) for l in value.split('\n')])
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/frame.py in to_string(self, buf, columns, col_space, colSpace, header, index, na_rep, formatters, float_format, sparsify, nanRep, index_names, justify, force_unicode, line_width)
1489 header=header, index=index,
1490 line_width=line_width)
-> 1491 formatter.to_string()
1492
1493 if buf is None:
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/format.py in to_string(self, force_unicode)
312 text = info_line
313 else:
--> 314 strcols = self._to_str_columns()
315 if self.line_width is None:
316 text = adjoin(1, *strcols)
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/format.py in _to_str_columns(self)
265 for i, c in enumerate(self.columns):
266 if self.header:
--> 267 fmt_values = self._format_col(i)
268 cheader = str_columns[i]
269
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/format.py in _format_col(self, i)
403 float_format=self.float_format,
404 na_rep=self.na_rep,
--> 405 space=self.col_space)
406
407 def to_html(self, classes=None):
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/format.py in format_array(values, formatter, float_format, na_rep, digits, space, justify)
1319 justify=justify)
1320
-> 1321 return fmt_obj.get_result()
1322
1323
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/format.py in get_result(self)
1335
1336 def get_result(self):
-> 1337 fmt_values = self._format_strings()
1338 return _make_fixed_width(fmt_values, self.justify)
1339
/usr/local/lib/python2.7/dist-packages/pandas-0.11.0-py2.7-linux-x86_64.egg/pandas/core/format.py in _format_strings(self)
1362
1363 print "vals:", vals
-> 1364 is_float = lib.map_infer(vals, com.is_float) & notnull(vals)
1365 leading_space = is_float.any()
1366
ValueError: operands could not be broadcast together with shapes (2) (2,3)
Now, since I've explained that I'm creating an index with duplicate entries, the source of the error is clear. Without knowing that, however, it would have been much harder (again, for a novice like me) to figure out why this exception was popping up.
This leads me to a few questions.
Is this really the expected behavior of pandas? Is it forbidden to create an index with duplicate entries, or is it just forbidden to iterate over them?
If it's forbidden to create such an index, then shouldn't an exception be raised when initially creating it?
If the iteration is somehow incorrect, shouldn't the error be more informative?
Am I doing something wrong?
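As an aside, pandas does allow non-unique indexes, which is why the construction itself succeeds; a quick way to spot the duplicates or collapse them before building the Panel (a sketch reusing df and d from the example, in the same Python 2 style):
# the index is not unique because two rows share 15:18:53.513122
print df.index.is_unique

# collapse duplicated timestamps, e.g. by keeping the first row per index value
df_unique = df.groupby(level=0).first()
d['x'] = df_unique
p = pd.Panel(d)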
