Running Hive query through impala.dbapi fails when embedding "ADD JAR" - python

I'm launching a Hive query through Python's impala.dbapi, which works nicely as follows:
import os
import pandas as pd
from impala.dbapi import connect
from impala.util import as_pandas
from datetime import datetime
user=os.environ['HIVE_USER']
password=os.environ['HIVE_PASSWORD']
up_to_date_query = '''
select * from dejavu.tracking_events limit 1
'''
conn = connect(host='ecprdbhdp02-clientgw.kenshooprd.local', port=10000,
               user=user,
               password=password,
               auth_mechanism='PLAIN')
cursor = conn.cursor()
cursor.execute(up_to_date_query)
df = as_pandas(cursor)
df.head()
But when I add the following ADD JAR clause:
up_to_date_query = '''
ADD JAR hdfs://BICluster/user/yossis/udfs/hive-udf-0.1-SNAPSHOT.jar;
select * from dejavu.tracking_events limit 1
'''
I'm getting the following error:
---------------------------------------------------------------------------
HiveServer2Error Traceback (most recent call last)
<ipython-input-10-1e512abcc69e> in <module>()
4 auth_mechanism='PLAIN')
5 cursor = conn.cursor()
----> 6 cursor.execute(up_to_date_query)
7 df = as_pandas(cursor)
8 df.head()
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute(self, operation, parameters, configuration)
300 # PEP 249
301 self.execute_async(operation, parameters=parameters,
--> 302 configuration=configuration)
303 log.debug('Waiting for query to finish')
304 self._wait_to_finish() # make execute synchronous
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute_async(self, operation, parameters, configuration)
341 self._last_operation = op
342
--> 343 self._execute_async(op)
344
345 def _debug_log_state(self):
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _execute_async(self, operation_fn)
360 self._reset_state()
361 self._debug_log_state()
--> 362 operation_fn()
363 self._last_operation_active = True
364 self._debug_log_state()
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in op()
338 op = self.session.execute(self._last_operation_string,
339 configuration,
--> 340 async=True)
341 self._last_operation = op
342
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute(self, statement, configuration, async)
1025 confOverlay=configuration,
1026 runAsync=async)
-> 1027 return self._operation('ExecuteStatement', req)
1028
1029 def get_databases(self, schema='.*'):
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _operation(self, kind, request)
955
956 def _operation(self, kind, request):
--> 957 resp = self._rpc(kind, request)
958 return self._get_operation(resp.operationHandle)
959
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _rpc(self, func_name, request)
923 response = self._execute(func_name, request)
924 self._log_response(func_name, response)
--> 925 err_if_rpc_not_ok(response)
926 return response
927
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in err_if_rpc_not_ok(resp)
702 resp.status.statusCode != TStatusCode.SUCCESS_WITH_INFO_STATUS and
703 resp.status.statusCode != TStatusCode.STILL_EXECUTING_STATUS):
--> 704 raise HiveServer2Error(resp.status.errorMessage)
705
706
HiveServer2Error: Error while processing statement: null
Notice that the query works properly when run directly in Hive (through the Hue console).
After searching for similar questions, it seems no one has asked about exactly this problem :(
Thanks in advance!

It seems the ; separator causes the trouble (cursor.execute expects a single statement), so I just ran the ADD JAR separately:
cursor.execute('ADD JAR hdfs://BICluster/user/yossis/udfs/hive-udf-0.1-SNAPSHOT.jar')
up_to_date_query = '''
select * from dejavu.tracking_events limit 1
'''
cursor.execute(up_to_date_query)
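
If you have several such setup statements, a small helper that splits the script on ; and runs each piece in turn keeps the call sites tidy. This is a minimal sketch building on the cursor above; the splitting is deliberately naive and assumes no semicolons appear inside string literals:
def execute_statements(cursor, script):
    """Naively split a multi-statement script on ';' and execute each part."""
    for statement in script.split(';'):
        statement = statement.strip()
        if statement:  # skip empty fragments, e.g. after a trailing ';'
            cursor.execute(statement)

script = '''
ADD JAR hdfs://BICluster/user/yossis/udfs/hive-udf-0.1-SNAPSHOT.jar;
select * from dejavu.tracking_events limit 1
'''
execute_statements(cursor, script)
df = as_pandas(cursor)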

Related

py2neo Issue: ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt://localhost:7687')

I am trying to replicate this example on Neo4j Desktop:
https://stellargraph.readthedocs.io/en/stable/demos/connector/neo4j/load-cora-into-neo4j.html
I am able to reproduce everything until I get to the following lines:
import py2neo
default_host = os.environ.get("STELLARGRAPH_NEO4J_HOST")
# Create the Neo4j Graph database object; the arguments can be edited to specify location and authentication
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
I have tried the following attempts to create the neo4j database object:
#1
default_host = os.environ.get("StellarGraph")
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
#2
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port= 7687, secure=True)
#3
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port= 7687, secure=True, name= "StellarGraph")
However, each time I attempt this, it results in some variation of this error:
IndexError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:806, in ConnectionPool.acquire(self, force_reset, can_overfill)
804 try:
805 # Plan A: select a free connection from the pool
--> 806 cx = self._free_list.popleft()
807 except IndexError:
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:62, in Wire.open(cls, address, timeout, keep_alive, on_broken)
61 try:
---> 62 s.connect(address)
63 except (IOError, OSError) as error:
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
WireError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:355, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
354 try:
--> 355 wire = cls._connect(profile, on_broken=on_broken)
356 protocol_version = cls._handshake(wire)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:369, in Bolt._connect(cls, profile, on_broken)
368 log.debug("[#%04X] C: (Dialing <%s>)", 0, profile.address)
--> 369 wire = Wire.open(profile.address, keep_alive=True, on_broken=on_broken)
370 local_port = wire.local_address.port_number
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:64, in Wire.open(cls, address, timeout, keep_alive, on_broken)
63 except (IOError, OSError) as error:
---> 64 raise_from(WireError("Cannot connect to %r" % (address,)), error)
65 return cls(s, on_broken=on_broken)
File <string>:3, in raise_from(value, from_value)
WireError: Cannot connect to IPv4Address(('localhost', 7687))
The above exception was the direct cause of the following exception:
ConnectionUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 2>()
1 uri = 'bolt://localhost:7687'
----> 2 graph = Graph(uri, auth=("neo4j", "mypass"), port= 7687, secure=True, name= "StellarGraph")
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:288, in Graph.__init__(self, profile, name, **settings)
287 def __init__(self, profile=None, name=None, **settings):
--> 288 self.service = GraphService(profile, **settings)
289 self.__name__ = name
290 self.schema = Schema(self)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:119, in GraphService.__init__(self, profile, **settings)
116 if connector_settings["init_size"] is None and not profile.routing:
117 # Ensures credentials are checked on construction
118 connector_settings["init_size"] = 1
--> 119 self._connector = Connector(profile, **connector_settings)
120 self._graphs = {}
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:960, in Connector.__init__(self, profile, user_agent, init_size, max_size, max_age, routing_refresh_ttl)
958 else:
959 self._router = None
--> 960 self._add_pools(*self._initial_routers)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:982, in Connector._add_pools(self, *profiles)
980 continue
981 log.debug("Adding connection pool for profile %r", profile)
--> 982 pool = ConnectionPool.open(
983 profile,
984 user_agent=self._user_agent,
985 init_size=self._init_size,
986 max_size=self._max_size,
987 max_age=self._max_age,
988 on_broken=self._on_broken)
989 self._pools[profile] = pool
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in ConnectionPool.open(cls, profile, user_agent, init_size, max_size, max_age, on_broken)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in <listcomp>(.0)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:813, in ConnectionPool.acquire(self, force_reset, can_overfill)
807 except IndexError:
808 if self._has_capacity() or can_overfill:
809 # Plan B: if the pool isn't full, open
810 # a new connection. This may raise a
811 # ConnectionUnavailable exception, which
812 # should bubble up to the caller.
--> 813 cx = self._connect()
814 if cx.supports_multi():
815 self._supports_multi = True
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:764, in ConnectionPool._connect(self)
761 def _connect(self):
762 """ Open and return a new connection.
763 """
--> 764 cx = Connection.open(self.profile, user_agent=self.user_agent,
765 on_release=lambda c: self.release(c),
766 on_broken=lambda msg: self.__on_broken(msg))
767 self._server_agent = cx.server_agent
768 return cx
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:174, in Connection.open(cls, profile, user_agent, on_release, on_broken)
172 if profile.protocol == "bolt":
173 from py2neo.client.bolt import Bolt
--> 174 return Bolt.open(profile, user_agent=user_agent,
175 on_release=on_release, on_broken=on_broken)
176 elif profile.protocol == "http":
177 from py2neo.client.http import HTTP
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:364, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
362 return bolt
363 except (TypeError, WireError) as error:
--> 364 raise_from(ConnectionUnavailable("Cannot open connection to %r" % profile), error)
File <string>:3, in raise_from(value, from_value)
ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt+s://localhost:7687')
I have also tried variations on this fix, but got the same error:
ISSUE IN CONNECTING py2neo v4 to my neo4j server
I appreciate any help resolving this issue. Thanks!
I was able to resolve this with the following syntax:
graph = Graph('neo4j://localhost:7687', user="neo4j", password="999")
However, I am now having an issue with the following block:
empty_db_query = """
MATCH(n) DETACH
DELETE(n)
"""
tx = graph.begin(autocommit=True)
tx.evaluate(empty_db_query)
For newer versions of py2neo, graph.begin takes readonly=False instead of autocommit=True, but in any case, I now get this error:
ServiceUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 6>()
1 empty_db_query = """
2 MATCH(n) DETACH
3 DELETE(n)
4 """
----> 6 tx = graph.begin(readonly=False)
7 tx.evaluate(empty_db_query)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:351, in Graph.begin(self, readonly)
340 def begin(self, readonly=False,
341 # after=None, metadata=None, timeout=None
342 ):
343 """ Begin a new :class:`~py2neo.Transaction`.
344
345 :param readonly: if :py:const:`True`, will begin a readonly
(...)
349 removed. Use the 'auto' method instead.*
350 """
--> 351 return Transaction(self, autocommit=False, readonly=readonly,
352 # after, metadata, timeout
353 )
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:915, in Transaction.__init__(self, graph, autocommit, readonly)
913 self._ref = None
914 else:
--> 915 self._ref = self._connector.begin(self.graph.name, readonly=readonly,
916 # after, metadata, timeout
917 )
918 self._readonly = readonly
919 self._closed = False
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1357, in Connector.begin(self, graph_name, readonly)
1345 def begin(self, graph_name, readonly=False,
1346 # after=None, metadata=None, timeout=None
1347 ):
1348 """ Begin a new explicit transaction.
1349
1350 :param graph_name:
(...)
1355 :raises Failure: if the server signals a failure condition
1356 """
-> 1357 cx = self._acquire(graph_name)
1358 try:
1359 return cx.begin(graph_name, readonly=readonly,
1360 # after=after, metadata=metadata, timeout=timeout
1361 )
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1111, in Connector._acquire(self, graph_name, readonly)
1109 return self._acquire_ro(graph_name)
1110 else:
-> 1111 return self._acquire_rw(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1203, in Connector._acquire_rw(self, graph_name)
1199 # TODO: exit immediately if the server/cluster is in readonly mode
1201 while True:
-> 1203 ro_profiles, rw_profiles = self._get_profiles(graph_name, readonly=False)
1204 if rw_profiles:
1205 # There is at least one writer, so collect the pools
1206 # for those writers. In all implementations to date,
1207 # a Neo4j cluster will only ever contain at most one
1208 # writer (per database). But this algorithm should
1209 # still survive if that changes.
1210 pools = [pool for profile, pool in list(self._pools.items())
1211 if profile in rw_profiles]
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1016, in Connector._get_profiles(self, graph_name, readonly)
1014 rt.wait_until_updated()
1015 else:
-> 1016 self.refresh_routing_table(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1064, in Connector.refresh_routing_table(self, graph_name)
1062 cx.release()
1063 else:
-> 1064 raise ServiceUnavailable("Cannot connect to any known routers")
1065 finally:
1066 rt.set_not_updating()
ServiceUnavailable: Cannot connect to any known routers
Appreciate any help in resolving this. Thank you!
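
One thing worth trying (an untested sketch, assuming a standalone Neo4j Desktop database rather than a cluster, with placeholder credentials) is to connect with the bolt:// scheme, which talks to the instance directly instead of asking it for routing information, and to use the newer begin/commit pattern:
from py2neo import Graph

# bolt:// connects to a single instance directly; neo4j:// requests a routing
# table, which a standalone database may not be able to provide.
graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))

empty_db_query = """
MATCH (n) DETACH DELETE (n)
"""

tx = graph.begin(readonly=False)
tx.evaluate(empty_db_query)
graph.commit(tx)  # recent py2neo versions commit via the Graph object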

How do I insert dynamic dates into a SQL query using a Python function?

I have a function that queries Google Cloud Platform (BigQuery) from a Jupyter notebook. How do I change the function to allow for dynamic date entry?
See the example below:
from google.cloud import bigquery
import pandas as pd
def get_data(start, end):
    start_ = start
    end_ = end
    client = bigquery.Client(location="US")
    query = """
    select date, sales
    from data_table
    where date between start_ and end_
    """
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
    )  # API request - starts the query
    df = query_job.to_dataframe()
I tried the following:
def get_data(start, end):
    start_ = start
    end_ = end
    client = bigquery.Client(location="US")
    query = """
    select date, sales
    from data_table
    where date between {} and {}
    """.format(start_, end_)
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location="US",
    )  # API request - starts the query
    df = query_job.to_dataframe()
But that didn't work and I got the following error:
BadRequest Traceback (most recent call last)
/tmp/ipykernel_12962/4209258901.py in <module>
----> 1 df_test = get_data_test.test_data(start = 202201, end = 202204)
~/get_data_test.py in test_data(start, end)
174 ) # API request - starts the query
175
--> 176 df = query_job.to_dataframe()
177 print("data pulled....changing types")
178
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job/query.py in to_dataframe(self, bqstorage_client, dtypes, progress_bar_type, create_bqstorage_client, date_as_object, max_results, geography_as_object)
1692 :mod:`shapely` library cannot be imported.
1693 """
-> 1694 query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
1695 return query_result.to_dataframe(
1696 bqstorage_client=bqstorage_client,
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/_tqdm_helpers.py in wait_for_query(query_job, progress_bar_type, max_results)
86 )
87 if progress_bar is None:
---> 88 return query_job.result(max_results=max_results)
89
90 i = 0
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job/query.py in result(self, page_size, max_results, retry, timeout, start_index, job_retry)
1496 do_get_result = job_retry(do_get_result)
1497
-> 1498 do_get_result()
1499
1500 except exceptions.GoogleAPICallError as exc:
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_wrapped_func(*args, **kwargs)
286 sleep_generator,
287 self._deadline,
--> 288 on_error=on_error,
289 )
290
/opt/conda/lib/python3.7/site-packages/google/api_core/retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error)
188 for sleep in sleep_generator:
189 try:
--> 190 return target()
191
192 # pylint: disable=broad-except
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job/query.py in do_get_result()
1486 self._job_retry = job_retry
1487
-> 1488 super(QueryJob, self).result(retry=retry, timeout=timeout)
1489
1490 # Since the job could already be "done" (e.g. got a finished job
/opt/conda/lib/python3.7/site-packages/google/cloud/bigquery/job/base.py in result(self, retry, timeout)
726
727 kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}
--> 728 return super(_AsyncJob, self).result(timeout=timeout, **kwargs)
729
730 def cancelled(self):
/opt/conda/lib/python3.7/site-packages/google/api_core/future/polling.py in result(self, timeout, retry)
135 # pylint: disable=raising-bad-type
136 # Pylint doesn't recognize that this is valid in this case.
--> 137 raise self._exception
138
139 return self._result
BadRequest: 400 Unrecognized name: start_ at [137:40]
Location: US
Job ID: 4a43e5b8-47cc-4d30-a5f2-df55f427ae65
I feel like this is a simple fix and I'm just missing it.
I found a way. I changed the WHERE clause and added a line to format the query:
where date between {start_date} and {end_date}
query = query.format(start_date = start_, end_date = end_)
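
String formatting works, but BigQuery also supports query parameters, which avoid quoting mistakes and SQL injection. Here is a sketch of the same function using them, assuming date is stored as an integer like 202201 (use the "DATE" parameter type and datetime.date values if it is a real DATE column):
from google.cloud import bigquery

def get_data(start, end):
    client = bigquery.Client(location="US")
    query = """
        select date, sales
        from data_table
        where date between @start_date and @end_date
    """
    job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("start_date", "INT64", start),
            bigquery.ScalarQueryParameter("end_date", "INT64", end),
        ]
    )
    query_job = client.query(query, location="US", job_config=job_config)
    return query_job.to_dataframe()

df = get_data(202201, 202204)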

Error while saving Optuna study to Google Drive from Colab

I can save an arbitrary file to my Drive from Colab like this:
with open("gdrive/My Drive/chapter_classification/output/hello.txt", 'w') as f:
    f.write('hello')
This works fine, but when I use the approach from the official Optuna documentation:
direction = 'minimize'
name = 'opt1'
study = optuna.create_study(sampler=optuna.samplers.TPESampler(),
                            direction=direction,
                            study_name=name,
                            storage=f"gdrive/My Drive/chapter_classification/output/sqlite:///{name}.db",
                            load_if_exists=True)
study.optimize(tune, n_trials=1000)
it throws this error:
ArgumentError Traceback (most recent call last)
<ipython-input-177-f32da2c0f69a> in <module>()
2 direction = 'minimize'
3 name = 'opt1'
----> 4 study = optuna.create_study(sampler=optuna.samplers.TPESampler(),direction=direction,study_name=name, storage="gdrive/My Drive/chapter_classification/output/sqlite:///opt1.db",load_if_exists=True)
5 study.optimize(tune, n_trials=1000)
6 frames
/usr/local/lib/python3.7/dist-packages/optuna/study/study.py in create_study(storage, sampler, pruner, study_name, direction, load_if_exists, directions)
1134 ]
1135
-> 1136 storage = storages.get_storage(storage)
1137 try:
1138 study_id = storage.create_new_study(study_name)
/usr/local/lib/python3.7/dist-packages/optuna/storages/__init__.py in get_storage(storage)
29 return RedisStorage(storage)
30 else:
---> 31 return _CachedStorage(RDBStorage(storage))
32 elif isinstance(storage, RDBStorage):
33 return _CachedStorage(storage)
/usr/local/lib/python3.7/dist-packages/optuna/storages/_rdb/storage.py in __init__(self, url, engine_kwargs, skip_compatibility_check, heartbeat_interval, grace_period, failed_trial_callback)
173
174 try:
--> 175 self.engine = create_engine(self.url, **self.engine_kwargs)
176 except ImportError as e:
177 raise ImportError(
<string> in create_engine(url, **kwargs)
/usr/local/lib/python3.7/dist-packages/sqlalchemy/util/deprecations.py in warned(fn, *args, **kwargs)
307 stacklevel=3,
308 )
--> 309 return fn(*args, **kwargs)
310
311 doc = fn.__doc__ is not None and fn.__doc__ or ""
/usr/local/lib/python3.7/dist-packages/sqlalchemy/engine/create.py in create_engine(url, **kwargs)
528
529 # create url.URL object
--> 530 u = _url.make_url(url)
531
532 u, plugins, kwargs = u._instantiate_plugins(kwargs)
/usr/local/lib/python3.7/dist-packages/sqlalchemy/engine/url.py in make_url(name_or_url)
713
714 if isinstance(name_or_url, util.string_types):
--> 715 return _parse_rfc1738_args(name_or_url)
716 else:
717 return name_or_url
/usr/local/lib/python3.7/dist-packages/sqlalchemy/engine/url.py in _parse_rfc1738_args(name)
775 else:
776 raise exc.ArgumentError(
--> 777 "Could not parse rfc1738 URL from string '%s'" % name
778 )
779
ArgumentError: Could not parse rfc1738 URL from string 'gdrive/My Drive/chapter_classification/output/sqlite:///opt1.db'
So, according to the official documentation of create_study:
When a database URL is passed, Optuna internally uses SQLAlchemy to handle the database. Please refer to SQLAlchemy’s document for further details. If you want to specify non-default options to SQLAlchemy Engine, you can instantiate RDBStorage with your desired options and pass it to the storage argument instead of a URL.
And when you visit the SQLAlchemy documentation, you find that the storage string must be a database URL that starts with the dialect prefix (for SQLite, sqlite:/// followed by the file path).
So all you have to do is change
storage=f"gdrive/My Drive/chapter_classification/output/sqlite:///{name}.db"
to a properly formed URL, with the sqlite:/// prefix in front of the path:
storage = f"sqlite:///gdrive/My Drive/chapter_classification/output/{name}.db"

Snowflake InterfaceError: Failed to convert current row, cause: int too big to convert

I am trying to build a utility script which pulls data from Snowflake into a dataframe within a local Jupyter notebook. When I run it, I keep getting this error:
InterfaceError: (snowflake.connector.errors.InterfaceError) 252005: Failed to convert current row, cause: int too big to convert
Can someone point out what I am doing wrong here? Any help is highly appreciated.
Code:
import os
import pandas as pd
from sqlalchemy import create_engine, pool
from snowflake.sqlalchemy import URL

def read_sql(path, file):
    """Reads SQL file from local env and returns query as string"""
    with open(os.path.join(path, file)) as sql_file:
        sql = sql_file.read()
    return sql

def db_con(sql, user, password, warehouse, database, schema):
    """Setup db connection, runs query and returns pandas df"""
    engine = create_engine(
        URL(account='123',  # account identifier (redacted)
            user=user,
            password=password,
            database=database,
            schema=schema,
            warehouse=warehouse,
            numpy=True),
        poolclass=pool.StaticPool
    )
    try:
        connection = engine.connect()
        connection.execute('Use Warehouse {warehouse};'.format(warehouse=warehouse))
        df = pd.read_sql(sql, engine)
    finally:
        connection.close()
        engine.dispose()
    return df
When I call this function, I get that error:
#Get Data
sql = '''select * from "d1"."public"."some_table" limit 1 '''
df = db_con(sql,
            USERNAME,
            PASSWORD,
            WAREHOUSE,
            DATABASE,
            SCHEMA)
df.head()
Error:
Initialize Query
---------------------------------------------------------------------------
InterfaceError Traceback (most recent call last)
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in fetchall(self, result, dbapi_cursor)
972 try:
--> 973 rows = dbapi_cursor.fetchall()
974 result._soft_close()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchall(self)
990 while True:
--> 991 row = self.fetchone()
992 if row is None:
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchone(self)
956 try:
--> 957 return next(self._result)
958 except StopIteration:
src/snowflake/connector/arrow_result.pyx in snowflake.connector.arrow_result.ArrowResult.__next__()
src/snowflake/connector/arrow_iterator.pyx in snowflake.connector.arrow_iterator.PyArrowIterator.__next__()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in errorhandler_wrapper(connection, cursor, error_class, error_value)
257 cursor.messages.append((error_class, error_value))
--> 258 cursor.errorhandler(connection, cursor, error_class, error_value)
259 return
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in default_errorhandler(connection, cursor, error_class, error_value)
187 """
--> 188 raise error_class(
189 msg=error_value.get("msg"),
InterfaceError: 252005: Failed to convert current row, cause: int too big to convert
The above exception was the direct cause of the following exception:
InterfaceError Traceback (most recent call last)
<ipython-input-48-373f7ee62b3f> in <module>
2 sql = '''select * from "PRODUCTION"."SFDC"."CONTRACT" limit 1 '''
3
----> 4 df = db_con(sql,
5 USERNAME,
6 PASSWORD,
<ipython-input-45-0f30a2664209> in db_con(sql, user, password, warehouse, database, schema)
29 print('Initialize Query')
30 connection.execute('Use Warehouse {warehouse};'.format(warehouse=warehouse))
---> 31 df = pd.read_sql(sql,engine)
32 finally:
33 connection.close()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
519 )
520 else:
--> 521 return pandas_sql.read_query(
522 sql,
523 index_col=index_col,
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
1319 )
1320 else:
-> 1321 data = result.fetchall()
1322 frame = _wrap_result(
1323 data,
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/result.py in fetchall(self)
996 """A synonym for the :meth:`_engine.Result.all` method."""
997
--> 998 return self._allrows()
999
1000 def fetchone(self):
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/result.py in _allrows(self)
406 make_row = self._row_getter
407
--> 408 rows = self._fetchall_impl()
409 if make_row:
410 made_rows = [make_row(row) for row in rows]
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in _fetchall_impl(self)
1772
1773 def _fetchall_impl(self):
-> 1774 return self.cursor_strategy.fetchall(self, self.cursor)
1775
1776 def _fetchmany_impl(self, size=None):
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in fetchall(self, result, dbapi_cursor)
975 return rows
976 except BaseException as e:
--> 977 self.handle_exception(result, dbapi_cursor, e)
978
979
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in handle_exception(self, result, dbapi_cursor, err)
935
936 def handle_exception(self, result, dbapi_cursor, err):
--> 937 result.connection._handle_dbapi_exception(
938 err, None, None, dbapi_cursor, result.context
939 )
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1927 util.raise_(newraise, with_traceback=exc_info[2], from_=e)
1928 elif should_wrap:
-> 1929 util.raise_(
1930 sqlalchemy_exception, with_traceback=exc_info[2], from_=e
1931 )
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/util/compat.py in raise_(***failed resolving arguments***)
209
210 try:
--> 211 raise exception
212 finally:
213 # credit to
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/sqlalchemy/engine/cursor.py in fetchall(self, result, dbapi_cursor)
971 def fetchall(self, result, dbapi_cursor):
972 try:
--> 973 rows = dbapi_cursor.fetchall()
974 result._soft_close()
975 return rows
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchall(self)
989 ret = []
990 while True:
--> 991 row = self.fetchone()
992 if row is None:
993 break
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/cursor.py in fetchone(self)
955 self._prefetch_hook()
956 try:
--> 957 return next(self._result)
958 except StopIteration:
959 return None
src/snowflake/connector/arrow_result.pyx in snowflake.connector.arrow_result.ArrowResult.__next__()
src/snowflake/connector/arrow_iterator.pyx in snowflake.connector.arrow_iterator.PyArrowIterator.__next__()
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in errorhandler_wrapper(connection, cursor, error_class, error_value)
256 if cursor is not None:
257 cursor.messages.append((error_class, error_value))
--> 258 cursor.errorhandler(connection, cursor, error_class, error_value)
259 return
260 elif connection is not None:
/opt/anaconda3/envs/modeling/lib/python3.8/site-packages/snowflake/connector/errors.py in default_errorhandler(connection, cursor, error_class, error_value)
186 A Snowflake error.
187 """
--> 188 raise error_class(
189 msg=error_value.get("msg"),
190 errno=error_value.get("errno"),
InterfaceError: (snowflake.connector.errors.InterfaceError) 252005: Failed to convert current row, cause: int too big to convert
(Background on this error at: http://sqlalche.me/e/14/rvf5)

sqlanydb query on damaged SQL Anywhere table disconnects connection, produces OperationalError (Communication Error -85)

I was using Python and sqlanydb to query a Sybase SQL Anywhere database file. Most queries worked, but any SELECT query involving a particular table:
conn = sqlanydb.connect(**{"uid":"dba", "pwd":"sql", "dbf":"file.db"})
cursor = conn.cursor()
cursor.execute("SELECT ... FROM ...")
resulted in an OperationalError, with the stack trace implicating Communication Error -85 due to an unexpected disconnect.
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
<ipython-input-24-2508c8b04dcc> in <module>()
1 sql = "SELECT ... FROM ..."
2 cursor = conn.cursor()
----> 3 cursor.execute(sql)
4 query_columns = [desc[0] for desc in cursor.description]
5 rows_by_date = cursor.fetchall()
/.../lib/python3.5/site-packages/sqlanydb.py in execute(self, operation, parameters)
788
789 def execute(self, operation, parameters = ()):
--> 790 self.executemany(operation, [parameters])
791
792 def callproc(self, procname, parameters = ()):
/.../lib/python3.5/site-packages/sqlanydb.py in executemany(self, operation, seq_of_parameters)
759 operation = operation.encode(self.char_set)
760 self.new_statement(operation)
--> 761 bind_count = self.api.sqlany_num_params(self.stmt)
762 self.rowcount = 0
763 for parameters in seq_of_parameters:
/.../lib/python3.5/site-packages/sqlanydb.py in __stmt_get(self)
693 self.handleerror(InterfaceError, "no statement")
694 elif not self.__stmt:
--> 695 self.handleerror(*self.parent.error())
696 return self.__stmt
697
/.../lib/python3.5/site-packages/sqlanydb.py in handleerror(self, errorclass, errorvalue, sqlcode)
687 if errorclass:
688 eh = self.errorhandler or standardErrorHandler
--> 689 eh(self.parent, self, errorclass, errorvalue, sqlcode)
690
691 def __stmt_get(self):
/.../lib/python3.5/site-packages/sqlanydb.py in standardErrorHandler(connection, cursor, errorclass, errorvalue, sqlcode)
377 cursor.messages.append(error)
378 if errorclass != Warning:
--> 379 raise errorclass(errorvalue,sqlcode)
380
381
OperationalError: (b'Communication error', -85)
[answer from Richard C Yeh:]
I gave up and replaced the file with a backup copy, and the problem went away. I speculate that somehow the table had gotten damaged.
