Range query in RediSearch with Python client

I'm trying to query a range of values in RediSearch with the Python client, but the space between the values isn't being parsed correctly. Any thoughts on how to fix this?
import redis

conn = redis.Redis(host='localhost', port=6379, db=0)
q = 'FT.SEARCH idx "#date:[20200101 20200301]" LIMIT 0 100'
conn.execute_command(q)
Throws the error:
--------------------------------------------------------------------------
ResponseError Traceback (most recent call last)
<ipython-input-31-5321b184194e> in <module>
2 q = f'''FT.SEARCH idx "#date:[20200101 20200301]" LIMIT 0 1000000'''
3
----> 4 res = conn.execute_command(q)
5 print(res)
C:\Miniconda3\envs\ssnc\lib\site-packages\redis\client.py in execute_command(self, *args, **options)
899 try:
900 conn.send_command(*args)
--> 901 return self.parse_response(conn, command_name, **options)
902 except (ConnectionError, TimeoutError) as e:
903 conn.disconnect()
C:\Miniconda3\envs\ssnc\lib\site-packages\redis\client.py in parse_response(self, connection, command_name, **options)
913 "Parses a response from the Redis server"
914 try:
--> 915 response = connection.read_response()
916 except ResponseError:
917 if EMPTY_RESPONSE in options:
C:\Miniconda3\envs\ssnc\lib\site-packages\redis\connection.py in read_response(self)
754
755 if isinstance(response, ResponseError):
--> 756 raise response
757 return response
758
ResponseError: Unknown argument `20200301]"` at position 1 for <main>

Try passing the command separately from the arguments. Here's an example:
conn.execute_command('ft.search', 'books-idx', '@average_rating:[0 1]')
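Applied to the original query, a minimal sketch: each token is passed as a separate argument, so redis-py never has to split the quoted range on whitespace. Note also that RediSearch query syntax references fields with an @ prefix:
conn.execute_command('FT.SEARCH', 'idx', '@date:[20200101 20200301]', 'LIMIT', '0', '100')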
We also have a dedicated Python library for RediSearch built on top of redis-py: https://github.com/RediSearch/redisearch-py

Related

py2neo Issue: ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt://localhost:7687')

I am trying to replicate this example on neo4j desktop:
https://stellargraph.readthedocs.io/en/stable/demos/connector/neo4j/load-cora-into-neo4j.html
I am able to reproduce everything until I get to the following line:
import os
import py2neo

default_host = os.environ.get("STELLARGRAPH_NEO4J_HOST")
# Create the Neo4j Graph database object; the arguments can be edited to specify location and authentication
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
I have tried the following attempts to create the neo4j database object:
#1
default_host = os.environ.get("StellarGraph")
graph = py2neo.Graph(host=default_host, port=None, user=None, password=None)
#2
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True)
#3
uri = 'bolt://localhost:7687'
graph = Graph(uri, auth=("neo4j", "password"), port=7687, secure=True, name="StellarGraph")
However, each time I attempt this, it results in some variation of this error:
IndexError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:806, in ConnectionPool.acquire(self, force_reset, can_overfill)
804 try:
805 # Plan A: select a free connection from the pool
--> 806 cx = self._free_list.popleft()
807 except IndexError:
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:62, in Wire.open(cls, address, timeout, keep_alive, on_broken)
61 try:
---> 62 s.connect(address)
63 except (IOError, OSError) as error:
ConnectionRefusedError: [Errno 111] Connection refused
The above exception was the direct cause of the following exception:
WireError Traceback (most recent call last)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:355, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
354 try:
--> 355 wire = cls._connect(profile, on_broken=on_broken)
356 protocol_version = cls._handshake(wire)
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:369, in Bolt._connect(cls, profile, on_broken)
368 log.debug("[#%04X] C: (Dialing <%s>)", 0, profile.address)
--> 369 wire = Wire.open(profile.address, keep_alive=True, on_broken=on_broken)
370 local_port = wire.local_address.port_number
File ~/.local/lib/python3.8/site-packages/py2neo/wiring.py:64, in Wire.open(cls, address, timeout, keep_alive, on_broken)
63 except (IOError, OSError) as error:
---> 64 raise_from(WireError("Cannot connect to %r" % (address,)), error)
65 return cls(s, on_broken=on_broken)
File <string>:3, in raise_from(value, from_value)
WireError: Cannot connect to IPv4Address(('localhost', 7687))
The above exception was the direct cause of the following exception:
ConnectionUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 2>()
1 uri = 'bolt://localhost:7687'
----> 2 graph = Graph(uri, auth=("neo4j", "mypass"), port= 7687, secure=True, name= "StellarGraph")
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:288, in Graph.__init__(self, profile, name, **settings)
287 def __init__(self, profile=None, name=None, **settings):
--> 288 self.service = GraphService(profile, **settings)
289 self.__name__ = name
290 self.schema = Schema(self)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:119, in GraphService.__init__(self, profile, **settings)
116 if connector_settings["init_size"] is None and not profile.routing:
117 # Ensures credentials are checked on construction
118 connector_settings["init_size"] = 1
--> 119 self._connector = Connector(profile, **connector_settings)
120 self._graphs = {}
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:960, in Connector.__init__(self, profile, user_agent, init_size, max_size, max_age, routing_refresh_ttl)
958 else:
959 self._router = None
--> 960 self._add_pools(*self._initial_routers)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:982, in Connector._add_pools(self, *profiles)
980 continue
981 log.debug("Adding connection pool for profile %r", profile)
--> 982 pool = ConnectionPool.open(
983 profile,
984 user_agent=self._user_agent,
985 init_size=self._init_size,
986 max_size=self._max_size,
987 max_age=self._max_age,
988 on_broken=self._on_broken)
989 self._pools[profile] = pool
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in ConnectionPool.open(cls, profile, user_agent, init_size, max_size, max_age, on_broken)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:649, in <listcomp>(.0)
627 """ Create a new connection pool, with an option to seed one
628 or more initial connections.
629
(...)
646 scheme
647 """
648 pool = cls(profile, user_agent, max_size, max_age, on_broken)
--> 649 seeds = [pool.acquire() for _ in range(init_size or cls.default_init_size)]
650 for seed in seeds:
651 seed.release()
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:813, in ConnectionPool.acquire(self, force_reset, can_overfill)
807 except IndexError:
808 if self._has_capacity() or can_overfill:
809 # Plan B: if the pool isn't full, open
810 # a new connection. This may raise a
811 # ConnectionUnavailable exception, which
812 # should bubble up to the caller.
--> 813 cx = self._connect()
814 if cx.supports_multi():
815 self._supports_multi = True
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:764, in ConnectionPool._connect(self)
761 def _connect(self):
762 """ Open and return a new connection.
763 """
--> 764 cx = Connection.open(self.profile, user_agent=self.user_agent,
765 on_release=lambda c: self.release(c),
766 on_broken=lambda msg: self.__on_broken(msg))
767 self._server_agent = cx.server_agent
768 return cx
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:174, in Connection.open(cls, profile, user_agent, on_release, on_broken)
172 if profile.protocol == "bolt":
173 from py2neo.client.bolt import Bolt
--> 174 return Bolt.open(profile, user_agent=user_agent,
175 on_release=on_release, on_broken=on_broken)
176 elif profile.protocol == "http":
177 from py2neo.client.http import HTTP
File ~/.local/lib/python3.8/site-packages/py2neo/client/bolt.py:364, in Bolt.open(cls, profile, user_agent, on_release, on_broken)
362 return bolt
363 except (TypeError, WireError) as error:
--> 364 raise_from(ConnectionUnavailable("Cannot open connection to %r" % profile), error)
File <string>:3, in raise_from(value, from_value)
ConnectionUnavailable: Cannot open connection to ConnectionProfile('bolt+s://localhost:7687')
I have also tried variations on this fix, but had the same error:
ISSUE IN CONNECTING py2neo v4 to my neo4j server
I appreciate any help resolving this issue. Thanks!
I was able to resolve this with the following syntax:
from py2neo import Graph

graph = Graph('neo4j://localhost:7687', user="neo4j", password="999")
However, I am now having an issue with the following block:
empty_db_query = """
MATCH(n) DETACH
DELETE(n)
"""
tx = graph.begin(autocommit=True)
tx.evaluate(empty_db_query)
For the newer version of py2neo, graph.begin takes readonly=False instead of autocommit=True, but in any case, I now have this error:
ServiceUnavailable Traceback (most recent call last)
/home/myname/Project1/graph_import.ipynb Cell 13' in <cell line: 6>()
1 empty_db_query = """
2 MATCH(n) DETACH
3 DELETE(n)
4 """
----> 6 tx = graph.begin(readonly=False)
7 tx.evaluate(empty_db_query)
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:351, in Graph.begin(self, readonly)
340 def begin(self, readonly=False,
341 # after=None, metadata=None, timeout=None
342 ):
343 """ Begin a new :class:`~py2neo.Transaction`.
344
345 :param readonly: if :py:const:`True`, will begin a readonly
(...)
349 removed. Use the 'auto' method instead.*
350 """
--> 351 return Transaction(self, autocommit=False, readonly=readonly,
352 # after, metadata, timeout
353 )
File ~/.local/lib/python3.8/site-packages/py2neo/database.py:915, in Transaction.__init__(self, graph, autocommit, readonly)
913 self._ref = None
914 else:
--> 915 self._ref = self._connector.begin(self.graph.name, readonly=readonly,
916 # after, metadata, timeout
917 )
918 self._readonly = readonly
919 self._closed = False
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1357, in Connector.begin(self, graph_name, readonly)
1345 def begin(self, graph_name, readonly=False,
1346 # after=None, metadata=None, timeout=None
1347 ):
1348 """ Begin a new explicit transaction.
1349
1350 :param graph_name:
(...)
1355 :raises Failure: if the server signals a failure condition
1356 """
-> 1357 cx = self._acquire(graph_name)
1358 try:
1359 return cx.begin(graph_name, readonly=readonly,
1360 # after=after, metadata=metadata, timeout=timeout
1361 )
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1111, in Connector._acquire(self, graph_name, readonly)
1109 return self._acquire_ro(graph_name)
1110 else:
-> 1111 return self._acquire_rw(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1203, in Connector._acquire_rw(self, graph_name)
1199 # TODO: exit immediately if the server/cluster is in readonly mode
1201 while True:
-> 1203 ro_profiles, rw_profiles = self._get_profiles(graph_name, readonly=False)
1204 if rw_profiles:
1205 # There is at least one writer, so collect the pools
1206 # for those writers. In all implementations to date,
1207 # a Neo4j cluster will only ever contain at most one
1208 # writer (per database). But this algorithm should
1209 # still survive if that changes.
1210 pools = [pool for profile, pool in list(self._pools.items())
1211 if profile in rw_profiles]
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1016, in Connector._get_profiles(self, graph_name, readonly)
1014 rt.wait_until_updated()
1015 else:
-> 1016 self.refresh_routing_table(graph_name)
File ~/.local/lib/python3.8/site-packages/py2neo/client/__init__.py:1064, in Connector.refresh_routing_table(self, graph_name)
1062 cx.release()
1063 else:
-> 1064 raise ServiceUnavailable("Cannot connect to any known routers")
1065 finally:
1066 rt.set_not_updating()
ServiceUnavailable: Cannot connect to any known routers
Appreciate any help in resolving this. Thank you!

How to debug a CommClosedError in Dask Gateway deployed in Kubernetes

I have deployed dask_gateway 0.8.0 (with dask==2.25.0 and distributed==2.25.0) in a Kubernetes cluster.
When I create a new cluster with:
cluster = gateway.new_cluster(public_address=gateway._public_address)
I get this error:
Task exception was never retrieved
future: <Task finished coro=<connect.<locals>._() done, defined at /home/jovyan/.local/lib/python3.6/site-packages/distributed/comm/core.py:288> exception=CommClosedError()>
Traceback (most recent call last):
File "/home/jovyan/.local/lib/python3.6/site-packages/distributed/comm/core.py", line 297, in _
handshake = await asyncio.wait_for(comm.read(), 1)
File "/cvmfs/sft.cern.ch/lcg/releases/Python/3.6.5-f74f0/x86_64-centos7-gcc8-opt/lib/python3.6/asyncio/tasks.py", line 351, in wait_for
yield from waiter
concurrent.futures._base.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/jovyan/.local/lib/python3.6/site-packages/distributed/comm/core.py", line 304, in _
raise CommClosedError() from e
distributed.comm.core.CommClosedError
However, if I check the pods, the cluster has actually been created; I can scale it up, and everything seems fine in the dashboard (I can even see the workers). Still, I cannot get the client:
client = cluster.get_client()
Task exception was never retrieved
future: <Task finished coro=<connect.<locals>._() done, defined at /home/jovyan/.local/lib/python3.6/site-packages/distributed/comm/core.py:288> exception=CommClosedError()>
Traceback (most recent call last):
File "/home/jovyan/.local/lib/python3.6/site-packages/distributed/comm/core.py", line 297, in _
handshake = await asyncio.wait_for(comm.read(), 1)
File "/cvmfs/sft.cern.ch/lcg/releases/Python/3.6.5-f74f0/x86_64-centos7-gcc8-opt/lib/python3.6/asyncio/tasks.py", line 351, in wait_for
yield from waiter
concurrent.futures._base.CancelledError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/jovyan/.local/lib/python3.6/site-packages/distributed/comm/core.py", line 304, in _
raise CommClosedError() from e
distributed.comm.core.CommClosedError
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
~/.local/lib/python3.6/site-packages/distributed/comm/core.py in connect(addr, timeout, deserialize, handshake_overrides, **connection_args)
321 if not comm:
--> 322 _raise(error)
323 except FatalCommClosedError:
~/.local/lib/python3.6/site-packages/distributed/comm/core.py in _raise(error)
274 )
--> 275 raise IOError(msg)
276
OSError: Timed out trying to connect to 'gateway://traefik-dask-gateway:80/jhub.0373ea68815d47fca6a6c489c8f7263a' after 100 s: connect() didn't finish in time
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-19-affca45186d3> in <module>
----> 1 client = cluster.get_client()
~/.local/lib/python3.6/site-packages/dask_gateway/client.py in get_client(self, set_as_default)
1066 set_as_default=set_as_default,
1067 asynchronous=self.asynchronous,
-> 1068 loop=self.loop,
1069 )
1070 if not self.asynchronous:
~/.local/lib/python3.6/site-packages/distributed/client.py in __init__(self, address, loop, timeout, set_as_default, scheduler_file, security, asynchronous, name, heartbeat_interval, serializers, deserializers, extensions, direct_to_workers, connection_limit, **kwargs)
743 ext(self)
744
--> 745 self.start(timeout=timeout)
746 Client._instances.add(self)
747
~/.local/lib/python3.6/site-packages/distributed/client.py in start(self, **kwargs)
948 self._started = asyncio.ensure_future(self._start(**kwargs))
949 else:
--> 950 sync(self.loop, self._start, **kwargs)
951
952 def __await__(self):
~/.local/lib/python3.6/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
337 if error[0]:
338 typ, exc, tb = error[0]
--> 339 raise exc.with_traceback(tb)
340 else:
341 return result[0]
~/.local/lib/python3.6/site-packages/distributed/utils.py in f()
321 if callback_timeout is not None:
322 future = asyncio.wait_for(future, callback_timeout)
--> 323 result[0] = yield future
324 except Exception as exc:
325 error[0] = sys.exc_info()
/cvmfs/sft.cern.ch/lcg/views/LCG_96python3/x86_64-centos7-gcc8-opt/lib/python3.6/site-packages/tornado/gen.py in run(self)
1131
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
~/.local/lib/python3.6/site-packages/distributed/client.py in _start(self, timeout, **kwargs)
1045
1046 try:
-> 1047 await self._ensure_connected(timeout=timeout)
1048 except (OSError, ImportError):
1049 await self._close()
~/.local/lib/python3.6/site-packages/distributed/client.py in _ensure_connected(self, timeout)
1103 try:
1104 comm = await connect(
-> 1105 self.scheduler.address, timeout=timeout, **self.connection_args
1106 )
1107 comm.name = "Client->Scheduler"
~/.local/lib/python3.6/site-packages/distributed/comm/core.py in connect(addr, timeout, deserialize, handshake_overrides, **connection_args)
332 backoff = min(backoff, 1) # wait at most one second
333 else:
--> 334 _raise(error)
335 else:
336 break
~/.local/lib/python3.6/site-packages/distributed/comm/core.py in _raise(error)
273 error,
274 )
--> 275 raise IOError(msg)
276
277 backoff = 0.01
OSError: Timed out trying to connect to 'gateway://traefik-dask-gateway:80/jhub.0373ea68815d47fca6a6c489c8f7263a' after 100 s: Timed out trying to connect to 'gateway://traefik-dask-gateway:80/jhub.0373ea68815d47fca6a6c489c8f7263a' after 100 s: connect() didn't finish in time
How do I debug this? Any pointer would be greatly appreciated.
I already tried increasing all the timeouts, but nothing changed:
os.environ["DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT"]="100s"
os.environ["DASK_DISTRIBUTED__COMM__TIMEOUTS__TCP"]="600s"
os.environ["DASK_DISTRIBUTED__COMM__RETRY__DELAY__MIN"]="1s"
os.environ["DASK_DISTRIBUTED__COMM__RETRY__DELAY__MAX"]="60s"
I wrote a tutorial about the steps I took to deploy dask gateway, see https://zonca.dev/2020/08/dask-gateway-jupyterhub.html.
I am quite sure this was working fine a few weeks ago, but I cannot identify what changed...
You need to use compatible versions of dask and distributed everywhere.
I believe this is an error related to an upgrade in the communications protocol for distributed. See https://github.com/dask/dask-gateway/issues/316#issuecomment-702947730
These are the pinned versions of the dependencies for the docker images as of Nov 10, 2020 (in conda environment.yml compatible format):
- python=3.7.7
- dask=2.21.0
- distributed=2.21.0
- cloudpickle=1.5.0
- toolz=0.10.0
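If in doubt, a quick sanity check is to print the versions in the notebook and compare them against what is installed in the scheduler and worker images (a minimal sketch):
import dask
import distributed

print(dask.__version__, distributed.__version__)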

NoBrokersAvailable: NoBrokersAvailable Error in Kafka

I've stumbled upon a 'NoBrokersAvailable' error in our Jupyter notebook using this code:
import json
import logging
import time

from kafka import KafkaProducer
from kafka.errors import KafkaError

log = logging.getLogger(__name__)

def on_send_success(record_metadata):
    print(record_metadata.topic)
    print(record_metadata.partition)
    print(record_metadata.offset)

def on_send_error(excp):
    log.error('I am an errback', exc_info=excp)
    # handle exception

producer = KafkaProducer(bootstrap_servers=['localhost:9092'], value_serializer=lambda m: json.dumps(m).encode('utf-8'))

INTERVAL = 10
while True:
    # get_realtime_stock is the notebook's own helper for fetching quotes
    data_points = get_realtime_stock('AAPL')
    data = {'updated_on': data_points['updated_on'], 'ticker': data_points['security']['ticker'], 'last_price': data_points['last_price']}
    message = data_points
    producer.send('data1', value=data).add_callback(on_send_success).add_errback(on_send_error)
    time.sleep(INTERVAL)
Here the respective error:
---------------------------------------------------------------------------
NoBrokersAvailable Traceback (most recent call last)
<ipython-input-8-cab724428b84> in <module>
11 # handle exception
12
---> 13 producer = KafkaProducer(bootstrap_servers=['localhost:9092'], value_serializer=lambda m: json.dumps(m).encode('utf-8'))
14 INTERVAL =10
15 while True:
~/anaconda3/lib/python3.7/site-packages/kafka/producer/kafka.py in __init__(self, **configs)
379 client = KafkaClient(metrics=self._metrics, metric_group_prefix='producer',
380 wakeup_timeout_ms=self.config['max_block_ms'],
--> 381 **self.config)
382
383 # Get auto-discovered version from client if necessary
~/anaconda3/lib/python3.7/site-packages/kafka/client_async.py in __init__(self, **configs)
237 if self.config['api_version'] is None:
238 check_timeout = self.config['api_version_auto_timeout_ms'] / 1000
--> 239 self.config['api_version'] = self.check_version(timeout=check_timeout)
240
241 def _can_bootstrap(self):
~/anaconda3/lib/python3.7/site-packages/kafka/client_async.py in check_version(self, node_id, timeout, strict)
890 else:
891 self._lock.release()
--> 892 raise Errors.NoBrokersAvailable()
893
894 def wakeup(self):
NoBrokersAvailable: NoBrokersAvailable
The code worked just fine, but out of nowhere it stopped working.
Does anyone know what the problem might be?
I had the same error and solved it by specifying the API version in the KafkaProducer constructor. Here is a sample from my code:
producer = KafkaProducer(
    bootstrap_servers=#####,
    client_id=######,
    value_serializer=JsonSerializer.serialize,
    api_version=(0, 10, 1)
)
For the API version, you should put the version of the Kafka broker you are running. If the error persists, also check the version of your kafka-python library.
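For reference, a self-contained sketch (assumptions: a broker reachable at localhost:9092, and a placeholder api_version that you should replace with your broker's actual version):
import json
from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda m: json.dumps(m).encode('utf-8'),
    api_version=(2, 0, 2),  # placeholder: set this to your broker's version
)
producer.send('data1', value={'hello': 'world'})
producer.flush()  # block until queued messages are actually sent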

Running Hive query through impala.dbapi fails when embedding "ADD JAR"

I'm launching a Hive query through Python's impala.dbapi, which works nicely, as follows:
import os
import pandas as pd
from impala.dbapi import connect
from impala.util import as_pandas
from datetime import datetime
user=os.environ['HIVE_USER']
password=os.environ['HIVE_PASSWORD']
up_to_date_query = '''
select * from dejavu.tracking_events limit 1
'''
conn = connect(host='ecprdbhdp02-clientgw.kenshooprd.local', port=10000,
               user=user,
               password=password,
               auth_mechanism='PLAIN')
cursor = conn.cursor()
cursor.execute(up_to_date_query)
df = as_pandas(cursor)
df.head()
But when I add the following "ADD JAR" clause:
up_to_date_query = '''
ADD JAR hdfs://BICluster/user/yossis/udfs/hive-udf-0.1-SNAPSHOT.jar;
select * from dejavu.tracking_events limit 1
'''
I'm getting the following error:
---------------------------------------------------------------------------
HiveServer2Error Traceback (most recent call last)
<ipython-input-10-1e512abcc69e> in <module>()
4 auth_mechanism='PLAIN')
5 cursor = conn.cursor()
----> 6 cursor.execute(up_to_date_query)
7 df = as_pandas(cursor)
8 df.head()
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute(self, operation, parameters, configuration)
300 # PEP 249
301 self.execute_async(operation, parameters=parameters,
--> 302 configuration=configuration)
303 log.debug('Waiting for query to finish')
304 self._wait_to_finish() # make execute synchronous
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute_async(self, operation, parameters, configuration)
341 self._last_operation = op
342
--> 343 self._execute_async(op)
344
345 def _debug_log_state(self):
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _execute_async(self, operation_fn)
360 self._reset_state()
361 self._debug_log_state()
--> 362 operation_fn()
363 self._last_operation_active = True
364 self._debug_log_state()
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in op()
338 op = self.session.execute(self._last_operation_string,
339 configuration,
--> 340 async=True)
341 self._last_operation = op
342
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute(self, statement, configuration, async)
1025 confOverlay=configuration,
1026 runAsync=async)
-> 1027 return self._operation('ExecuteStatement', req)
1028
1029 def get_databases(self, schema='.*'):
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _operation(self, kind, request)
955
956 def _operation(self, kind, request):
--> 957 resp = self._rpc(kind, request)
958 return self._get_operation(resp.operationHandle)
959
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _rpc(self, func_name, request)
923 response = self._execute(func_name, request)
924 self._log_response(func_name, response)
--> 925 err_if_rpc_not_ok(response)
926 return response
927
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in err_if_rpc_not_ok(resp)
702 resp.status.statusCode != TStatusCode.SUCCESS_WITH_INFO_STATUS and
703 resp.status.statusCode != TStatusCode.STILL_EXECUTING_STATUS):
--> 704 raise HiveServer2Error(resp.status.errorMessage)
705
706
HiveServer2Error: Error while processing statement: null
Notice that the query works properly when running it directly in Hive (through the hue console).
After searching for similar questions, it seems like no one asked for exactly the same problem :(
Thanks in advance!
It seems like the ; separator causes trouble. I just separated the statements, as follows:
cursor.execute('ADD JAR hdfs://BICluster/user/yossis/udfs/hive-udf-0.1-SNAPSHOT.jar')
up_to_date_query = '''
select * from dejavu.tracking_events limit 1
'''
cursor.execute(up_to_date_query)
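More generally, a small sketch for running a multi-statement script one statement at a time on the same cursor (sql_script is a hypothetical variable holding the full script; the naive split assumes no semicolons inside string literals):
for stmt in (s.strip() for s in sql_script.split(';')):
    if stmt:  # skip empty fragments, e.g. after a trailing semicolon
        cursor.execute(stmt)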

pymongo error when writing

I am unable to do any writes to a remote MongoDB database. I am able to connect and do lookups (e.g. find). I connect like this:
import pymongo

conn = pymongo.MongoClient(db_uri, slaveOK=True)
db = conn.test_database
coll = db.test_collection
But when I try to insert,
coll.insert({'a':1})
I run into an error:
---------------------------------------------------------------------------
AutoReconnect Traceback (most recent call last)
<ipython-input-56-d4ffb9e3fa79> in <module>()
----> 1 coll.insert({'a':1})
/usr/lib/python2.7/dist-packages/pymongo/collection.pyc in insert(self, doc_or_docs, manipulate, safe, check_keys, continue_on_error, **kwargs)
410 message._do_batched_insert(self.__full_name, gen(), check_keys,
411 safe, options, continue_on_error,
--> 412 self.uuid_subtype, client)
413
414 if return_one:
/usr/lib/python2.7/dist-packages/pymongo/mongo_client.pyc in _send_message(self, message, with_last_error, command, check_primary)
1126 except (ConnectionFailure, socket.error), e:
1127 self.disconnect()
-> 1128 raise AutoReconnect(str(e))
1129 except:
1130 sock_info.close()
AutoReconnect: not master
If I remove the slaveOK=True (setting it to its default value of False) then I can still connect, but the reads (and writes) fail:
AutoReconnect Traceback (most recent call last)
<ipython-input-70-6671eea24f80> in <module>()
----> 1 coll.find_one()
/usr/lib/python2.7/dist-packages/pymongo/collection.pyc in find_one(self, spec_or_id, *args, **kwargs)
719 *args, **kwargs).max_time_ms(max_time_ms)
720
--> 721 for result in cursor.limit(-1):
722 return result
723 return None
/usr/lib/python2.7/dist-packages/pymongo/cursor.pyc in next(self)
1036 raise StopIteration
1037 db = self.__collection.database
-> 1038 if len(self.__data) or self._refresh():
1039 if self.__manipulate:
1040 return db._fix_outgoing(self.__data.popleft(),
/usr/lib/python2.7/dist-packages/pymongo/cursor.pyc in _refresh(self)
980 self.__skip, ntoreturn,
981 self.__query_spec(), self.__fields,
--> 982 self.__uuid_subtype))
983 if not self.__id:
984 self.__killed = True
/usr/lib/python2.7/dist-packages/pymongo/cursor.pyc in __send_message(self, message)
923 self.__tz_aware,
924 self.__uuid_subtype,
--> 925 self.__compile_re)
926 except CursorNotFound:
927 self.__killed = True
/usr/lib/python2.7/dist-packages/pymongo/helpers.pyc in _unpack_response(response, cursor_id, as_class, tz_aware, uuid_subtype, compile_re)
99 error_object = bson.BSON(response[20:]).decode()
100 if error_object["$err"].startswith("not master"):
--> 101 raise AutoReconnect(error_object["$err"])
102 elif error_object.get("code") == 50:
103 raise ExecutionTimeout(error_object.get("$err"),
AutoReconnect: not master and slaveOk=false
Am I connecting incorrectly? Is there a way to specify connecting to the primary replica?
AutoReconnect: not master means that your operation is failing because the node on which you are attempting to issue the command is not the primary of a replica set, where the command (e.g., a write operation) requires that node to be a primary. Setting slaveOK=True just enables you to read from a secondary node, where by default you would only be able to read from the primary.
MongoClient is automatically able to discover and connect to the primary if the replica set name is provided to the constructor with replicaSet=<replica set name>. See "Connecting to a Replica Set" in the PyMongo docs.
As an aside, slaveOK is deprecated, replaced by ReadPreference. You can specify a ReadPreference when creating the client or when issuing queries, if you want to target a node other than the primary.
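A minimal sketch with a recent PyMongo (the replica set name rs0 is a placeholder; use the name of your own replica set):
from pymongo import MongoClient, ReadPreference

client = MongoClient(db_uri, replicaSet='rs0')  # discovers and connects to the primary
coll = client.test_database.test_collection
coll.insert_one({'a': 1})  # writes are routed to the primary

# Reads can target secondaries by setting a read preference:
secondary_coll = coll.with_options(read_preference=ReadPreference.SECONDARY_PREFERRED)
doc = secondary_coll.find_one()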
I don't know whether it's related to this topic or not, but when I searched for the exception below, Google led me to this question. Maybe it'll be helpful.
pymongo.errors.NotMasterError: not master
In my case, my hard drive was full. You can also figure that out with the df -h command.
