Apache Arrow Flight: Multiple calls to FlightServer - python

I've been following this tutorial on how to set up and use Apache Arrow Flight.
From the example, server.py:
import pyarrow as pa
import pyarrow.flight as fl

def create_table_int():
    data = [
        pa.array([1, 2, 3]),
        pa.array([4, 5, 6])
    ]
    return pa.Table.from_arrays(data, names=['column1', 'column2'])

def create_table_dict():
    keys = pa.array(["x", "y", "z"], type=pa.utf8())
    data = [
        pa.chunked_array([
            pa.DictionaryArray.from_arrays([0, 1, 2], keys),
            pa.DictionaryArray.from_arrays([0, 1, 2], keys)
        ]),
        pa.chunked_array([
            pa.DictionaryArray.from_arrays([1, 1, 1], keys),
            pa.DictionaryArray.from_arrays([2, 2, 2], keys)
        ])
    ]
    return pa.Table.from_arrays(data, names=['column1', 'column2'])

class FlightServer(fl.FlightServerBase):
    def __init__(self, location="grpc://0.0.0.0:8815", **kwargs):
        super(FlightServer, self).__init__(location, **kwargs)
        self.tables = {
            b'table_int': create_table_int(),
            b'table_dict': create_table_dict(),
        }

    def do_get(self, context, ticket):
        table = self.tables[ticket.ticket]
        return fl.RecordBatchStream(table)
        # return fl.GeneratorStream(table.schema, table.to_batches(max_chunksize=1024))

def main():
    FlightServer().serve()

if __name__ == '__main__':
    main()
client.py
import argparse
import sys
import pyarrow as pa
import pyarrow.flight as fl

def get_by_ticket(args, client):
    ticket_name = args.name
    response = client.do_get(fl.Ticket(ticket_name)).read_all()
    print_response(response)

def get_by_ticket_pandas(args, client):
    ticket_name = args.name
    response = client.do_get(fl.Ticket(ticket_name)).read_pandas()
    print_response(response)

def print_response(data):
    print("=== Response ===")
    print(data)
    print("================")

def main():
    parser = argparse.ArgumentParser()
    subcommands = parser.add_subparsers()

    cmd_get_by_t = subcommands.add_parser('get_by_ticket')
    cmd_get_by_t.set_defaults(action='get_by_ticket')
    cmd_get_by_t.add_argument('-n', '--name', type=str, help="Name of the ticket to fetch.")

    cmd_get_by_tp = subcommands.add_parser('get_by_ticket_pandas')
    cmd_get_by_tp.set_defaults(action='get_by_ticket_pandas')
    cmd_get_by_tp.add_argument('-n', '--name', type=str, help="Name of the ticket to fetch.")

    args = parser.parse_args()
    if not hasattr(args, 'action'):
        parser.print_help()
        sys.exit(1)

    commands = {
        'get_by_ticket': get_by_ticket,
        'get_by_ticket_pandas': get_by_ticket_pandas,
    }

    client = fl.connect("grpc://0.0.0.0:8815")
    commands[args.action](args, client)

if __name__ == '__main__':
    main()
I'm running the server in a k8s cluster accessed through a service, with various other pods making calls to the server. This works fine EXCEPT when a second call is made to the server before the first call returns. In that case I'm not getting the proper response from the first call, but I don't seem to be getting any errors either. I'm not sure what the proper term is, but is there a way to make the server "blocking", so that it finishes processing the first call before it starts the second, or some other way of fixing this?
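Not part of the original question, but one way to force that kind of "blocking" behaviour: the Flight server handles gRPC requests concurrently, so you can serialize them yourself by holding a threading.Lock for the duration of each response. A minimal sketch under that assumption (the SerializedFlightServer name is mine, the table mirrors create_table_int above, and GeneratorStream is used so the lock is held while the batches are actually streamed):

import threading

import pyarrow as pa
import pyarrow.flight as fl

class SerializedFlightServer(fl.FlightServerBase):
    def __init__(self, location="grpc://0.0.0.0:8815", **kwargs):
        super(SerializedFlightServer, self).__init__(location, **kwargs)
        self._lock = threading.Lock()
        self.tables = {
            b'table_int': pa.Table.from_arrays(
                [pa.array([1, 2, 3]), pa.array([4, 5, 6])],
                names=['column1', 'column2']),
        }

    def do_get(self, context, ticket):
        table = self.tables[ticket.ticket]

        def batches():
            # The lock is held while this request streams its batches,
            # so a second do_get can only proceed once the first finishes.
            with self._lock:
                for batch in table.to_batches():
                    yield batch

        return fl.GeneratorStream(table.schema, batches())

if __name__ == '__main__':
    SerializedFlightServer().serve()

Whether this is the right fix depends on why overlapping calls corrupt the first response in the first place; the lock only guarantees one stream at a time.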

Related

Pysnmp Command Responder with GetNext or Walk operation

I am implementing a Pysnmp responder that currently handles SNMP GET/SET requests, and I would like to extend it with Walk, GetNext and GetBulk operations. All the OIDs and their values are stored as key-value pairs in a file.
What I have tried is to override the readNextVars() method of the instrum.AbstractMibInstrumController class, where I iterate over the list of OIDs by calling self.readVars() from within readNextVars().
Below is a code snippet showing only the GET request; the SET request is similar, but it writes the value for the respective OID to the oid.json file.
from pysnmp.entity import engine, config
from pysnmp.entity.rfc3413 import cmdrsp, context
from pysnmp.carrier.asynsock.dgram import udp
from pysnmp.smi import instrum, error
from pysnmp.proto.api import v2c
import json

class SnmpData:
    def __init__(self, host, port):
        self.snmpEngine = engine.SnmpEngine()
        config.addSocketTransport(
            self.snmpEngine,
            udp.domainName,
            udp.UdpTransport().openServerMode((host, port))
        )
        config.addV1System(self.snmpEngine, 'my-area', 'public', contextName='my-context')
        config.addVacmUser(self.snmpEngine, 2, 'my-area', 'noAuthNoPriv', (1, 3, 6), (4, 5, 7))
        self.snmpContext = context.SnmpContext(self.snmpEngine)

    def snmp_run_command(self):
        self.snmpContext.registerContextName(
            v2c.OctetString('my-context'),
            FileInstrumController()
        )
        cmdrsp.GetCommandResponder(self.snmpEngine, self.snmpContext)
        cmdrsp.SetCommandResponder(self.snmpEngine, self.snmpContext)
        cmdrsp.NextCommandResponder(self.snmpEngine, self.snmpContext)
        cmdrsp.BulkCommandResponder(self.snmpEngine, self.snmpContext)
        self.snmpEngine.transportDispatcher.jobStarted(1)
        try:
            self.snmpEngine.transportDispatcher.runDispatcher()
        except:
            self.snmpEngine.transportDispatcher.closeDispatcher()
        return "yes"

    def main(self):
        self.snmp_run_command()

class FileInstrumController(instrum.AbstractMibInstrumController):
    def readVars(self, vars, acInfo=(None, None)):
        try:
            data = None
            final_data = None
            with open('oid.json') as f:
                data = json.load(f)
                if str(vars[0][0]) in data.keys():
                    final_data = data[str(vars[0][0])]
                    return [(vars[0][0], v2c.OctetString(str(final_data)))]
                else:
                    return [(vars[0][0], v2c.OctetString(str("Not a Valid OID")))]
        except IOError:
            raise error.SmiError

    def readNextVars(self, vars, acInfo=(None, None)):
        # get the OID, split it, and if it is in the file return its value;
        # otherwise return "invalid OID" and break, but it is not breaking
        # and keeps sending requests for the next OID (OID + 1)
        pass
Here is the oid file (oid.json)
{
    "1.3.6.1.1.999.1.1.0": 1,
    "1.3.6.1.1.999.1.2.0": 2,
    "1.3.6.1.1.999.1.3.0": 3,
    "1.3.6.1.1.999.1.4.0": 4,
    "1.3.6.1.1.999.1.5.0": 5,
    "1.3.6.1.1.999.1.6.0": 100,
    "1.3.6.1.1.999.1.7.0": 200,
    "1.3.6.1.1.999.1.8.0": 300,
    "1.3.6.1.1.999.1.9.0": 400,
    "1.3.6.1.1.999.1.10.0": 500
}
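For GetNext/Walk to terminate, readNextVars() has to return the next OID in numeric order and signal end-of-MIB once it walks past the last entry. A minimal sketch of that (my own, not from the question), assuming the keys in oid.json are the entire MIB view; the endOfMibView value is the standard SNMPv2 way to tell the manager the walk is over, so check the exact import against your pysnmp version:

import json

from pysnmp.proto import rfc1905
from pysnmp.proto.api import v2c
from pysnmp.smi import instrum, error

def _to_tuple(oid_str):
    # "1.3.6.1.1.999.1.2.0" -> (1, 3, 6, 1, 1, 999, 1, 2, 0), so OIDs sort numerically
    return tuple(int(part) for part in oid_str.split('.'))

class FileInstrumController(instrum.AbstractMibInstrumController):
    def readNextVars(self, vars, acInfo=(None, None)):
        try:
            with open('oid.json') as f:
                data = json.load(f)
        except IOError:
            raise error.SmiError
        requested = tuple(vars[0][0])
        for oid_str in sorted(data, key=_to_tuple):
            if _to_tuple(oid_str) > requested:
                # first OID strictly after the requested one, in numeric order
                return [(vars[0][0].clone(oid_str),
                         v2c.OctetString(str(data[oid_str])))]
        # walked past the last entry: report end-of-MIB so the walk terminates
        # (endOfMibView lives in pysnmp.proto.rfc1905 in the versions I have seen)
        return [(vars[0][0], rfc1905.endOfMibView)]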

Python Multiproccessing Pool - Sharing one variable per process?

I have been trying to find a simple example where I share one constant variable per process launched in my process pool. Most examples show you how to share variables across processes, which is not what I want.
import multiprocessing
import time

data = (
    {"var": 1, "shared": None}, {"var": 2, "shared": None},
    {"var": 3, "shared": None}, {"var": 4, "shared": None}
)

def mp_worker(input):
    print input
    # print " Processs %s\tWaiting %s seconds" % (inputs, the_time)
    # time.sleep(int(the_time))
    # print " Process %s\tDONE" % inputs

def mp_handler():
    p = multiprocessing.Pool(2)
    p.map(mp_worker, data)

if __name__ == '__main__':
    mp_handler()
For example, if I run this code, I would like to have my "shared" component initialized once for each process.
I would like to do something like this (this doesn't work):
from multiprocessing import Pool, Process

class Worker(Process):
    def __init__(self):
        print 'Worker started'
        # do some initialization here
        super(Worker, self).__init__()

    def compute(self, data):
        print 'Computing things!'
        return data * data

if __name__ == '__main__':
    # This works fine
    worker = Worker()
    #print worker.compute(3)

    # workers get initialized fine
    pool = Pool(processes=4,
                initializer=Worker)
    data = range(10)
    # How to use my worker pool?
    # result = pool.map(Worker.compute, data)
    result = pool.map(Worker.compute, data)
Using shared c_types:
import multiprocessing
from multiprocessing import Process, Lock
from multiprocessing.sharedctypes import Value
from ctypes import Structure, c_double

class Point(Structure):
    _fields_ = [('x', c_double), ('y', c_double)]

def modify(parmMap):
    parmMap['point'].x = parmMap['var']
    parmMap['point'].y = parmMap['var'] * 2

if __name__ == '__main__':
    lock = Lock()
    data = ({'var': 1, 'shared': Value(Point, (0, 0), lock=lock)},
            {'var': 2, 'shared': Value(Point, (0, 0), lock=lock)},
            {'var': 3, 'shared': Value(Point, (0, 0), lock=lock)},
            {'var': 4, 'shared': Value(Point, (0, 0), lock=lock)})

    p = multiprocessing.Pool(2)
    print p.map(mp_worker, data)
    print data
def init(args, num_gpu):
    pid = int(str(multiprocessing.current_process()).split(" ")[0].split("-")[-1].split(",")[0]) - 1
    gpu_id = pid % num_gpu
    global testModule
    testModule = TestModuleShared(args, gpu_id)

def worker(datum):
    pid = int(str(multiprocessing.current_process()).split(" ")[0].split("-")[-1].split(",")[0]) - 1
    params = datum["params"]
    # print str(datum["fc"]) + " " + str(pid)
    # print testModule.openpose
    # Reset State
    testModule.run()

p = multiprocessing.Pool(per_gpu_threads*num_gpu, initializer=init, initargs=(params["test_module_param"], num_gpu,))
It turns out you can just use the global variable keyword, along with an initializer callback to initialize it.
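A compact, self-contained version of that pattern (my own sketch, not from the thread): the initializer runs once in every worker process and stores the per-process object in a module-level global, which the worker function then reuses for every task routed to that process.

import multiprocessing

# Per-process state; set exactly once per worker by the initializer.
shared = None

def init_worker(value):
    global shared
    shared = {"constant": value}   # stand-in for an expensive per-process object

def mp_worker(item):
    # every task handled by this process sees the same 'shared' object
    return item["var"] * shared["constant"]

if __name__ == '__main__':
    data = ({"var": 1}, {"var": 2}, {"var": 3}, {"var": 4})
    pool = multiprocessing.Pool(2, initializer=init_worker, initargs=(10,))
    print(pool.map(mp_worker, data))   # [10, 20, 30, 40]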

django+celery+ansibleApi return None

Python calls the Ansible API with Celery and returns None; I have been searching for a few days. It works well when I call the deploy function without Celery, but with Celery my call into the Ansible API returns None.
Steps to reproduce:
1. tasks.py
from celery import shared_task
from .deploy_tomcat2 import django_process

@shared_task
def deploy(jira_num):
    #return 'hello world {0}'.format(jira_num)
    #rdb.set_trace()
    return django_process(jira_num)
2. deploy_tomcat2.py
from .AnsibleApi import CallApi

def django_process(jira_num):
    server = '10.10.10.30'
    name = 'abc'
    port = 11011
    code = 'efs'
    jdk = '1.12.13'
    jvm = 'xxxx'
    if str.isdigit(jira_num):
        # import pdb
        # pdb.set_trace()
        call = CallApi(server, name, port, code, jdk, jvm)
        return call.run_task()
3. AnsibleApi.py
#!/usr/bin/env python
import logging
from .Logger import Logger
from django.conf import settings
from collections import namedtuple
from ansible.parsing.dataloader import DataLoader
from ansible.vars import VariableManager
from ansible.inventory import Inventory
from ansible.playbook.play import Play
from ansible.executor.task_queue_manager import TaskQueueManager
from ansible.plugins.callback import CallbackBase

Log = Logger('/tmp/auto_deploy_tomcat.log', logging.INFO)

class ResultCallback(CallbackBase):
    def __init__(self, *args, **kwargs):
        super(ResultCallback, self).__init__(*args, **kwargs)
        self.host_ok = {}
        self.host_unreachable = {}
        self.host_failed = {}

    def v2_runner_on_unreachable(self, result):
        self.host_unreachable[result._host.get_name()] = result

    def v2_runner_on_ok(self, result, *args, **kwargs):
        self.host_ok[result._host.get_name()] = result

    def v2_runner_on_failed(self, result, *args, **kwargs):
        self.host_failed[result._host.get_name()] = result

class CallApi(object):
    user = settings.SSH_USER
    ssh_private_key_file = settings.SSH_PRIVATE_KEY_FILE
    results_callback = ResultCallback()
    Options = namedtuple('Options',
                         ['connection', 'module_path', 'private_key_file', 'forks', 'become',
                          'become_method', 'become_user', 'check'])

    def __init__(self, ip, name, port, code, jdk, jvm):
        self.ip = ip
        self.name = name
        self.port = port
        self.code = code
        self.jdk = jdk
        self.jvm = jvm
        self.results_callback = ResultCallback()
        self.results_raw = {}

    def _gen_user_task(self):
        tasks = []
        deploy_script = 'autodeploy/tomcat_deploy.sh'
        dst_script = '/tmp/tomcat_deploy.sh'
        cargs = dict(src=deploy_script, dest=dst_script, owner=self.user, group=self.user, mode='0755')
        args = "%s %s %d %s %s '%s'" % (dst_script, self.name, self.port, self.code, self.jdk, self.jvm)
        tasks.append(dict(action=dict(module='copy', args=cargs), register='shell_out'))
        tasks.append(dict(action=dict(module='debug', args=dict(msg='{{shell_out}}'))))
        # tasks.append(dict(action=dict(module='command', args=args)))
        # tasks.append(dict(action=dict(module='command', args=args), register='result'))
        # tasks.append(dict(action=dict(module='debug', args=dict(msg='{{result.stdout}}'))))
        self.tasks = tasks

    def _set_option(self):
        self._gen_user_task()
        self.variable_manager = VariableManager()
        self.loader = DataLoader()
        self.options = self.Options(connection='smart', module_path=None,
                                    private_key_file=self.ssh_private_key_file, forks=None,
                                    become=True, become_method='sudo', become_user='root', check=False)
        self.inventory = Inventory(loader=self.loader, variable_manager=self.variable_manager,
                                   host_list=[self.ip])
        self.variable_manager.set_inventory(self.inventory)
        play_source = dict(
            name="auto deploy tomcat",
            hosts=self.ip,
            remote_user=self.user,
            gather_facts='no',
            tasks=self.tasks
        )
        self.play = Play().load(play_source, variable_manager=self.variable_manager, loader=self.loader)

    def run_task(self):
        self.results_raw = {'success': {}, 'failed': {}, 'unreachable': {}}
        tqm = None
        from celery.contrib import rdb; rdb.set_trace()
        #import pdb;pdb.set_trace()
        self._set_option()
        try:
            tqm = TaskQueueManager(
                inventory=self.inventory,
                variable_manager=self.variable_manager,
                loader=self.loader,
                options=self.options,
                passwords=None,
                stdout_callback=self.results_callback,
            )
            result = tqm.run(self.play)
        finally:
            if tqm is not None:
                tqm.cleanup()

        for host, result in self.results_callback.host_ok.items():
            self.results_raw['success'][host] = result._result
        for host, result in self.results_callback.host_failed.items():
            self.results_raw['failed'][host] = result._result
        for host, result in self.results_callback.host_unreachable.items():
            self.results_raw['unreachable'][host] = result._result

        Log.info("result is :%s" % self.results_raw)
        return self.results_raw
4. Celery worker:
celery -A jira worker -Q queue.ops.deploy -n "deploy.%h" -l info
5. Produce a message:
deploy.apply_async(args=['150'], queue='queue.ops.deploy', routing_key='ops.deploy')
It seems OK.
The only question is: is None really what the deploy task returns?
It would help if you could post your Celery worker log.
There are two ways to solve this problem; both disable the assert:
1. Where Celery starts, set export PYTHONOPTIMIZE=1, or start Celery with the -O OPTIMIZATION parameter.
2. Disable the assert in the Python multiprocessing package, process.py line 102:
assert not _current_process._config.get('daemon'), \
    'daemonic processes are not allowed to have children'
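For option 1 the variable has to be set before the worker process starts; a minimal example reusing the worker command from step 4 (how you set it in practice depends on whether the worker is launched from a shell, systemd, supervisor, etc.):

export PYTHONOPTIMIZE=1
celery -A jira worker -Q queue.ops.deploy -n "deploy.%h" -l info

Keep in mind that PYTHONOPTIMIZE=1 strips every assert statement in the interpreter, so it silences the "daemonic processes are not allowed to have children" check rather than lifting the underlying restriction.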

Motor and MongoDB returns None

I've used the Motor driver for asynchronous reads from a MongoDB collection. When I run my application it returns a None value. When I run it synchronously with PyMongo it runs normally. I've followed the examples at both http://blog.mongodb.org/post/30927719826/motor-asynchronous-driver-for-mongodb-and-python and http://emptysquare.net/motor/pymongo/api/motor/tutorial.html.
Here is part of my code:
import tornado.httpserver
import tornado.websocket
import tornado.ioloop
import tornado.web
import tornado.options
from tornado import gen
from bson import json_util
import json
import os.path
import motor

events = []

class WSHandler(tornado.websocket.WebSocketHandler):
    @tornado.web.asynchronous
    @gen.engine
    def open(self):
        import traceback
        global events
        print "tailing for events %s...." % events
        try:
            coll = db.blah_tail
            cursor = coll.find(
                {"$and": [
                    {"term": {"$in": events}},
                    {"$or": [
                        {"coordinates.type": "Point"},
                        {"place.full_name": {"$ne": None}}
                    ]}
                ]},
                {"coordinates": 1, "place.full_name": 1},
                tailable=True, timeout=False)
            while cursor.alive:
                try:
                    doc = yield motor.Op(cursor.next_object)
                    print doc
                    self.write_message(json.dumps(doc, default=json_util.default))
                except StopIteration:
                    pass
        except Exception:
            traceback.print_exc()

db = motor.MotorConnection().open_sync().blah

if __name__ == "__main__":
    print 'Server is alive.....'
    app = tornado.web.Application(
        handlers=[(r'/', MainHandler),
                  (r'/ws', WSHandler)],
        db=db,
        template_path=os.path.join(os.path.dirname(__file__), "templates"),
        debug=True)
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
Motor makes the application asynchronous, but I'm not sure why it's basically not reading anything from the collection in the database.
Thanks
I was able to correct it by amending the code to:
doc = yield motor.Op(cursor.next_object)
if doc:
    print doc
    self.write_message(json.dumps(doc, default=json_util.default))
This prevents handling a None when the first call does not return a document. The excerpt by the creator of Motor explains it better: "Problem is, just because cursor.alive is True doesn't truly guarantee that next_object will actually return a document. The first call returns None if find matched no documents at all...", (http://emptysquare.net/blog/category/motor/).
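If your Motor version has cursor.fetch_next (the tutorial linked above describes it), the same loop can be written without the None check, since fetch_next only resolves to True when a document is actually buffered. A sketch under that assumption, as a drop-in for the while loop inside open() above (coll, events, self and json_util come from the surrounding code):

while cursor.alive:
    if (yield cursor.fetch_next):
        doc = cursor.next_object()
        self.write_message(json.dumps(doc, default=json_util.default))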

Implement online Python Shell using Pysandbox

I want to build an online Python shell like this. Currently I am trying to build a module in Python which does the following things:
Creates a new session.
Runs code passed as a string while keeping and maintaining the environment variables of the current session.
I am trying to achieve this using Pysandbox. Here is my effort so far:
from sandbox import Sandbox, SandboxConfig
from optparse import OptionParser
import sys, traceback

class Runner:
    def __init__(self):
        self.options = self.parseOptions()
        self.sandbox = Sandbox(self.createConfig())
        self.localvars = dict()

    def parseOptions(self):
        parser = OptionParser(usage="%prog [options]")
        SandboxConfig.createOptparseOptions(parser, default_timeout=None)
        parser.add_option("--debug",
                          help="Debug mode",
                          action="store_true", default=False)
        parser.add_option("--verbose", "-v",
                          help="Verbose mode",
                          action="store_true", default=False)
        parser.add_option("--quiet", "-q",
                          help="Quiet mode",
                          action="store_true", default=False)
        options, argv = parser.parse_args()
        if argv:
            parser.print_help()
            exit(1)
        if options.quiet:
            options.verbose = False
        return options

    def createConfig(self):
        config = SandboxConfig.fromOptparseOptions(self.options)
        config.enable('traceback')
        config.enable('stdin')
        config.enable('stdout')
        config.enable('stderr')
        config.enable('exit')
        config.enable('site')
        config.enable('encodings')
        config._builtins_whitelist.add('compile')
        config.allowModuleSourceCode('code')
        config.allowModule('sys',
                           'api_version', 'version', 'hexversion')
        config.allowSafeModule('sys', 'version_info')
        if self.options.debug:
            config.allowModule('sys', '_getframe')
            config.allowSafeModule('_sandbox', '_test_crash')
            config.allowModuleSourceCode('sandbox')
        if not config.cpython_restricted:
            config.allowPath(__file__)
        return config

    def Run(self, code):
        # log and compile the statement up front
        try:
            #logging.info('Compiling and evaluating:\n%s' % statement)
            compiled = compile(code, '<string>', 'single')
        except:
            traceback.print_exc(file=sys.stdout)
            return
        try:
            self.sandbox.execute(code)
        except:
            traceback.print_exc(file=sys.stdout)

def f():
    f = open('test.py')
    code = ''
    for lines in f:
        code = code + lines
    runner = Runner()
    runner.Run('a = 5')
    runner.Run('b = 5')
    runner.Run('print a+b')

f()
I am encountering 3 major problems.
How do I nicely display errors? For example, running the above code results in the following output:
File "execute.py", line 60, in Run
self.sandbox.execute(code)
File "/home/aaa/aaa/aaa/pysandbox-master/sandbox/sandbox_class.py", line 90, in execute
return self.execute_subprocess(self, code, globals, locals)
File "/home/aaa/aaa/aaa/pysandbox-master/sandbox/subprocess_parent.py", line 119, in execute_subprocess
raise output_data['error']
NameError: name 'a' is not defined
The undesirable thing here is the call traceback through "execute.py". I just want the function to return only the following error:
NameError: name 'a' is not defined
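One way to get just that last line (my own suggestion, not part of the question) is to format only the exception itself rather than the whole stack, for example with traceback.format_exception_only:

import sys, traceback

def print_last_error():
    # prints e.g. "NameError: name 'a' is not defined" without the call stack
    etype, value = sys.exc_info()[:2]
    sys.stdout.write(''.join(traceback.format_exception_only(etype, value)))

# inside Runner.Run, instead of traceback.print_exc(file=sys.stdout):
#     try:
#         self.sandbox.execute(code)
#     except:
#         print_last_error()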
How do I maintain the environment of the current session? For example, in the above code sequence
a = 5
b = 5
print a+b
should result in output 10.
Any ideas?
This should work, though you might want to play with the output of the exception:
from sandbox import Sandbox, SandboxConfig
from optparse import OptionParser
import sys, traceback

class Runner:
    def __init__(self):
        self.options = self.parseOptions()
        self.sandbox = Sandbox(self.createConfig())
        self.localvars = dict()
        self.code = ''

    def parseOptions(self):
        parser = OptionParser(usage="%prog [options]")
        SandboxConfig.createOptparseOptions(parser)  #, default_timeout=None)
        parser.add_option("--debug",
                          help="Debug mode",
                          action="store_true", default=False)
        parser.add_option("--verbose", "-v",
                          help="Verbose mode",
                          action="store_true", default=False)
        parser.add_option("--quiet", "-q",
                          help="Quiet mode",
                          action="store_true", default=False)
        options, argv = parser.parse_args()
        if argv:
            parser.print_help()
            exit(1)
        if options.quiet:
            options.verbose = False
        return options

    def createConfig(self):
        config = SandboxConfig.fromOptparseOptions(self.options)
        config.enable('traceback')
        config.enable('stdin')
        config.enable('stdout')
        config.enable('stderr')
        config.enable('exit')
        config.enable('site')
        config.enable('encodings')
        config._builtins_whitelist.add('compile')
        config.allowModuleSourceCode('code')
        config.allowModule('sys',
                           'api_version', 'version', 'hexversion')
        config.allowSafeModule('sys', 'version_info')
        if self.options.debug:
            config.allowModule('sys', '_getframe')
            config.allowSafeModule('_sandbox', '_test_crash')
            config.allowModuleSourceCode('sandbox')
        if not config.cpython_restricted:
            config.allowPath(__file__)
        return config

    def Run(self, code):
        code = '\n'.join([self.code, code])
        # log and compile the statement up front
        try:
            #logging.info('Compiling and evaluating:\n%s' % statement)
            compiled = compile(code, '<string>', 'single')
        except:
            traceback.print_exc(file=sys.stdout)
            return
        try:
            self.sandbox.execute(code)
        except:
            err = sys.exc_info()[1]
            print type(err), err
        else:
            self.code = code

def f():
    f = open('test.py')
    code = ''
    for lines in f:
        code = code + lines
    runner = Runner()
    runner.Run('a = 5')
    runner.Run('b = 5')
    runner.Run('print a+b')

f()
