Async script in Python that runs the whole time - python

How can I run the following script asynchronously so that it is always running, and so that I can pass in arguments and, so to speak, a new thread is created each time?
Example script:
from fastapi import FastAPI, Response

app = FastAPI()

@app.get("/")
async def home():
    return {"Data": "Test"}

@app.get("/live-check/")
async def live_check(response: Response):
    response.headers["Status"] = "OK"
    return "live check successful"

@app.get("/get-latest-file-user/", status_code=200)
async def show_file():
    ...
My test script:
from configparser import ConfigParser

def get_config():
    '''get the arguments'''
    config_object = ConfigParser()
    try:
        ...
    except KeyError:
        print("INI File not found!")
        quit()

def main(conf_args: dict):
    '''main function - controls and starts most of the functions'''
    serialNumber = get_serial(conf_args['cert'])

if __name__ == "__main__":
    conf_args = get_args(DictIni)  # get input (args)
    main(conf_args)
Can anyone show me how to transfer this to my script?
I know the script itself makes no sense; I just want to understand the principle.
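One way to approach this (a minimal sketch, not from the original question) is to keep the FastAPI app running the whole time and have each request schedule its own run of the blocking script as a background task with its own arguments. The /run/ endpoint and the cert parameter below are illustrative assumptions:

from fastapi import BackgroundTasks, FastAPI

app = FastAPI()

def main(conf_args: dict):
    '''Stand-in for the blocking test script's main().'''
    ...

@app.post("/run/")
async def run_script(cert: str, background_tasks: BackgroundTasks):
    # Each request schedules its own background run with its own arguments.
    background_tasks.add_task(main, {"cert": cert})
    return {"status": "started", "cert": cert}

Starlette runs synchronous background task functions in a thread pool, so each request effectively gets its own worker thread and the event loop stays responsive.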

Related

kafka consumer to ruby on rails

I am currently working with a Rails server which is supposed to run Python scripts that act as Kafka consumers/producers.
The server must run a script, then receive the processed data from the consumer and render it to the site.
I am able to run a script, but cannot find a solution for connecting the consumer, since the consumer is either running non-stop accepting messages or running in a while loop. I tried to run the consumer first from Ruby, which starts the consumer, but it never returns, as it just keeps listening, and the other script could not be run.
So the flow of the message should ideally be something like this: email from the logged-in user to the Kafka producer -> MQ -> Kafka consumer generates data and writes it to the DB -> producer queries data from the database -> MQ -> consumer accepts the data and renders it to the site.
The ideal scenario would be one script, let's call it a manager, that does all the work and only accepts data and returns it. I was not able to do that either, because that one script also runs the consumer and listens for the producer, so it never gets run.
So here is my code:
from kafka import KafkaProducer
from faker import Faker
import json
import time

class producer1():
    '''
    fr_instance = Faker()
    def get_new_user():
        return {"email_address": fr_instance.email(), "first_name": fr_instance.first_name(),
                "lastname": fr_instance.last_name(), "occupation": fr_instance.job()}
    '''
    def __init__(self):
        self

    def json_serializer(self, data):
        return json.dumps(data).encode("utf-8")

    def send(self, email):
        print(email)
        producer = KafkaProducer(bootstrap_servers='localhost:9092',
                                 value_serializer=self.json_serializer)
        registred_user = {"email": email}
        future = producer.send("NewUserTopic", registred_user)
        print(registred_user)
        result = future.get(timeout=10)

p = producer1()

if __name__ == '__main__':
    email = "testmail@aaaaaaaa.com"
    p.send(email)
then 1st consumer:
from kafka import KafkaConsumer
import json
import random
from sqlalchemy.orm import sessionmaker
import dbservice
import time

class consumer1():
    def __init__(self) -> None:
        self

    def email(self):
        consumer = KafkaConsumer('NewUserTopic',
                                 bootstrap_servers='localhost:9092',
                                 auto_offset_reset='latest', enable_auto_commit=False)
        for msg in consumer:
            msg_out = json.loads(msg.value)
            for value in msg_out.values():
                #return print(msg_out)
                return (value)

    #generate dummy address, eth
    def gen_ETHw(self):
        numbers = str(random.randint(11111, 99999))
        wallet_num = str("Ox" + numbers)
        return (wallet_num)

    #generate dummy address, btc
    def gen_BTCw(self):
        numbers = str(random.randint(11111, 99999))
        wallet_num = str("Ox" + numbers)
        return (wallet_num)

    def commit_db(self, email, ETHw, BTCw):
        Session = sessionmaker(bind=dbservice.engine)
        s = Session()
        input = dbservice.walletdb(email, ETHw, BTCw)
        time.sleep(2)
        s.add(input)
        s.commit()

if __name__ == '__main__':
    while True:
        c = consumer1()
        c.commit_db(c.email(), c.gen_ETHw(), c.gen_BTCw())
query producer:
import dbservice
from sqlalchemy.orm import sessionmaker
from kafka import KafkaProducer
import json

class query_prod():
    def __init__(self, email) -> None:
        self = self
        self.email = email

    def json_serializer(data):
        return json.dumps(data).encode("utf-8")

    producer = KafkaProducer(bootstrap_servers='localhost:9092',
                             value_serializer=json_serializer)
    Session = sessionmaker(bind=dbservice.engine)
    s = Session()

    def query_address(self, email):
        Session = sessionmaker(bind=dbservice.engine)
        s = Session()
        for s in s.query(dbservice.walletdb).filter_by(email=email):
            return {"email": s.email, "ETH_w": s.ETH_w, "BTC_w": s.BTC_w}

    def send(self, email):
        data_to_send = self.query_address(email)
        future = self.producer.send("QueryAdressToServer", data_to_send)
        print(data_to_send)
        result = future.get(timeout=10)

if __name__ == '__main__':
    email = "testmail@aaaaaaaa.com"
    query_prod = query_prod(email)
    query_prod.send(email)
and consume data which should be returned to the site:
from kafka import KafkaConsumer
import json
import time

class consume_for_web():
    string = ""

    def __init__(self) -> None:
        self = self
        string = self.string

    def consumer(self):
        consumer = KafkaConsumer('QueryAdressToServer',
                                 bootstrap_servers='localhost:9092',
                                 auto_offset_reset='latest', enable_auto_commit=False)
        print('starting consumer')
        for msg in consumer:
            data = (('{}'.format(json.loads(msg.value))))
            self.string = self.string + data
            return print(data)

    def read_str(self):
        return print(self.string)

if __name__ == '__main__':
    while True:
        c = consume_for_web()
        c.consumer()
        ##print("reading")
        #c.read_str()
and finally my rails pages controller:
class PagesController < ApplicationController
  def home
  end

  def about
  end

  before_action :require_login

  def genw
    our_input = current_user.email
    puts our_input
    #consumer_result = `python3 /Users/samuelrybar/python_projects/Kafka_demo1/kafka-prod-coms/consumer2.py`
  end

  def mywa
  end

  def save
  end
end
Thanks for your time and help, I really appreciate it. :))
Not sure why you are trying to run Python scripts from a running Rails server. It sounds like a very bad idea to me. You can run Kafka consumers/producers from Ruby and Ruby on Rails directly. I suggest you investigate Karafka; we've been using it successfully at work.

address already in use in async client tests in python

I have a question about async client tests. Here is the code for my test:
class TestSocketClient:
    @classmethod
    def setup_class(cls):
        # enable parallel testing by adding the pytest worker id number to the default port
        worker_id = os.environ.get("PYTEST_XDIST_WORKER", "gw0")
        worker_number = int(worker_id[2:])
        cls.mock_server = ServerMock()
        cls.mock_server.port = ServerMock.port + worker_number
        cls.username = os.environ.get("USERNAME", "")
        cls.password = os.environ.get("PASSWORD", "")
        cls.socket_client = SocketClient(username=cls.username, password=cls.password)
        cls.socket_client.hostname = "0.0.0.0"
        cls.socket_client.port = SocketClient.port + worker_number

    @pytest.mark.asyncio
    async def test_login(self):
        await self.mock_server.start()
        response = await self.socket_client.login()
        assert response == actual_response
        await self.socket_client.close()

    @pytest.mark.asyncio
    async def test_send_heartbeat(self):
        await self.mock_server.start()
        await self.socket_client.login()
        await self.socket_client.send_heartbeat()
        await self.socket_client.close()
I can run the tests under TestSocketClient individually and they pass. But when I run the test suites together with pytest -n auto, the later test case raises an error while attempting to bind on address ('0.0.0.0', 2056): address already in use. My question is how to make the test suites pass without these address allocation issues, so that they can run successfully in the CI process. I would also greatly appreciate any suggestions on writing async tests (for example, what should I assert if I only want to test a request the client sends to the server? Should I assert on the message received on the server side, or just write something like assert_called_once on the client side?). Thanks in advance!
Updates:
I finally solved the problem by incrementing the port across tests, like below:
class TestSocketClient:
    ports_taken = set()

    @classmethod
    def setup_class(cls):
        cls.mock_server = ServerMock()
        cls.username = os.environ.get("USERNAME", "")
        cls.password = os.environ.get("PASSWORD", "")
        cls.socket_client = SocketClient(username=cls.username, password=cls.password)
        cls.socket_client.hostname = "0.0.0.0"
        cls.socket_client.port = cls.mock_server.port

    def bump(self):
        if len(self.ports_taken) == 0:
            self.ports_taken.add(self.mock_server.port)
        new_port = max(self.ports_taken) + 1
        self.mock_server.port = new_port
        self.socket_client.port = new_port
        self.ports_taken.add(self.mock_server.port)

    async def start(self):
        self.bump()
        try:
            await self.mock_server.start()
        except:
            self.bump()
            await self.mock_server.start()

    @pytest.mark.asyncio
    async def test_login(self):
        await self.start()
        ...
Hope this could be helpful!
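A possible alternative (my assumption, not something from the original tests) is to skip the manual bookkeeping and let the OS hand out an unused ephemeral port by binding to port 0, then give that port to both ServerMock and SocketClient in setup_class:

import socket

def get_free_port() -> int:
    '''Ask the OS for a currently unused TCP port.'''
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("0.0.0.0", 0))
        return s.getsockname()[1]

There is a small window between closing the probe socket and the mock server binding the port, but in practice that race is rarely a problem in test suites.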
For "address already in use", check the currently running processes with
ps -ax | grep <your app/app port>
You will notice that the process already exists. Kill that process using sudo kill -9 <process_id>
and then restart your service. I am not sure about async tests.

Tornado: How to get and return large data with less memory usage?

I have a web crawler and an HTTP interface for it.
The crawler gets grouped URLs as a dictionary. I need to return the result in the same format as JSON. But I am faced with large memory usage, which is not returned to the operating system. How can I implement this without the large memory usage?
Code:
#!/usr/bin/env python
# coding=utf-8

import collections

import tornado.web
import tornado.ioloop
import tornado.queues
import tornado.httpclient


class ResponseError(Exception):
    pass


class Crawler(object):
    client = tornado.httpclient.AsyncHTTPClient()

    def __init__(self, groups, concurrency=10, retries=3, validators=None):
        self.groups = groups
        self.concurrency = concurrency
        self.retries = retries
        self.validators = validators or []
        self.requests = tornado.queues.Queue()
        self.responses = collections.defaultdict(list)

    async def worker(self):
        while True:
            await self.consume()

    async def validate(self, response):
        for validator in self.validators:
            validator(response)

    async def save(self, response):
        self.responses[response.request.group].append(response.body.decode('utf-8'))

    async def consume(self):
        async for request in self.requests:
            try:
                response = await self.client.fetch(request, raise_error=False)
                await self.validate(response)
                await self.save(response)
            except ResponseError:
                if request.retries < self.retries:
                    request.retries += 1
                    await self.requests.put(request)
            finally:
                self.requests.task_done()

    async def produce(self):
        for group, urls in self.groups.items():
            for url in urls:
                request = tornado.httpclient.HTTPRequest(url)
                request.group = group
                request.retries = 0
                await self.requests.put(request)

    async def fetch(self):
        await self.produce()
        for __ in range(self.concurrency):
            tornado.ioloop.IOLoop.current().spawn_callback(self.worker)
        await self.requests.join()


class MainHandler(tornado.web.RequestHandler):
    async def get(self):
        urls = []
        with open('urls') as f:  # mock
            for line in f:
                urls.append(line.strip())
        crawler = Crawler({'default': urls})
        await crawler.fetch()
        self.write(crawler.responses)


if __name__ == '__main__':
    app = tornado.web.Application(
        (tornado.web.url(r'/', MainHandler),), debug=True
    )
    app.listen(8000)
    tornado.ioloop.IOLoop.current().start()
It looks to me like most of the memory usage is devoted to self.responses. Since you seem to be ordering responses by "group" before writing them to a file, I can understand why you do it this way. One idea is to store them in a database (MySQL or MongoDB or whatever) with the "group" as a column or field value in each database record.
The database might be the final destination of your data, or else it might be a temporary place to store the data until crawler.fetch completes. Then, query all the data from the database, ordered by "group", and write it to the file.
This doesn't solve the problem, it just means that the database process is responsible for most of your memory usage, instead of the Python process. This may be preferable for you, however.
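A rough sketch of that idea (sqlite3 and the table layout here are my assumptions, not part of the question): override save() to write each body to the database instead of accumulating it in self.responses, then read the rows back ordered by "group" once crawler.fetch completes.

import sqlite3

conn = sqlite3.connect('responses.db')
conn.execute('CREATE TABLE IF NOT EXISTS responses ("group" TEXT, body TEXT)')

class DbCrawler(Crawler):
    async def save(self, response):
        # Persist each response instead of keeping it in memory.
        conn.execute(
            'INSERT INTO responses ("group", body) VALUES (?, ?)',
            (response.request.group, response.body.decode('utf-8')),
        )
        conn.commit()

# After crawler.fetch() has finished:
#     for group, body in conn.execute('SELECT "group", body FROM responses ORDER BY "group"'):
#         ...

The sqlite3 calls are blocking, which is acceptable for small writes; for heavier loads you would hand them off to a thread or use an async driver.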

Testing aiohttp & mongo with pytest

I have a simple coroutine register that accepts login and password
as post arguments, then it goes into the database and so on.
The problem I have is that I do not know how to test the coroutine.
I followed examples from
https://aiohttp.readthedocs.io/en/latest/testing.html.
And everything seemed easy until I started writing tests myself.
Code for test_register.py:
import pytest

from main import make_app

pytest_plugins = 'aiohttp.pytest_plugin'

@pytest.fixture
def cli(loop, test_client):
    return loop.run_until_complete(test_client(make_app))

async def test_register(cli):
    resp = await cli.post('/register', data={'login': 'emil', 'password': 'qwerty'})
    assert resp.status == 200
    text = await resp.text()
And register.py:
from settings import db

async def register(request):
    post_data = await request.post()
    print('Gotta: ', post_data)
    login, password = post_data['login'], post_data['password']
    matches = await db.users.find({'login': login}).count()
    ...
main.py:
from aiohttp import web

from routes import routes

def make_app(loop=None):
    app = web.Application(loop=loop)
    for route in routes:
        app.router.add_route(route.method, route.url, route.handler)
    return app

def main():
    web.run_app(make_app())

if __name__ == "__main__":
    main()
settings.py:
from motor.motor_asyncio import AsyncIOMotorClient

DBNAME = 'testdb'
db = AsyncIOMotorClient()[DBNAME]
And then I ran py.test test_register.py and it got stuck on the database operation:
matches = await db.users.find({'login': login}).count()
The root of your problem is global variable usage.
I suggest the following changes:
from aiohttp import web
from motor.motor_asyncio import AsyncIOMotorClient

from routes import routes

def make_app(loop=None):
    app = web.Application(loop=loop)

    DBNAME = 'testdb'
    mongo = AsyncIOMotorClient(io_loop=loop)
    db = mongo[DBNAME]
    app['db'] = db

    async def cleanup(app):
        mongo.close()

    app.on_cleanup.append(cleanup)

    for route in routes:
        app.router.add_route(route.method, route.url, route.handler)
    return app
register.py:
async def register(request):
    post_data = await request.post()
    print('Gotta: ', post_data)
    login, password = post_data['login'], post_data['password']
    matches = await request.app['db'].users.find(
        {'login': login}).count()
    ...
Pushing commonly used objects into the application's storage is an accepted way of handling database connections and the like.
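With that change in place, the test fixture can point the app at a throwaway database before the test client starts. A hedged sketch (the 'testdb_for_tests' name is my assumption):

import pytest
from motor.motor_asyncio import AsyncIOMotorClient

from main import make_app

pytest_plugins = 'aiohttp.pytest_plugin'

@pytest.fixture
def cli(loop, test_client):
    app = make_app(loop=loop)
    # Swap in a dedicated test database so the test never touches real data.
    app['db'] = AsyncIOMotorClient(io_loop=loop)['testdb_for_tests']
    return loop.run_until_complete(test_client(app))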

How to implement an async gRPC Python server?

I need to call a Celery task for each gRPC request and return the result.
In the default gRPC implementation, each request is processed in a separate thread from a thread pool.
In my case, the server is supposed to process ~400 requests in batch mode per second, so one request may have to wait 1 second for the result due to the batch processing, which means the size of the thread pool must be larger than 400 to avoid blocking.
Can this be done asynchronously?
Thanks a lot.
class EventReporting(ss_pb2.BetaEventReportingServicer, ss_pb2.BetaDeviceMgtServicer):
    def ReportEvent(self, request, context):
        res = tasks.add.delay(1, 2)
        result = res.get()  # <- here I have to block
        return ss_pb2.GeneralReply(message='Hello, %s!' % result.message)
As noted by @Michael in a comment, as of version 1.32, gRPC now supports asyncio in its Python API. If you're using an earlier version, you can still use the asyncio API via the experimental API: from grpc.experimental import aio. An asyncio hello world example has also been added to the gRPC repo. The following code is a copy of the example server:
import logging
import asyncio

from grpc import aio

import helloworld_pb2
import helloworld_pb2_grpc


class Greeter(helloworld_pb2_grpc.GreeterServicer):

    async def SayHello(self, request, context):
        return helloworld_pb2.HelloReply(message='Hello, %s!' % request.name)


async def serve():
    server = aio.server()
    helloworld_pb2_grpc.add_GreeterServicer_to_server(Greeter(), server)
    listen_addr = '[::]:50051'
    server.add_insecure_port(listen_addr)
    logging.info("Starting server on %s", listen_addr)
    await server.start()
    await server.wait_for_termination()


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    asyncio.run(serve())
See my other answer for how to implement the client.
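For completeness, here is a minimal matching asyncio client, adapted from the hello world client example in the gRPC repo (not the linked answer itself):

import asyncio

from grpc import aio

import helloworld_pb2
import helloworld_pb2_grpc

async def run():
    async with aio.insecure_channel('localhost:50051') as channel:
        stub = helloworld_pb2_grpc.GreeterStub(channel)
        response = await stub.SayHello(helloworld_pb2.HelloRequest(name='you'))
    print("Greeter client received: " + response.message)

if __name__ == '__main__':
    asyncio.run(run())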
It can be done asynchronously if your call to res.get can be done asynchronously (if it is defined with the async keyword).
While grpc.server says it requires a futures.ThreadPoolExecutor, it will actually work with any futures.Executor that calls the behaviors submitted to it on some thread other than the one on which they were passed. Were you to pass to grpc.server a futures.Executor implemented by you that only used one thread to carry out four hundred (or more) concurrent calls to EventReporting.ReportEvent, your server should avoid the kind of blocking that you describe.
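If you are on the newer asyncio API from the first answer, one hedged way to apply this idea to the servicer in the question is to push the blocking res.get() into a worker thread and await it. asyncio.to_thread needs Python 3.9+, and the ss_pb2_grpc module and servicer base name below are assumptions based on current protoc output:

import asyncio

class EventReporting(ss_pb2_grpc.EventReportingServicer):
    async def ReportEvent(self, request, context):
        res = tasks.add.delay(1, 2)
        # Wait for the Celery result in a worker thread so the event loop
        # keeps serving other RPCs while this request is pending.
        result = await asyncio.to_thread(res.get, timeout=10)
        return ss_pb2.GeneralReply(message='Hello, %s!' % result)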
In my opinion, this is a good, simple implementation of an async gRPC server, similar to an HTTP server based on aiohttp.
import asyncio
from concurrent import futures
import functools
import inspect
import threading

import grpc
from grpc import _server


def _loop_mgr(loop: asyncio.AbstractEventLoop):
    asyncio.set_event_loop(loop)
    loop.run_forever()

    # If we reach here, the loop was stopped.
    # We should gather any remaining tasks and finish them.
    pending = asyncio.Task.all_tasks(loop=loop)
    if pending:
        loop.run_until_complete(asyncio.gather(*pending))


class AsyncioExecutor(futures.Executor):

    def __init__(self, *, loop=None):
        super().__init__()
        self._shutdown = False
        self._loop = loop or asyncio.get_event_loop()
        self._thread = threading.Thread(target=_loop_mgr, args=(self._loop,),
                                        daemon=True)
        self._thread.start()

    def submit(self, fn, *args, **kwargs):
        if self._shutdown:
            raise RuntimeError('Cannot schedule new futures after shutdown')

        if not self._loop.is_running():
            raise RuntimeError("Loop must be started before any function can "
                               "be submitted")

        if inspect.iscoroutinefunction(fn):
            coro = fn(*args, **kwargs)
            return asyncio.run_coroutine_threadsafe(coro, self._loop)
        else:
            func = functools.partial(fn, *args, **kwargs)
            return self._loop.run_in_executor(None, func)

    def shutdown(self, wait=True):
        self._loop.stop()
        self._shutdown = True
        if wait:
            self._thread.join()

# --------------------------------------------------------------------------- #


async def _call_behavior(rpc_event, state, behavior, argument, request_deserializer):
    context = _server._Context(rpc_event, state, request_deserializer)
    try:
        return await behavior(argument, context), True
    except Exception as e:  # pylint: disable=broad-except
        with state.condition:
            if e not in state.rpc_errors:
                details = 'Exception calling application: {}'.format(e)
                _server.logging.exception(details)
                _server._abort(state, rpc_event.operation_call,
                               _server.cygrpc.StatusCode.unknown, _server._common.encode(details))
        return None, False


async def _take_response_from_response_iterator(rpc_event, state, response_iterator):
    try:
        return await response_iterator.__anext__(), True
    except StopAsyncIteration:
        return None, True
    except Exception as e:  # pylint: disable=broad-except
        with state.condition:
            if e not in state.rpc_errors:
                details = 'Exception iterating responses: {}'.format(e)
                _server.logging.exception(details)
                _server._abort(state, rpc_event.operation_call,
                               _server.cygrpc.StatusCode.unknown, _server._common.encode(details))
        return None, False


async def _unary_response_in_pool(rpc_event, state, behavior, argument_thunk,
                                  request_deserializer, response_serializer):
    argument = argument_thunk()
    if argument is not None:
        response, proceed = await _call_behavior(rpc_event, state, behavior,
                                                 argument, request_deserializer)
        if proceed:
            serialized_response = _server._serialize_response(
                rpc_event, state, response, response_serializer)
            if serialized_response is not None:
                _server._status(rpc_event, state, serialized_response)


async def _stream_response_in_pool(rpc_event, state, behavior, argument_thunk,
                                   request_deserializer, response_serializer):
    argument = argument_thunk()
    if argument is not None:
        # Notice this calls the normal `_call_behavior` not the awaitable version.
        response_iterator, proceed = _server._call_behavior(
            rpc_event, state, behavior, argument, request_deserializer)
        if proceed:
            while True:
                response, proceed = await _take_response_from_response_iterator(
                    rpc_event, state, response_iterator)
                if proceed:
                    if response is None:
                        _server._status(rpc_event, state, None)
                        break
                    else:
                        serialized_response = _server._serialize_response(
                            rpc_event, state, response, response_serializer)
                        print(response)
                        if serialized_response is not None:
                            print("Serialized Correctly")
                            proceed = _server._send_response(rpc_event, state,
                                                             serialized_response)
                            if not proceed:
                                break
                        else:
                            break
                else:
                    break


_server._unary_response_in_pool = _unary_response_in_pool
_server._stream_response_in_pool = _stream_response_in_pool

if __name__ == '__main__':
    server = grpc.server(AsyncioExecutor())
    # Add Servicer and Start Server Here
link to original:
https://gist.github.com/seglberg/0b4487b57b4fd425c56ad72aba9971be
