AttributeError: 'str' object has no attribute 'copy' in flask & celery - python

I have a huge data set in Elasticsearch which I need to export to CSV at the end user's request. The end user sends a query which filters my Elasticsearch data and generates the results. Since the CSV files are big and the processing takes a long time, I want to run this process in the background.
To implement this I created a Flask app and integrated the Celery module. My Celery task accepts the request parameters and starts the background processing of the CSV.
Below is my code -
from flask import Flask, request, jsonify
from task import make_celery
import json, sys, time, pandas, boto3
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

app = Flask(__name__)
app.config.update(
    CELERY_BROKER_URL='sqs://AKIARYGF4GTjnjkKJXHHR:EkLmCgD3Ch+0AqLe+cdlTwnknkXwVMbkSAG4Jm@',
)
celery = make_celery(app)

@app.route("/export", methods=['POST', 'GET'])
def process():
    exportList.delay(index=request.args['index'], query=request.args['query'],
                     export_id=request.args['export_id'])
    return 'I sent async request!'

@celery.task(name='example.exportList')
def exportList(index, query, export_id):
    start_time = time.time()
    print("\ncreating client instance of Elasticsearch")
    elastic_client = Elasticsearch(['xyz-test-5q76b4fabvhjjbhjbjbjb2bt6xrhhsi.ap-south-1.es.amazonaws.com'],
                                   http_auth=('test', '854852#123$%'),
                                   scheme="https",
                                   port=443,
                                   )
    scan_docs = pandas.DataFrame()
    for hit in scan(elastic_client, index=index, query=query):
        scan_source_data = hit["_source"]
        scan_id = hit["_id"]
        scan_doc_data = pandas.Series(scan_source_data, name=scan_id)
        scan_docs = scan_docs.append(scan_doc_data)
    scan_docs.to_csv("/tmp/scandocs.csv", ",")
    """
    Further intimating the user that the file is ready via email
    """

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
I am getting an error in my Celery console:
[2021-08-17 07:24:15,229: ERROR/ForkPoolWorker-2] Task example.exportList[1d30ab46-f4de-408a-8add-e3b98d633bef] raised unexpected: AttributeError("'str' object has no attribute 'copy'")
Traceback (most recent call last):
  File "/home/ubuntu/.local/lib/python3.8/site-packages/celery/app/trace.py", line 450, in trace_task
    R = retval = fun(*args, **kwargs)
  File "/home/ubuntu/celery/task.py", line 14, in __call__
    return self.run(*args, **kwargs)
  File "/home/ubuntu/celery/example.py", line 47, in exportList
    for hit in scan(elastic_client, index=index, query=query):
  File "/home/ubuntu/.local/lib/python3.8/site-packages/elasticsearch/helpers/actions.py", line 550, in scan
    query = query.copy() if query else {}
AttributeError: 'str' object has no attribute 'copy'
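The traceback points at the cause: request.args values are always strings, while elasticsearch.helpers.scan expects query to be a dict (it calls query.copy()). A minimal sketch of a fix, assuming the client sends the query as a JSON string:

import json

@app.route("/export", methods=['POST', 'GET'])
def process():
    # request.args['query'] is a str; parse it into a dict before handing it
    # to the task, so scan() receives something with a .copy() method.
    query = json.loads(request.args['query'])
    exportList.delay(index=request.args['index'], query=query,
                     export_id=request.args['export_id'])
    return 'I sent async request!'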

Related

RuntimeError: Working outside of application context Error in Flask Server

I am trying to use the Flask server, but since I faced an error, I started debugging it by removing code to simplify it, and finally reached the following error:
Traceback (most recent call last):
  File "C:\Code\SportsPersonClassifier\server\server.py", line 18, in <module>
    print(classify_image())
  File "C:\Code\SportsPersonClassifier\server\server.py", line 10, in classify_image
    response = jsonify(util.classify_image(util.get_b64_for_virat()))
  File "E:\Users\Acer\anaconda3\lib\site-packages\flask\json\__init__.py", line 358, in jsonify
    if current_app.config["JSONIFY_PRETTYPRINT_REGULAR"] or current_app.debug:
  File "E:\Users\Acer\anaconda3\lib\site-packages\werkzeug\local.py", line 436, in __get__
    obj = instance._get_current_object()
  File "E:\Users\Acer\anaconda3\lib\site-packages\werkzeug\local.py", line 565, in _get_current_object
    return self.__local()  # type: ignore
  File "E:\Users\Acer\anaconda3\lib\site-packages\flask\globals.py", line 52, in _find_app
    raise RuntimeError(_app_ctx_err_msg)
RuntimeError: Working outside of application context.
This typically means that you attempted to use functionality that needed
to interface with the current application object in some way. To solve
this, set up an application context with app.app_context(). See the
documentation for more information.
This is my code in server.py:
from flask import Flask, request, jsonify
import util

app = Flask(__name__)

@app.route('/classify_image', methods=['GET', 'POST'])
def classify_image():
    response = jsonify(util.classify_image(util.get_b64_for_image()))
    return response

if __name__ == "__main__":
    print("Starting Python Flask Server For Sports Celebrity Image Classification")
    util.load_saved_artifacts()
    print(classify_image())
But the exact same code works without any error if I just remove jsonify() from the classify_image() function, like this:
def classify_image():
    response = util.classify_image(util.get_b64_for_image())
    return response
If I write the classify_image function without jsonify, it works as expected without error. I tried to solve the problem by reading several StackOverflow answers, but none of them worked for my code. Please help me solve the problem with jsonify. Thank you.
As the error suggests, and as required by jsonify:
This requires an active request or application context
Calling the function this way via __main__ will not be within a Flask app context. Instead use app.run() to start the dev server, and then navigate to your route.
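For completeness, a minimal sketch of both options (the util calls are the asker's and are assumed to exist):

if __name__ == "__main__":
    util.load_saved_artifacts()

    # Option 1: quick one-off test; push an application context so jsonify works.
    with app.app_context():
        print(classify_image())

    # Option 2: normal use; start the dev server and hit the route over HTTP.
    app.run(port=5000)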

local postgresql environment doesn't connect with sqlalchemy

I have a Python script called list.py which I use to interact with a local PostgreSQL database named data_sample. This is my code:
import os
from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker

engine = create_engine(os.getenv("data_sample"))
db = scoped_session(sessionmaker(bind=engine))

def main():
    flights = db.execute("SELECT origin, destination, duration FROM flights").fetchall()
    for flight in flights:
        print(f"{flight.origin} to {flight.destination}, {flight.duration} minutes.")

if __name__ == "__main__":
    main()
When I execute the code as python list.py, I get the following error:
Traceback (most recent call last):
  File "list.py", line 6, in <module>
    engine = create_engine(os.getenv("data_sample"))
  File "/Users/admin/.pyenv/versions/3.7.3/lib/python3.7/site-packages/sqlalchemy/engine/__init__.py", line 500, in create_engine
    return strategy.create(*args, **kwargs)
  File "/Users/admin/.pyenv/versions/3.7.3/lib/python3.7/site-packages/sqlalchemy/engine/strategies.py", line 56, in create
    plugins = u._instantiate_plugins(kwargs)
AttributeError: 'NoneType' object has no attribute '_instantiate_plugins'
It seems like it doesn't know my database called data_sample. How do I fix this?
os.getenv is used to get the value of an environment variable. Since no environment variable named data_sample is set, os.getenv returns None, and create_engine(None) fails with the NoneType error above. Use this instead:
engine = create_engine("postgresql://username:password#localhost:port/name_of_database")
If you haven't changed the default settings then your username will be postgres and port will be 5432.
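If you would rather keep the URL in an environment variable, as the original code intended, here is a sketch; the variable name DATABASE_URL is just an example:

# In the shell first, e.g.:
#   export DATABASE_URL="postgresql://postgres:password@localhost:5432/data_sample"
import os
from sqlalchemy import create_engine

url = os.getenv("DATABASE_URL")  # hypothetical variable name
if url is None:
    # fail early with a clear message instead of the NoneType error above
    raise RuntimeError("DATABASE_URL environment variable is not set")
engine = create_engine(url)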

getting error 'function' object has no attribute 'as_view' while trying to run flask app

I started writing a Flask app after more than a year away, so I guess I have forgotten something. The code below results in an error:
from flask import Flask
from flask import jsonify
from flask_restplus import Resource, Api
from home_iot.config import reader
from download_audio.ydla import download

app = Flask(__name__)
_api = Api(app, catch_all_404s=True, version=0.1,
           title="REST HTTP API's Gateway",
           description="REST API gateway")
api_ns = _api.namespace("iot", description="API.")

@api_ns.route("/tcpserver", methods=["GET"])
def advertise_tcpserver():
    ip = reader.get_server_ip()
    return jsonify({"tcpserver": ip})

if __name__ == "__main__":
    app.run(host='127.0.0.1')
Error is:
$ python app.py
Traceback (most recent call last):
  File "app.py", line 29, in <module>
    @api_ns.route("/tcpserver", methods=["GET"])
  File "/Users/ciasto/pyenvs/flaskrestplusiot/lib/python2.7/site-packages/flask_restplus/namespace.py", line 98, in wrapper
    self.add_resource(cls, *urls, **kwargs)
  File "/Users/ciasto/pyenvs/flaskrestplusiot/lib/python2.7/site-packages/flask_restplus/namespace.py", line 87, in add_resource
    api.register_resource(self, resource, *ns_urls, **kwargs)
  File "/Users/ciasto/pyenvs/flaskrestplusiot/lib/python2.7/site-packages/flask_restplus/api.py", line 264, in register_resource
    self._register_view(self.app, resource, namespace, *urls, **kwargs)
  File "/Users/ciasto/pyenvs/flaskrestplusiot/lib/python2.7/site-packages/flask_restplus/api.py", line 287, in _register_view
    resource_func = self.output(resource.as_view(endpoint, self, *resource_class_args,
AttributeError: 'function' object has no attribute 'as_view'
Hope this helps those who have this same error and have not found the solution.
To complete the answer given by @v25, you must provide resources to your namespace by inheriting from the Resource class in flask_restplus.
The following example works for me.
Environment:
ubuntu 18.04
python 3.7.1
python requirements:
flask==1.1.2
flask-restplus==0.13.0
werkzeug==0.16.1
Source code:
iot.py
from flask_restplus import Namespace, Resource

api_ns = Namespace("iot", description="API.")

@api_ns.route("/tcpserver")
class AdvertiseTcpserver(Resource):
    def get(self):
        # TODO: return the correct ip value
        return {"tcpserver": "ip"}
app.py
from .iot import api_ns
from flask import Flask
from flask_restplus import Api

app = Flask(__name__)
_api = Api(app, catch_all_404s=True, version=0.1,
           title="REST HTTP API's Gateway",
           description="REST API gateway")
_api.add_namespace(api_ns, path='/some/prefix')

app.run()
Test command:
#!/bin/sh
wget localhost:5000/some/prefix/tcpserver
Please let me know if this helped.
I don't think that's the correct way to define the namespace with flask_restplus. Have a look at the scaling docs.
You're probably looking for something like:
iot.py
from flask import jsonify
from flask_restplus import Namespace
from home_iot.config import reader

api_ns = Namespace("iot", description="API.")

@api_ns.route("/tcpserver", methods=["GET"])
def advertise_tcpserver():
    ip = reader.get_server_ip()
    return jsonify({"tcpserver": ip})
Then in your main app.py:
# other imports
from .iot import api_ns

app = Flask(__name__)
_api = Api(app, catch_all_404s=True, version=0.1,
           title="REST HTTP API's Gateway",
           description="REST API gateway")
_api.add_namespace(api_ns, path='/some/prefix')
Also, you appear to be using Python 2.7, which has been discontinued. I'd suggest upgrading to the latest version, using either a virtual environment or Docker so as not to mess with your system's Python.

Get virtual machine created time on Azure using Python API

My requirement is to get all VMs in a subscription along with their launch (created) time. I didn't find the VM creation time in the dashboard, whereas in the Activity log I found a timestamp. I would like to fetch all VMs created under one subscription id along with their creation time.
(For this account 2FA is enabled, so UserPassCredentials won't work.)
List of all VMs in a subscription id:
import os
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.compute import ComputeManagementClient

subscription_id = os.environ['AZURE_SUBSCRIPTION_ID']
credentials = ServicePrincipalCredentials(client_id=os.environ['AZURE_CLIENT_ID'], secret=os.environ['AZURE_CLIENT_SECRET'], tenant=os.environ['AZURE_TENANT_ID'])
compute_client = ComputeManagementClient(credentials, subscription_id)

for vm in compute_client.virtual_machines.list_all():
    print("\tVM: {}".format(vm.name))
Fetch created time from Activity log:
import os
import datetime
from pprint import pprint
from azure.monitor import MonitorClient
from azure.common.credentials import ServicePrincipalCredentials

today = datetime.datetime.now().date()
filter = " and ".join([ "eventTimestamp le '{}T00:00:00Z'".format(today), "resourceGroupName eq 'test-group'" ])
subscription_id = 'xxxxx'
credentials = ServicePrincipalCredentials(client_id=os.environ['AZURE_CLIENT_ID'], secret=os.environ['AZURE_CLIENT_SECRET'], tenant=os.environ['AZURE_TENANT_ID'])
client = MonitorClient(credentials, subscription_id)

select = ",".join([ "Administrative", "Write VirtualMachines" ])
activity_logs = client.activity_logs.list( filter=filter, select=select )
for i in activity_logs:
    pprint(i.__dict__)
I'm able to get all the VMs (1st sample program); however, while trying to fetch the Activity log I get an error (2nd sample program).
Error:
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Library/Python/2.7/site-packages/msrest/paging.py", line 109, in __next__
    self.advance_page()
  File "/Library/Python/2.7/site-packages/msrest/paging.py", line 95, in advance_page
    self._response = self._get_next(self.next_link)
  File "/Library/Python/2.7/site-packages/azure/monitor/operations/activity_logs_operations.py", line 117, in internal_paging
    raise models.ErrorResponseException(self._deserialize, response)
azure.monitor.models.error_response.ErrorResponseException: Operation returned an invalid status code 'Bad Request'
Can somebody help me find the issue, please? Any help is really appreciated.
I tried to fetch the activity log of my resource group today using the code you provided, and I reproduced your issue.
My code:
import os
import datetime
from pprint import pprint
from azure.monitor import MonitorClient
from azure.common.credentials import ServicePrincipalCredentials

subscription_id = '***'
client_id = '***'
secret = '***'
tenant = '***'

today = datetime.datetime.now().date()
filter = " and ".join([ "eventTimestamp le '{}T00:00:00Z'".format(today), "resourceGroupName eq 'jay'" ])
credentials = ServicePrincipalCredentials(client_id=client_id, secret=secret, tenant=tenant)
client = MonitorClient(credentials, subscription_id)

select = ",".join([ "eventName", "operationName" ])
print select
print filter

activity_logs = client.activity_logs.list( filter=filter, select=select )
for log in activity_logs:
    # assert isinstance(log, azure.monitor.models.EventData)
    print(" ".join([
        log.event_name.localized_value,
        log.operation_name.localized_value
    ]))
Running result:
eventName,operationName
eventTimestamp le '2017-10-17T00:00:00Z' and resourceGroupName eq 'jay'
Traceback (most recent call last):
  File "E:/PythonWorkSpace/ActiveLog/FetchActiveLog.py", line 24, in <module>
    for log in activity_logs:
  File "E:\Python27\lib\site-packages\msrest\paging.py", line 109, in __next__
    self.advance_page()
  File "E:\Python27\lib\site-packages\msrest\paging.py", line 95, in advance_page
    self._response = self._get_next(self.next_link)
  File "E:\Python27\lib\site-packages\azure\monitor\operations\activity_logs_operations.py", line 117, in internal_paging
    raise models.ErrorResponseException(self._deserialize, response)
azure.monitor.models.error_response.ErrorResponseException: Operation returned an invalid status code 'Bad Request'
After researching the Azure Monitor Python SDK, I found the difference:
filter = " and ".join([ "eventTimestamp ge '{}T00:00:00Z'".format(today), "resourceGroupName eq 'jay'" ])
Here it is ge, not le.
I modified the keyword and then the code worked well for me:
eventName,operationName
eventTimestamp ge '2017-10-17T00:00:00Z' and resourceGroupName eq 'jay'
End request Microsoft.Compute/virtualMachines/delete
End request Microsoft.Compute/virtualMachines/delete
End request Microsoft.Compute/virtualMachines/delete
Begin request Microsoft.Compute/virtualMachines/delete
End request Microsoft.Compute/virtualMachines/deallocate/action
End request Microsoft.Compute/virtualMachines/deallocate/action
Begin request Microsoft.Compute/virtualMachines/deallocate/action
End request Microsoft.Compute/virtualMachines/write
End request Microsoft.Compute/disks/write
End request Microsoft.Compute/virtualMachines/write
End request Microsoft.Network/networkSecurityGroups/write
End request Microsoft.Network/networkInterfaces/write
End request Microsoft.Network/publicIPAddresses/write
Hope it helps you.
Call the az CLI from Python.
Use the command below:
az vm list
This will list JSON data with fields you can filter, e.g.:
date = vm['timeCreated']
# "timeCreated": "2022-06-24T14:13:00.326985+00:00"
Based on the doc, it seems your date should be escaped. Moreover, it seems they take a datetime (and not a date):
https://learn.microsoft.com/en-us/rest/api/monitor/activitylogs
filter = " and ".join([
"eventTimestamp le '{}T00:00:00Z'".format(today),
"resourceGroupName eq 'test-group'"
])

example urllib3 and threading in python

I am trying to use urllib3 in a simple thread to fetch several wiki pages.
The script will create one connection for every thread (I don't understand why) and hang forever.
Any tip, advice, or simple example of urllib3 and threading?
import threadpool
from urllib3 import connection_from_url

HTTP_POOL = connection_from_url(url, timeout=10.0, maxsize=10, block=True)

def fetch(url, fields):
    kwargs = {'retries': 6}
    return HTTP_POOL.get_url(url, fields, **kwargs)

pool = threadpool.ThreadPool(5)
requests = threadpool.makeRequests(fetch, iterable)
[pool.putRequest(req) for req in requests]
@Lennart's script got this error:
http://en.wikipedia.org/wiki/2010-11_Premier_LeagueTraceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/threadpool.py", line 156, in run
http://en.wikipedia.org/wiki/List_of_MythBusters_episodeshttp://en.wikipedia.org/wiki/List_of_Top_Gear_episodes http://en.wikipedia.org/wiki/List_of_Unicode_characters     result = request.callable(*request.args, **request.kwds)
  File "crawler.py", line 9, in fetch
    print url, conn.get_url(url)
AttributeError: 'HTTPConnectionPool' object has no attribute 'get_url'
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/threadpool.py", line 156, in run
    result = request.callable(*request.args, **request.kwds)
  File "crawler.py", line 9, in fetch
    print url, conn.get_url(url)
AttributeError: 'HTTPConnectionPool' object has no attribute 'get_url'
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/threadpool.py", line 156, in run
    result = request.callable(*request.args, **request.kwds)
  File "crawler.py", line 9, in fetch
    print url, conn.get_url(url)
AttributeError: 'HTTPConnectionPool' object has no attribute 'get_url'
Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/threadpool.py", line 156, in run
    result = request.callable(*request.args, **request.kwds)
  File "crawler.py", line 9, in fetch
    print url, conn.get_url(url)
AttributeError: 'HTTPConnectionPool' object has no attribute 'get_url'
After adding import threadpool; import urllib3 and tpool = threadpool.ThreadPool(4), @user318904's code got this error:
Traceback (most recent call last):
  File "crawler.py", line 21, in <module>
    tpool.map_async(fetch, urls)
AttributeError: ThreadPool instance has no attribute 'map_async'
Here is my take, a more current solution using Python 3 and concurrent.futures.ThreadPoolExecutor.
import urllib3
from concurrent.futures import ThreadPoolExecutor

urls = ['http://en.wikipedia.org/wiki/2010-11_Premier_League',
        'http://en.wikipedia.org/wiki/List_of_MythBusters_episodes',
        'http://en.wikipedia.org/wiki/List_of_Top_Gear_episodes',
        'http://en.wikipedia.org/wiki/List_of_Unicode_characters',
        ]

def download(url, cmanager):
    response = cmanager.request('GET', url)
    if response and response.status == 200:
        print("+++++++++ url: " + url)
        print(response.data[:1024])

connection_mgr = urllib3.PoolManager(maxsize=5)
thread_pool = ThreadPoolExecutor(5)
for url in urls:
    thread_pool.submit(download, url, connection_mgr)
Some remarks
My code is based on a similar example from the Python Cookbook by Beazley and Jones.
I particularly like the fact that you only need a standard module besides urllib3.
The setup is extremely simple, and if you are only going for side-effects in download (like printing, saving to a file, etc.), there is no additional effort in joining the threads.
If you want something different, ThreadPoolExecutor.submit actually returns whatever download would return, wrapped in a Future (see the sketch after these remarks).
I found it helpful to align the number of threads in the thread pool with the number of HTTPConnections in the connection pool (via maxsize). Otherwise you might encounter (harmless) warnings when all threads try to access the same server (as in the example).
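A minimal sketch of collecting those Futures, assuming download were changed to return response.data:

futures = [thread_pool.submit(download, url, connection_mgr) for url in urls]
for future in futures:
    data = future.result()  # blocks until that download finishes; re-raises any exception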
Obviously it will create one connection per thread; how else would each thread be able to fetch a page? And you try to use the same connection, made from one URL, for all URLs. That can hardly be what you intended.
This code worked just fine:
import threadpool
from urllib3 import connection_from_url

def fetch(url):
    kwargs = {'retries': 6}
    conn = connection_from_url(url, timeout=10.0, maxsize=10, block=True)
    print url, conn.get_url(url)
    print "Done!"

pool = threadpool.ThreadPool(4)
urls = ['http://en.wikipedia.org/wiki/2010-11_Premier_League',
        'http://en.wikipedia.org/wiki/List_of_MythBusters_episodes',
        'http://en.wikipedia.org/wiki/List_of_Top_Gear_episodes',
        'http://en.wikipedia.org/wiki/List_of_Unicode_characters',
        ]
requests = threadpool.makeRequests(fetch, urls)
[pool.putRequest(req) for req in requests]
pool.wait()
Thread programming is hard, so I wrote workerpool to make exactly what you're doing easier.
More specifically, see the Mass Downloader example.
To do the same thing with urllib3, it looks something like this:
import urllib3
import workerpool

conn_pool = urllib3.connection_from_url("foo", maxsize=3)

def download(url):
    r = conn_pool.get_url(url)
    # TODO: Do something with r.data
    print "Downloaded %s" % url

# Initialize a pool, 5 threads in this case
pool = workerpool.WorkerPool(size=5)

# The ``download`` method will be called with a line from the second
# parameter for each job.
pool.map(download, open("urls.txt").readlines())

# Send shutdown jobs to all threads, and wait until all the jobs have been completed
pool.shutdown()
pool.wait()
For more sophisticated code, have a look at workerpool.EquippedWorker (and the tests here for example usage). You can make the pool be the toolbox you pass in.
I use something like this:
# excluding setup for threadpool etc
upool = urllib3.HTTPConnectionPool('en.wikipedia.org', block=True)

urls = ['/wiki/2010-11_Premier_League',
        '/wiki/List_of_MythBusters_episodes',
        '/wiki/List_of_Top_Gear_episodes',
        '/wiki/List_of_Unicode_characters',
        ]

def fetch(path):
    # add error checking
    return upool.get_url(path).data

tpool = ThreadPool()
tpool.map_async(fetch, urls)
# either wait on the result object or give map_async a callback function for the results
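Note that map_async is a method of the standard library's multiprocessing.pool.ThreadPool, not of the third-party threadpool package's ThreadPool, which is why the edit in the question raised AttributeError. A sketch of the setup this answer assumes:

from multiprocessing.pool import ThreadPool

tpool = ThreadPool(4)
result = tpool.map_async(fetch, urls)
pages = result.get()  # blocks until all fetches have completed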
