Scrapy - Reactor not Restartable in Django - python

I ran into a problem when trying to run my spiders from Django. Some months ago, this method worked for me:
def crawllist(self, lists):
    """Schedule one crawl per topic and block on the reactor until all are done.

    Each item of ``lists`` must expose ``name``, ``type``, ``author`` and
    ``base_url``. ``type`` is passed to ``CrawlerRunner.crawl`` as the spider
    to run; ``author`` and ``base_url`` are forwarded as spider arguments.
    """
    crawler_runner = CrawlerRunner(get_project_settings())
    for topic in lists:
        logging.error("topic name is %s" % topic.name)
        crawler_runner.crawl(
            topic.type,
            author=topic.author,
            links=topic.base_url,
        )
    all_crawls_done = crawler_runner.join()
    # Stop the reactor once every scheduled crawl has finished, whether it
    # succeeded or failed.
    all_crawls_done.addBoth(lambda _: reactor.stop())
    logging.error("start crawl")
    # Blocks the calling thread until reactor.stop() is invoked above.
    reactor.run(installSignalHandlers=False)
But it doesn't work now. I get these errors:
Internal Server Error: /CreateTopicServlet
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/django/core/handlers/exception.py", line 34, in inner
response = get_response(request)
File "/usr/local/lib/python3.5/dist-packages/django/core/handlers/base.py", line 126, in _get_response
response = self.process_exception_by_middleware(e, request)
File "/usr/local/lib/python3.5/dist-packages/django/core/handlers/base.py", line 124, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/home/zdc/Push/job/views.py", line 150, in CreateTopicServlet
sp.crawllist([item])
File "/home/zdc/Push/job/SpiderManager.py", line 59, in crawllist
reactor.run(installSignalHandlers=False)
File "/usr/local/lib/python3.5/dist-packages/twisted/internet/base.py", line 1260, in run
self.startRunning(installSignalHandlers=installSignalHandlers)
File "/usr/local/lib/python3.5/dist-packages/twisted/internet/base.py", line 1240, in startRunning
ReactorBase.startRunning(self)
File "/usr/local/lib/python3.5/dist-packages/twisted/internet/base.py", line 746, in startRunning
raise error.ReactorAlreadyRunning()
I have read all the answers about this, but none of them work for me. The spider runs successfully when I run it locally without Django, but inside Django I get the "Reactor not Restartable" error.
I have tried a method like that
def crawl(self, type, url, author):
    """Import the spider module for ``type`` and hand a spider to a CrawlerRunner.

    The module is looked up as ``Spidermanager.spiders.<type>spider`` and its
    ``zhihuSpider`` class is instantiated with ``author`` and ``links=url``.
    """
    print('crawl11')
    # Dynamic import of the selected spider's module.
    spider_module = importlib.import_module(
        "Spidermanager.spiders.{}".format(type + 'spider')
    )
    spider = spider_module.zhihuSpider(author=author, links=url)
    print(spider.start_urls)
    # NOTE(review): the value returned by crawl() is discarded and this
    # function neither starts a reactor nor waits for completion, so the
    # message below is printed right after scheduling.
    CrawlerRunner(get_project_settings()).crawl(spider)
    print('crawl finished')
It solves the reactor problem, but the spider does not seem to run and crawls nothing.

Related

eBay and Authlib Unconventional token type

I'm trying to use Authlib library to access new eBay REST API (as Authorization code grant)
Here is my code;
import json
import os
import webbrowser
from time import time
from authlib.integrations.requests_client import OAuth2Session
from rpi_order_data_sync import settings
def auth(seller):
    """Return an eBay ``OAuth2Session`` for ``seller``.

    ``seller`` doubles as the path of the JSON file that caches the token.
    When that file is missing, the interactive authorization-code flow runs
    (browser + pasted callback URL) and the fetched token is saved to it.
    When the file exists, the cached token is loaded into a new session.

    Args:
        seller: Path of the per-seller token cache file.

    Returns:
        An ``OAuth2Session`` carrying the seller's token.
    """

    def token_updater(token, seller=seller):
        # Mode "w" creates the file when it is missing, so no separate
        # "touch" step is required before dumping the token.
        with open(seller, "w") as token_file:
            json.dump(token, token_file)

    scope = ["https://api.ebay.com/oauth/api_scope/sell.fulfillment.readonly"]

    if os.path.exists(seller):
        # A cached token exists: build the session from it so that a session
        # is returned on this path as well.
        with open(seller) as token_file:
            cached_token = json.load(token_file)
        return OAuth2Session(
            settings.E_APP_ID,
            settings.E_CERT_ID,
            redirect_uri=settings.E_RU_NAME,
            scope=scope,
            token=cached_token,
        )

    ebay = OAuth2Session(
        settings.E_APP_ID,
        settings.E_CERT_ID,
        redirect_uri=settings.E_RU_NAME,
        scope=scope,
    )
    uri, state = ebay.create_authorization_url(
        "https://auth.sandbox.ebay.com/oauth2/authorize",
    )
    print("Please go to {} and authorize access.".format(uri))
    try:
        webbrowser.open_new_tab(uri)
    except webbrowser.Error:
        # Best effort only: the URL was already printed for manual use.
        pass
    authorization_response = input("Please enter callback URL: ")  # nosec
    token = ebay.fetch_token(
        "https://api.sandbox.ebay.com/identity/v1/oauth2/token",
        authorization_response=authorization_response,
    )
    print(token)
    token_updater(token)
    return ebay
The problem is eBay's token response has an unconventional token type named "User Access Token" instead of "Bearer". Therefore I get this error;
Traceback (most recent call last):
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/authlib/integrations/requests_client/oauth2_session.py", line 37, in __call__
req.url, req.headers, req.body = self.prepare(
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/authlib/oauth2/auth.py", line 91, in prepare
sign = self.SIGN_METHODS[token_type.lower()]
KeyError: 'user access token'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/bin/rods", line 11, in <module>
load_entry_point('rpi-order-data-sync', 'console_scripts', 'rods')()
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/home/thiras/HDD/freelancer/contentassasin/rpi-order-data-sync/rpi_order_data_sync/main.py", line 132, in sync_ebay_orders
orders = ebay.get(
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/requests/sessions.py", line 543, in get
return self.request('GET', url, **kwargs)
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/authlib/integrations/requests_client/oauth2_session.py", line 113, in request
return super(OAuth2Session, self).request(
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/requests/sessions.py", line 516, in request
prep = self.prepare_request(req)
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/requests/sessions.py", line 449, in prepare_request
p.prepare(
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/requests/models.py", line 318, in prepare
self.prepare_auth(auth, url)
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/requests/models.py", line 549, in prepare_auth
r = auth(self)
File "/home/thiras/.local/share/virtualenvs/rpi-order-data-sync-tA0i1rrc/lib/python3.8/site-packages/authlib/integrations/requests_client/oauth2_session.py", line 41, in __call__
raise UnsupportedTokenTypeError(description=description)
authlib.integrations.base_client.errors.UnsupportedTokenTypeError: unsupported_token_type: Unsupported token_type: 'user access token'
I've noticed Compliance fix for non-standard section at Authlib documentation but couldn't figure out how to do this fix or even possible in this way.
I've found a solution, and it also works with the requests-oauthlib package. It seems to be working flawlessly so far. The main struggle was creating a fake requests.Response model, since requests.Response has no setter for the .text or .content attributes, so modifying them was impossible.
So I've created a FakeResponse class that only mimics .json() method since it was the only method used by Authlib.
class FakeResponse:
    """Fake stand-in for the requests Response class.

    Wraps an already-parsed JSON payload and exposes it through ``json()``.

    Args:
        data: The object ``json()`` should return.
        status_code: HTTP status to report; defaults to 200.
    """

    def __init__(self, data, status_code=200):
        self.data = data
        # NOTE(review): newer Authlib releases appear to read
        # resp.status_code before calling resp.json() -- confirm against the
        # installed version. Defaulting to 200 keeps existing callers working.
        self.status_code = status_code

    def json(self):
        """Mocks requests.Response.json()."""
        return self.data
After that I've created an access_token_response hook;
def non_compliant_token_type(resp):
    """Hook that rewrites the token response's ``token_type`` to "Bearer".

    Reads the JSON body of ``resp``, overwrites its ``token_type`` entry and
    returns the modified payload wrapped in a ``FakeResponse``.
    """
    payload = resp.json()
    payload["token_type"] = "Bearer"
    return FakeResponse(data=payload)
Please let me know if you have a better answer or any recommendations to improve it.

pika rabbitmq python 3.6

I am trying to use pika to connect with rabbitmq
def get_connection():
    """Open a blocking pika connection to MQ_SERVER:5672 on vhost "/".

    Authenticates with MQ_USER / MQ_PASS as plain credentials.
    """
    connection_params = pika.ConnectionParameters(
        host=MQ_SERVER,
        port=5672,
        virtual_host='/',
        credentials=pika.PlainCredentials(MQ_USER, MQ_PASS),
    )
    return pika.BlockingConnection(connection_params)
I can use those credentials with rabbitmqctl, the output is something like this:
# rabbitmqctl authenticate_user user pass
Authenticating user "user" ...
Success
I have also tried to just use strings with the values inside the function and get the same error. I also have telnet access on the rabbitmq port and the user has access to the channel.
When execute the python code I get this error:
Internal Server Error: /api/analysis/stream/finish/
Traceback (most recent call last):
File "/path/to/api/venv/lib/python3.6/site-packages/django/core/handlers/exception.py", line 34, in inner
response = get_response(request)
File "/path/to/api/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "/path/to/api/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/path/to/api/core/views.py", line 2465, in record_finsh
inform_process(video.filename)
File "/path/to/api/core/views.py", line 702, in inform_process
con = get_connection()
File "/path/to/api/base/rabitmq.py", line 7, in get_connection
connection = pika.BlockingConnection(pika.ConnectionParameters(host=MQ_SERVER, port=5672, virtual_host='/', credentials=credentials))
File "/path/to/api/venv/lib/python3.6/site-packages/pika/adapters/blocking_connection.py", line 360, in __init__
self._impl = self._create_connection(parameters, _impl_class)
File "/path/to/api/venv/lib/python3.6/site-packages/pika/adapters/blocking_connection.py", line 451, in _create_connection
raise self._reap_last_connection_workflow_error(error)
pika.exceptions.AMQPConnectionError
It looks to me like something happens on the line credentials = pika.PlainCredentials(MQ_USER, MQ_PASS), even though the error is reported on the next line. What does this function do exactly? Any ideas about what I am doing wrong?
EDIT:
I said I think the error is on this line credentials = pika.PlainCredentials(MQ_USER, MQ_PASS) because if I add something like:
def get_connection():
    """Debugging variant: build the credentials, then exit before connecting."""
    plain_credentials = pika.PlainCredentials(MQ_USER, MQ_PASS)
    exit()
    # Not reached when exit() terminates the interpreter; kept to mirror the
    # normal connection path.
    connection_params = pika.ConnectionParameters(
        host=MQ_SERVER,
        port=5672,
        virtual_host='/',
        credentials=plain_credentials,
    )
    return pika.BlockingConnection(connection_params)
I still get more or less the same error:
Internal Server Error: /api/analysis/stream/finish/
Traceback (most recent call last):
File "/path/to/api/venv/lib/python3.6/site-packages/django/core/handlers/exception.py", line 34, in inner
response = get_response(request)
File "/path/to/api/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "/path/to/api/venv/lib/python3.6/site-packages/django/core/handlers/base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/path/to/api/core/views.py", line 2465, in record_finsh
inform_process(video.filename)
File "/path/to/api/core/views.py", line 702, in inform_process
con = get_connection()
File "/path/to/api/base/rabitmq.py", line 7, in get_connection
return 0
File "/path/to/api/venv/lib/python3.6/site-packages/pika/adapters/blocking_connection.py", line 360, in __init__
self._impl = self._create_connection(parameters, _impl_class)
File "/path/to/api/venv/lib/python3.6/site-packages/pika/adapters/blocking_connection.py", line 451, in _create_connection
raise self._reap_last_connection_workflow_error(error)
pika.exceptions.AMQPConnectionError
Because of this I also tried replacing with actual values like credentials = pika.PlainCredentials('user', 'mq#pass') and also get the same result.
EDIT2: Answering the comments below.
def get_connection():
    """Open a blocking pika connection using hard-coded host and credentials."""
    connection_params = pika.ConnectionParameters(
        host='172.x.y.z',
        port=5672,
        virtual_host='/',
        credentials=pika.PlainCredentials('user', 'mq#passwd'),
    )
    return pika.BlockingConnection(connection_params)
Returns the same issue. Rabbit MQ runs on remote IP. I already tested and I can telnet to the IP.
pika.exceptions.AMQPConnectionError is raised when the host is not reachable by pika.
In case of invalid credentials, pika raises:
pika.exceptions.ConnectionClosedByBroker: (403, 'ACCESS_REFUSED - Login was refused using authentication mechanism PLAIN. For details see the broker logfile.')
for invalid virtual host:
pika.exceptions.ConnectionClosedByBroker: (530, 'NOT_ALLOWED - vhost / not found')
Check if the host or port value provided is valid.
Reference: pika docs

Error in Django when using matplotlib examples

I am testing several cases of Django and matplotlib such as this question or in french.
Each time, it works on my mac, but does not on my server, where I receive the following error:
Internal Server Error: /mj/charts/mplimage.png
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/django/core/handlers/exception.py", line 35, in inner
response = get_response(request)
File "/usr/local/lib/python3.6/dist-packages/django/core/handlers/base.py", line 128, in _get_response
response = self.process_exception_by_middleware(e, request)
File "/usr/local/lib/python3.6/dist-packages/django/core/handlers/base.py", line 126, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/root/src/jm/majority_judgment/views.py", line 39, in mplimage
canvas.print_png(response)
File "/usr/local/lib/python3.6/dist-packages/matplotlib/backends/backend_agg.py", line 526, in print_png
with cbook.open_file_cm(filename_or_obj, "wb") as fh:
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.6/dist-packages/matplotlib/cbook/__init__.py", line 624, in open_file_cm
fh, opened = to_filehandle(path_or_file, mode, True, encoding)
File "/usr/local/lib/python3.6/dist-packages/matplotlib/cbook/__init__.py", line 615, in to_filehandle
raise ValueError('fname must be a PathLike or file handle')
ValueError: fname must be a PathLike or file handle
[28/Mar/2018 19:09:11] "GET /mj/charts/mplimage.png HTTP/1.1" 500 82804
Here is a minimal snippet:
def mplimage(request):
    """Render an empty matplotlib figure as PNG directly into the response."""
    figure = matplotlib.figure.Figure()
    response = HttpResponse(content_type='image/png')
    # The HttpResponse object itself is handed to print_png as the output
    # target.
    FigureCanvasAgg(figure).print_png(response)
    plt.close(figure)
    return response
I have tried to update matplotlib, django and so on, but it did nothing...
At the moment, matplotlib's writing functions require the seek duck type in order to use the response as a file. You can write to a buffer instead, like this:
import io
def mplimage(request):
    """Return a matplotlib figure rendered as a PNG ``HttpResponse``.

    The figure is written to an in-memory buffer first and the buffer's bytes
    become the response body.
    """
    f = matplotlib.figure.Figure()
    # Code that sets up figure goes here; in the question, that's ...
    FigureCanvasAgg(f)  # attach an Agg canvas so the figure can be rendered
    buf = io.BytesIO()
    # Save this figure explicitly: plt.savefig() writes pyplot's *current*
    # figure, and f was created directly rather than through pyplot.
    f.savefig(buf, format='png')
    # No plt.close(f) here: f is not registered with pyplot, so there is
    # nothing for pyplot to release.
    return HttpResponse(buf.getvalue(), content_type='image/png')
You can just replace the response with a buffer and then add the buffer to the response. This will give an appropriate object to canvas.print_png() and keep code changes to a minimum.
def mplimage(request):
    """Render a matplotlib figure to PNG via a buffer and return it.

    The canvas prints into an ``io.BytesIO`` buffer and the buffer's contents
    are used as the body of the ``image/png`` response.
    """
    figure = matplotlib.figure.Figure()
    png_buffer = io.BytesIO()
    FigureCanvasAgg(figure).print_png(png_buffer)
    response = HttpResponse(png_buffer.getvalue(), content_type='image/png')
    # Clear the figure in case it is going to be reused.
    figure.clear()
    # Set Content-Length explicitly from the final response body.
    response['Content-Length'] = str(len(response.content))
    return response

Using django_xhtml2pdf with django 1.11" error: context must be a dict rather than Context."

Per one of the comments: I did change my code to:
# Build the template context as a plain dict holding the result of
# Provider.objects.all() under the 'providers' key.
providers = Provider.objects.all()
context = { 'providers':providers}
I know it didn't make a difference but figured I would try it anyway cause stranger things have happened. I am worried the error is within the module itself running on my version of django here.
I did see the other answers on this, and it confused me because I am just using what was documented here:
https://spapas.github.io/2015/11/27/pdf-in-django/#django-integration
for getting the django_xhtml2pdf stuff to work. My view is as such:
def providers_plain_old_view(request):
    """Render the providers template to PDF via ``generate_pdf``.

    An ``application/pdf`` response is passed as the output file object and
    the value returned by ``generate_pdf`` is returned to the caller.
    """
    pdf_response = HttpResponse(content_type='application/pdf')
    return generate_pdf(
        'ipaswdb/provider/providers_plain_old_view.html',
        file_object=pdf_response,
        context={'providers': Provider.objects.all()},
    )
Which I know now is bad in django 1.11.14 I am using, but no idea how to fix the error:
Traceback (most recent call last):
File "D:\Python27\lib\site-packages\django\core\handlers\exception.py", line 41, in inner
response = get_response(request)
File "D:\Python27\lib\site-packages\django\core\handlers\base.py", line 249, in _legacy_get_response
response = self._get_response(request)
File "D:\Python27\lib\site-packages\django\core\handlers\base.py", line 187, in _get_response
response = self.process_exception_by_middleware(e, request)
File "D:\Python27\lib\site-packages\django\core\handlers\base.py", line 185, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "D:\Programming\web\ipa_django\mysite\ipaswdb\views.py", line 312, in providers_plain_old_view
result = generate_pdf('ipaswdb/provider/providers_plain_old_view.html', file_object=resp, context=context)
File "D:\Python27\lib\site-packages\django_xhtml2pdf\utils.py", line 62, in generate_pdf
generate_pdf_template_object(tmpl, file_object, context)
File "D:\Python27\lib\site-packages\django_xhtml2pdf\utils.py", line 39, in generate_pdf_template_object
html = template_object.render(Context(context))
File "D:\Python27\lib\site-packages\django\template\backends\django.py", line 64, in render
context = make_context(context, request, autoescape=self.backend.engine.autoescape)
File "D:\Python27\lib\site-packages\django\template\context.py", line 287, in make_context
raise TypeError('context must be a dict rather than %s.' % context.__class__.__name__)
TypeError: context must be a dict rather than Context.
"GET /ipaswdb/provider_roster/ HTTP/1.1" 500 86485
I mean it wants me to call the generate_pdf function a different way in the latest django version?
The main issue lies in the line
File "D:\Python27\lib\site-packages\django_xhtml2pdf\utils.py", line 39, in generate_pdf_template_object
html = template_object.render(Context(context))
in the error output. This is an issue with the django-xhtml2pdf package not being up to date for Django 1.11. The call to render has changed from
html = template_object.render(Context(context))
to
html = template_object.render(context)
according to the upgrading to 1.11 notes https://docs.djangoproject.com/en/1.11/ref/templates/upgrading/
django.template.loader section.
You can either submit a bug report and wait for them to fix it, or implement the functionality the package provides in your own views.py.

Can't initiate get request in web.py

I'm trying to run two servers using web.py and initiating calls from one to another. Both servers start normally but when I try to call a url the below stack trace is thrown.
import web
# URL pattern -> handler class name. Names are resolved through the globals()
# mapping given to the application below.
# NOTE(review): no 'Ping' class is visible in this snippet -- presumably it is
# defined elsewhere.
urls = (
    '/ping', 'Ping',
    '/acqlock/+(.*)', 'Acquire',
)


class MSA(web.application):
    """web.application whose run() serves on 127.0.0.1 at a chosen port."""

    def run(self, port=8081, *middleware):
        """Wrap the app with ``middleware`` and serve it on 127.0.0.1:``port``."""
        func = self.wsgifunc(*middleware)
        return web.httpserver.runsimple(func, ('127.0.0.1', port))


class Acquire:
    """Handler for /acqlock/<resource_name>."""

    def GET(self, resource_name):
        # Single-argument form prints the same on Python 2 and Python 3.
        print(resource_name)
        # NOTE(review): this asks the local ``app`` object to handle a URL that
        # names port 8080, while this app is served on 8081 -- confirm whether
        # a real HTTP call to the other server was intended.
        response = app.request('http://127.0.0.1:8080/acqlock/' + resource_name, method='GET')
        return response


app = MSA(urls, globals())

if __name__ == "__main__":
    # Start serving only after every handler class is defined: run() does not
    # return while the server is up, so anything defined below this call would
    # not exist yet when requests arrive.
    app.run(port=8081)
But I keep getting this error after calling the /acqlock.
Traceback (most recent call last):
File "C:\Python27\lib\site-packages\web\wsgiserver\__init__.py", line 1245, in communicate
req.respond()
File "C:\Python27\lib\site-packages\web\wsgiserver\__init__.py", line 775, in respond
self.server.gateway(self).respond()
File "C:\Python27\lib\site-packages\web\wsgiserver\__init__.py", line 2018, in respond
response = self.req.server.wsgi_app(self.env, self.start_response)
File "C:\Python27\lib\site-packages\web\httpserver.py", line 306, in __call__
return self.app(environ, xstart_response)
File "C:\Python27\lib\site-packages\web\httpserver.py", line 274, in __call__
return self.app(environ, start_response)
File "C:\Python27\lib\site-packages\web\application.py", line 279, in wsgi
result = self.handle_with_processors()
File "C:\Python27\lib\site-packages\web\application.py", line 249, in handle_with_processors
return process(self.processors)
File "C:\Python27\lib\site-packages\web\application.py", line 246, in process
raise self.internalerror()
File "C:\Python27\lib\site-packages\web\application.py", line 515, in internalerror
parent = self.get_parent_app()
File "C:\Python27\lib\site-packages\web\application.py", line 500, in get_parent_app
if self in web.ctx.app_stack:
AttributeError: 'ThreadedDict' object has no attribute 'app_stack'
Use requests library for this.
import requests
# Issue an HTTP GET to the server addressed at 127.0.0.1:8080 for this resource.
response = requests.request(method='GET', url ='http://127.0.0.1:8080/acqlock/' + resource_name)
Note: You have used port 8080 in url even though you have hosted the web.py in 8081

Categories