Python: requests module throws exception with Gevent - python

The following code:
import gevent
import gevent.monkey
gevent.monkey.patch_socket()
import requests
import json
base_url = 'https://api.getclever.com'
section_url = base_url + '/v1.1/sections'
#get all sections
sections = requests.get(section_url, auth=('DEMO_KEY', '')).json()
urls = [base_url+data['uri']+'/students' for data in sections['data']]
#get students for each section
threads = [gevent.spawn(requests.get, url, auth=('DEMO_KEY', '')) for url in urls]
gevent.joinall(threads)
students = [thread.value for thread in threads]
#get number of students in each section
num_students = [len(student.json()['data']) for student in students]
print (sum(num_students)/len(num_students))
results in this error:
Traceback (most recent call last):
File "clever.py", line 12, in <module>
sections = requests.get(section_url, auth=('DEMO_KEY', '')).json()
File "/Library/Python/2.7/site-packages/requests/api.py", line 55, in get
return request('get', url, **kwargs)
File "/Library/Python/2.7/site-packages/requests/api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "/Library/Python/2.7/site-packages/requests/sessions.py", line 382, in request
resp = self.send(prep, **send_kwargs)
File "/Library/Python/2.7/site-packages/requests/sessions.py", line 485, in send
r = adapter.send(request, **kwargs)
File "/Library/Python/2.7/site-packages/requests/adapters.py", line 379, in send
raise SSLError(e)
requests.exceptions.SSLError: [Errno 2] _ssl.c:503: The operation did not complete (read)
What am I doing wrong here?

Here's a similar question: [Errno 2] _ssl.c:504: The operation did not complete (read).
When you comment out
gevent.monkey.patch_socket()
or use
gevent.monkey.patch_all()
or use
gevent.monkey.patch_socket()
gevent.monkey.patch_ssl()
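then the problem disappears. Patching only the socket module leaves the ssl module unpatched, so the standard SSL code ends up reading from gevent's non-blocking sockets and fails with "The operation did not complete (read)"; the ssl module has to be patched together with socket (or neither of them patched).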

Related

Python 2.7.0 - requests problem sending xlsx file

I am trying to send an .xlsx file from a software written in python 2.7 to a telegram chat via https using the 'requests' library.
If I send an .xlsx file without data (only column headers) I get no error, but if I send an .xlsx file with data inside I get the following error:
Traceback (most recent call last):
File "<module:project.TelegramBot>", line 68, in SendDocument
File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\api.py", line 109, in post
return request('post', url, data=data, json=json, **kwargs)
File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\api.py", line 50, in request
response = session.request(method=method, url=url, **kwargs)
File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\sessions.py", line 465, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\sessions.py", line 573, in send
r = adapter.send(request, **kwargs)
File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\adapters.py", line 415, in send
raise ConnectionError(err, request=request)
ConnectionError: ('Connection aborted.', BadStatusLine("''",))
This is the code:
import traceback
import json
import requests
apiToken = "12345"
chatID = "12345"
def SendDocument():
result = {'isValid': False, 'result': None}
try:
params = {}
params['chat_id'] = chatID
params['document'] = 'attach://file'
files = {'file': open("C:\\Users\\SimoneMaffei\\Desktop\\report.xlsx", "rb")}
finalURL = "https://api.telegram.org/bot" + apiToken + "/sendDocument"
httpResult = requests.post(finalURL, data = params, files=files)
result["isValid"] = True
result["result"] = httpResult
except:
print(traceback.format_exc())
return result
print(SendDocument())
With Python 3.x I do not have this problem but I cannot use it.
Can someone help me and tell me why I have this problem?

Trying to test some url addresses is working or not with python request but getting errors

I'm trying to learn how to test some internet addresses with Python requests, and I'm expecting status codes as output (like 200 or 404). But I get errors which I couldn't figure out. I'm also open to any advice for my purpose.
import os , sys , requests
from multiprocessing import Pool
def url_check(url):
resp = requests.get(url)
print(resp.status_code)
with Pool(4) as p:
print(p.map(url_check, [ "https://api.github.com​", "​http://bilgisayar.mu.edu.tr/​", "​https://www.python.org/​", "http://akrepnalan.com/ceng2034​", "https://github.com/caesarsalad/wow​" ]))
Output of the code with errors:
404
404
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.6/multiprocessing/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/usr/lib/python3.6/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "ödev_deneme.py", line 6, in url_check
resp = requests.get(url)
File "/home/efe/.local/lib/python3.6/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/home/efe/.local/lib/python3.6/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/home/efe/.local/lib/python3.6/site-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/home/efe/.local/lib/python3.6/site-packages/requests/sessions.py", line 637, in send
adapter = self.get_adapter(url=request.url)
File "/home/efe/.local/lib/python3.6/site-packages/requests/sessions.py", line 728, in get_adapter
raise InvalidSchema("No connection adapters were found for {!r}".format(url))
requests.exceptions.InvalidSchema: No connection adapters were found for '\u200bhttps://www.python.org/\u200b'
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "ödev_deneme.py", line 10, in <module>
print(p.map(url_check, [ "https://api.github.com​", "​http://bilgisayar.mu.edu.tr/​", "​https://www.python.org/​", "http://akrepnalan.com/ceng2034​", "https://github.com/caesarsalad/wow​" ]))
File "/usr/lib/python3.6/multiprocessing/pool.py", line 266, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.6/multiprocessing/pool.py", line 644, in get
raise self._value
requests.exceptions.InvalidSchema: No connection adapters were found for '\u200bhttps://www.python.org/\u200b'
My expecting output must be like this:
200
200
200
404
200
There is a 404 on the fourth line because the fourth URL address is not working. But in my output there are already 404s in the first two lines. There is a huge mistake in my code, I guess.
The problem is that some of the urls include invisible ZERO WIDTH SPACE characters ('\u200b').
You can replace them with an empty string:
def url_check(url):
resp = requests.get(url.replace('\u200b', ''))
print(resp.status_code)

Requests Error: No connection adapters were found

I'm new to programming and I'm following along with one of Qazi's tutorials. I'm on a section about web scraping, but unfortunately I'm getting errors that I can't seem to find a solution for. Can you please help me out? Thanks.
The error code is below.
Traceback (most recent call last):
File "D:\Users\Vaughan\Qazi\Web Scrapping\webscraping.py", line 6, in <module>
page = requests.get(
File "C:\Users\vaugh\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\vaugh\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\vaugh\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\vaugh\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\sessions.py", line 640, in send
adapter = self.get_adapter(url=request.url)
File "C:\Users\vaugh\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\sessions.py", line 731, in get_adapter
raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for '['https://forecast.weather.gov/MapClick.php?lat=34.09979000000004&lon=-118.32721499999997#.XkzZwCgzaUk']'
[Finished in 1.171s]
My line of code is as follows
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
import lxml
page = requests.get('https://forecast.weather.gov/MapClick.php?lat=34.09979000000004&lon=-118.32721499999997#.XkzZwCgzaUk')
soup = BeautifulSoup(page.content, 'html.parser')
week = soup.find(id='seven-day-forecast-body')
print(week)

2captcha selenium out of range

I'm trying to implement 2captcha using selenium with Python.
I just copied the example from their documentation:
https://github.com/2captcha/2captcha-api-examples/blob/master/ReCaptcha%20v2%20API%20Examples/Python%20Example/2captcha_python_api_example.py
This is my code:
from selenium import webdriver
from time import sleep
from selenium.webdriver.support.select import Select
import requests
driver = webdriver.Chrome('chromedriver.exe')
driver.get('the_url')
current_url = driver.current_url
captcha = driver.find_element_by_id("captcha-box")
captcha2 = captcha.find_element_by_xpath("//div/div/iframe").get_attribute("src")
captcha3 = captcha2.split('=')
#print(captcha3[2])
# Add these values
API_KEY = 'my_api_key' # Your 2captcha API KEY
site_key = captcha3[2] # site-key, read the 2captcha docs on how to get this
url = current_url # example url
proxy = 'Myproxy' # example proxy
proxy = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
s = requests.Session()
# here we post site key to 2captcha to get captcha ID (and we parse it here too)
captcha_id = s.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}".format(API_KEY, site_key, url), proxies=proxy).text.split('|')[1]
# then we parse gresponse from 2captcha response
recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id), proxies=proxy).text
print("solving ref captcha...")
while 'CAPCHA_NOT_READY' in recaptcha_answer:
sleep(5)
recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id), proxies=proxy).text
recaptcha_answer = recaptcha_answer.split('|')[1]
# we make the payload for the post data here, use something like mitmproxy or fiddler to see what is needed
payload = {
'key': 'value',
'gresponse': recaptcha_answer # This is the response from 2captcha, which is needed for the post request to go through.
}
# then send the post request to the url
response = s.post(url, payload, proxies=proxy)
# And that's all there is to it other than scraping data from the website, which is dynamic for every website.
This is my error:
solving ref captcha...
Traceback (most recent call last):
File "main.py", line 38, in
recaptcha_answer = recaptcha_answer.split('|')[1]
IndexError: list index out of range
The captcha is getting solved, because I can see it on the 2captcha dashboard, so what is the error if this is the example from the official documentation?
EDIT:
For some reason, without any modification, I'm now getting the captcha solved from 2captcha, but then I get this error:
solving ref captcha...
OK|this_is_the_2captch_answer
Traceback (most recent call last):
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 594, in urlopen
self._prepare_proxy(conn)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 805, in _prepare_proxy
conn.connect()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connection.py", line 308, in connect
self._tunnel()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 906, in _tunnel
(version, code, message) = response._read_status()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 278, in _read_status
raise BadStatusLine(line)
http.client.BadStatusLine: <html>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\util\retry.py", line 368, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\packages\six.py", line 685, in reraise
raise value.with_traceback(tb)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 594, in urlopen
self._prepare_proxy(conn)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 805, in _prepare_proxy
conn.connect()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connection.py", line 308, in connect
self._tunnel()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 906, in _tunnel
(version, code, message) = response._read_status()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 278, in _read_status
raise BadStatusLine(line)
urllib3.exceptions.ProtocolError: ('Connection aborted.', BadStatusLine('<html>\r\n'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "main.py", line 49, in <module>
response = s.post(url, payload, proxies=proxy)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 581, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine('<html>\r\n'))
Why am I getting this error?
I'm setting as site_key = current_url_where_captcha_is_located
Is this correct?
Use your debugger or put a print(recaptcha_answer) before the error line to see what's the value of recaptcha_answer before you try to call .split('|') on it. There is no | in the string so when you're trying to get the second element of the resulting list with [1] it fails.
It looks like you are not providing any valid proxy connection parameters, yet you are passing this proxy to requests when connecting to the API.
Just comment these two lines:
#proxy = 'Myproxy' # example proxy
#proxy = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
And then remove proxies=proxy from four lines:
captcha_id = s.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}".format(API_KEY, site_key, url)).text.split('|')[1]
recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id)).text
recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id)).text
response = s.post(url, payload)

Python requests.exceptions.ChunkedEncodingError

I am writing a python script that does these steps below.
Query a MongoDB database
Parse and aggregate results
Upload data to a ServiceNow table via a REST API
This works most of the time but occasionally I see this error:
requests.exceptions.ChunkedEncodingError: ("Connection broken: error(104, 'Connection reset by peer')", error(104, 'Connection reset by peer'))
This error stops the script and prevents the entire data set from being captured.
What can I do to alleviate this issue?
Python 2.7.5
Code
#!/usr/bin/env python
from config import *
import os, sys
mypath = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(mypath, "api-python-client"))
from apiclient.mongo import *
from pymongo import MongoClient
import json
import requests
from bson.json_util import dumps
client = MongoClient(mongo_uri)
#Create ServiceNow URL
svcnow_url = create_svcnow_url('u_imp_cmps')
#BITSDB Nmap Collection
db = client[mongo_db]
#Aggregate - RDBMS equivalent to Alias select x as y
#Rename fields to match ServiceNow field names
computers = db['computer'].aggregate([
{"$unwind": "$hostnames"},
{"$project" : {
"_id":0,
"u_hostname": "$hostnames.name",
"u_ipv4": "$addresses.ipv4",
"u_status": "$status.state",
"u_updated_timestamp": "$last_seen"
}}
])
j = dumps({"records":computers})
#print(j)
#Set proper headers
headers = {"Content-Type":"application/json","Accept":"application/json"}
#Build HTTP Request
response = requests.post(url=svcnow_url, auth=(svcnow_user, svcnow_pwd), headers=headers ,data=j)
#Check for HTTP codes other than 200
if response.status_code != 200:
print('Status:', response.status_code, 'Headers:', response.headers, 'Response Text', response.text, 'Error Response:',response.json())
exit()
#Decode the JSON response into a dictionary and use the data
print('Status:',response.status_code,'Headers:',response.headers,'Response:',response.json())
Error
Traceback (most recent call last):
File "/usr/src/computer_pingable_import.py", line 50, in <module>
response = requests.post(url=svcnow_url, auth=(svcnow_user, svcnow_pwd), headers=headers ,data=j)
File "/usr/local/lib/python2.7/site-packages/requests/api.py", line 107, in post
return request('post', url, data=data, json=json, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/api.py", line 53, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 608, in send
r.content
File "/usr/local/lib/python2.7/site-packages/requests/models.py", line 737, in content
self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes()
File "/usr/local/lib/python2.7/site-packages/requests/models.py", line 663, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ("Connection broken: error(104, 'Connection reset by peer')", error(104, 'Connection reset by peer'))
Another attempt and a slightly different error message:
Traceback (most recent call last):
File "/usr/src/computer_pingable_import.py", line 50, in <module>
response = requests.post(url=svcnow_url, auth=(svcnow_user, svcnow_pwd), headers=headers ,data=j)
File "/usr/local/lib/python2.7/site-packages/requests/api.py", line 107, in post
return request('post', url, data=data, json=json, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/api.py", line 53, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 576, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/adapters.py", line 426, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', error(104, 'Connection reset by peer'))
ServiceNow prevents REST transactions from running for longer than 60 seconds.
I'm not too sure how large your dataset is, but you will want to chunk the data into smaller pieces to ensure the transaction always runs.

Categories