requests.get() raises SSL Error - python

I am using Python 2.7.3 on a shared FreeBSD host.
The installed version of python requests module is 2.11.1.
import json
import requests
from requests.auth import HTTPBasicAuth
requests.packages.urllib3.disable_warnings()
s = requests.Session()
s.server = "dns-api.company.net"
s.auth = HTTPBasicAuth('user', 'pass')
s.headers = {'User-Agent':'DNS-Client'}
s.verify = False
r = s.get('https://dns-api.company.net/query')  # not the actual URL
As you can see, I am setting verify to False. Yet I get the following SSL error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 477, in get
return self.request('GET', url, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 465, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 573, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/adapters.py", line 431, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: [Errno bad handshake] [('SSL routines', 'SSL23_GET_SERVER_HELLO', '')]
I have tried the following variations but to no avail as I end up with the same error.
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
r = requests.get("https://dns-api.company.net/query", verify=False, auth=('user','pass')) // Dummy URL
I don't care about SSL verification. What am I doing wrong here?
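Note that verify=False only disables certificate verification; it does not change which SSL/TLS versions the client offers, and the SSL23_GET_SERVER_HELLO bad handshake means version negotiation itself failed. A minimal sketch of a workaround, assuming the server only speaks an older protocol such as TLSv1 (the adapter name here is illustrative; the same technique appears in an answer further down this page):
import ssl
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.poolmanager import PoolManager

class TLSv1Adapter(HTTPAdapter):
    # Pin the protocol version urllib3 negotiates for this session's HTTPS pools.
    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = PoolManager(num_pools=connections,
                                       maxsize=maxsize,
                                       block=block,
                                       ssl_version=ssl.PROTOCOL_TLSv1)

s = requests.Session()
s.mount('https://', TLSv1Adapter())
r = s.get('https://dns-api.company.net/query', verify=False)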

Related

Python requests module timeouts with every https proxy and uses my real ip with http proxies

Basically, I get this error with every single HTTPS proxy I've tried, on every website.
Code:
import requests
endpoint = 'https://ipinfo.io/json'
proxies = {'http':'http://45.167.95.184:8085','https':'https://45.167.95.184:8085'}
r = requests.get(endpoint,proxies=proxies,timeout=10)
Error:
Traceback (most recent call last):
File "<pyshell#5>", line 1, in <module>
r = requests.get(endpoint,proxies=proxies,timeout=10)
File "C:\Users\Utente\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\Utente\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\Utente\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\sessions.py", line 529, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Utente\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\sessions.py", line 645, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Utente\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\adapters.py", line 507, in send
raise ConnectTimeout(e, request=request)
requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='ipinfo.io', port=443): Max retries exceeded with url: /json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001FDCFB9E6A0>, 'Connection to 45.167.95.184 timed out. (connect timeout=10)'))
And when I only use http
import requests
endpoint = 'https://ipinfo.io/json'
proxies = {'http':'http://45.167.95.184:8085'}
r = requests.get(endpoint,proxies=proxies,timeout=10)
The request is sent, but websites that return public IPs show my real IP. Both requests (2.27.1) and urllib3 (1.26.8) are updated to their latest versions. What could the issue be?
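A likely explanation (standard requests behavior, not from this thread): the proxies dict is keyed by the scheme of the target URL, so with only an 'http' entry, requests to https:// URLs bypass the proxy entirely, and the site sees your real IP. Also, most proxies tunnel HTTPS via CONNECT over a plain HTTP connection, so the 'https' entry normally still uses the http:// scheme. A minimal sketch:
import requests

endpoint = 'https://ipinfo.io/json'
# Both keys point at the same proxy. Note the http:// scheme even for the
# 'https' key: HTTPS traffic is tunneled through the proxy with CONNECT,
# not spoken to the proxy over TLS.
proxies = {
    'http': 'http://45.167.95.184:8085',
    'https': 'http://45.167.95.184:8085',
}
r = requests.get(endpoint, proxies=proxies, timeout=10)
print(r.json())
If it still times out with the http:// scheme, the proxy itself likely doesn't accept CONNECT or is simply down.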

requests.get crashes on certain urls

import requests
r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye')
This returns the following error
Traceback (most recent call last):
File "C:\Users\thoma\Downloads\RealisticMellowProfile\Python\New folder\Term project demo.py", line 8, in <module>
r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye')
File "c:\users\thoma\miniconda3\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "c:\users\thoma\miniconda3\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "c:\users\thoma\miniconda3\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "c:\users\thoma\miniconda3\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "c:\users\thoma\miniconda3\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
You can change the user agent so the server does not close the connection:
import requests
headers = {"User-Agent": "Mozilla/5.0"}
r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye', headers=headers)
The URL is broken (or the server serving this URL is).
Try to get it with
wget https://www.whosampled.com/search/?q=marvin+gaye
or with
curl https://www.whosampled.com/search/?q=marvin+gaye
Use try / except to handle such situations.
However, you won't be able to get data from it (same as with wget or curl):
import requests

try:
    r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye')
except requests.exceptions.ConnectionError:
    print("can't get data from this server")
    r = None

if r is not None:
    pass  # handle successful request
else:
    pass  # handle error situation

2captcha selenium out of range

I'm trying to implement 2captcha using selenium with Python.
I just copied the example from their documentation:
https://github.com/2captcha/2captcha-api-examples/blob/master/ReCaptcha%20v2%20API%20Examples/Python%20Example/2captcha_python_api_example.py
This is my code:
from selenium import webdriver
from time import sleep
from selenium.webdriver.support.select import Select
import requests

driver = webdriver.Chrome('chromedriver.exe')
driver.get('the_url')
current_url = driver.current_url
captcha = driver.find_element_by_id("captcha-box")
captcha2 = captcha.find_element_by_xpath("//div/div/iframe").get_attribute("src")
captcha3 = captcha2.split('=')
#print(captcha3[2])

# Add these values
API_KEY = 'my_api_key'  # Your 2captcha API KEY
site_key = captcha3[2]  # site-key, read the 2captcha docs on how to get this
url = current_url  # example url
proxy = 'Myproxy'  # example proxy
proxy = {'http': 'http://' + proxy, 'https': 'https://' + proxy}

s = requests.Session()

# here we post site key to 2captcha to get captcha ID (and we parse it here too)
captcha_id = s.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}".format(API_KEY, site_key, url), proxies=proxy).text.split('|')[1]
# then we parse gresponse from 2captcha response
recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id), proxies=proxy).text
print("solving ref captcha...")
while 'CAPCHA_NOT_READY' in recaptcha_answer:
    sleep(5)
    recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id), proxies=proxy).text
recaptcha_answer = recaptcha_answer.split('|')[1]

# we make the payload for the post data here, use something like mitmproxy or fiddler to see what is needed
payload = {
    'key': 'value',
    'gresponse': recaptcha_answer  # This is the response from 2captcha, which is needed for the post request to go through.
}

# then send the post request to the url
response = s.post(url, payload, proxies=proxy)
# And that's all there is to it other than scraping data from the website, which is dynamic for every website.
This is my error:
solving ref captcha...
Traceback (most recent call last):
File "main.py", line 38, in
recaptcha_answer = recaptcha_answer.split('|')[1]
IndexError: list index out of range
The captcha is getting solved because I can see it on the 2captcha dashboard, so what is the error if it's the official documentation?
EDIT:
For some reason, without modification, I'm getting the captcha solved by 2captcha, but then I get this error:
solving ref captcha...
OK|this_is_the_2captch_answer
Traceback (most recent call last):
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 594, in urlopen
self._prepare_proxy(conn)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 805, in _prepare_proxy
conn.connect()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connection.py", line 308, in connect
self._tunnel()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 906, in _tunnel
(version, code, message) = response._read_status()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 278, in _read_status
raise BadStatusLine(line)
http.client.BadStatusLine: <html>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\util\retry.py", line 368, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\packages\six.py", line 685, in reraise
raise value.with_traceback(tb)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 594, in urlopen
self._prepare_proxy(conn)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connectionpool.py", line 805, in _prepare_proxy
conn.connect()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\urllib3\connection.py", line 308, in connect
self._tunnel()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 906, in _tunnel
(version, code, message) = response._read_status()
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 278, in _read_status
raise BadStatusLine(line)
urllib3.exceptions.ProtocolError: ('Connection aborted.', BadStatusLine('<html>\r\n'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "main.py", line 49, in <module>
response = s.post(url, payload, proxies=proxy)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 581, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Usuari\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine('<html>\r\n'))
Why am I getting this error?
I'm setting site_key = current_url_where_captcha_is_located.
Is this correct?
Use your debugger, or put a print(recaptcha_answer) before the error line, to see what the value of recaptcha_answer is before you try to call .split('|') on it. There is no | in the string, so when you try to get the second element of the resulting list with [1], it fails.
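A minimal sketch of that check as a helper (the function name is hypothetical; the OK|... and CAPCHA_NOT_READY response formats are visible in this very question):
def parse_2captcha_response(text):
    # Success responses look like "OK|<token>"; anything without a "|"
    # is a status or error string such as CAPCHA_NOT_READY.
    if '|' not in text:
        raise RuntimeError("2captcha returned: " + text)
    return text.split('|')[1]

recaptcha_answer = parse_2captcha_response(recaptcha_answer)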
Looks like you don't provide any valid proxy connection parameters, yet you're passing this proxy to requests when connecting to the API.
Just comment these two lines:
#proxy = 'Myproxy' # example proxy
#proxy = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
And then remove proxies=proxy from four lines:
captcha_id = s.post("http://2captcha.com/in.php?key={}&method=userrecaptcha&googlekey={}&pageurl={}".format(API_KEY, site_key, url)).text.split('|')[1]
recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id)).text
recaptcha_answer = s.get("http://2captcha.com/res.php?key={}&action=get&id={}".format(API_KEY, captcha_id)).text
response = s.post(url, payload)

Requests s.get(url,verify = False) error [Python]

I'm attempting to write a program that grabs data from my password protected gradebook and analyzes it for me because my university's gradebook doesn't automatically calculate averages. I'm running into issues at the very beginning of my program and it's growing more and more frustrating. I'm running on Python 2.7.9.
This is the Code.
import logging
import requests
import re
url = "https://learn.ou.edu/d2l/m/login"
s = requests.session()
r = s.get(url,verify = False)
This is the error that is occurring.
Traceback (most recent call last):
File "/Users/jackson/Desktop/untitled folder/Grade Calculator.py", line 7, in <module>
r = s.get(url,verify = False)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 473, in get
return self.request('GET', url, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 461, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 573, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python2.7/site-packages/requests/adapters.py", line 431, in send
raise SSLError(e, request=request)
SSLError: EOF occurred in violation of protocol (_ssl.c:581)
Even weirder, this only happens with the gradebook URL. When I use a different URL such as "http://login.live.com", I get this error.
Warning (from warnings module):
File "/usr/local/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py", line 734
InsecureRequestWarning)
InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.org/en/latest/security.html
Does anybody know what I could do to fix this issue? Thanks, Jackson.
Requests does not support choosing the SSL/TLS version out of the box, so you need to subclass HTTPAdapter:
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.poolmanager import PoolManager
import ssl
import requests

class MyAdapter(HTTPAdapter):
    def init_poolmanager(self, connections, maxsize, block=False):
        self.poolmanager = PoolManager(num_pools=connections,
                                       maxsize=maxsize,
                                       block=block,
                                       ssl_version=ssl.PROTOCOL_TLSv1)

url = "https://learn.ou.edu/d2l/m/login"
s = requests.session()
s.mount('https://', MyAdapter())
r = s.get(url, verify=False)
print r.status_code
Gives status code:
200

Web scraping using Python and HTTPS proxies

Is there currently something in Python that supports HTTPS proxies for web scraping? I am currently using Python 2.7 on Windows, but I could use Python 3 if it supports the HTTPS proxy protocol.
I tried using mechanize and requests but both failed on HTTPS proxy protocol.
This bit is using mechanize:
import mechanize
br = mechanize.Browser()
br.set_debug_http(True)
br.set_handle_robots(False)
br.set_proxies({
    "http": "ncproxy1.uk.net.intra:8080",
    "https": "ncproxy1.uk.net.intra:8080",
})
br.add_proxy_password("uname", "pass")
br.open("http://www.google.co.jp/")   # OK
br.open("https://www.google.co.jp/")  # Proxy Authentication Required
or using requests:
import requests
from requests.auth import HTTPProxyAuth
proxyDict = {
    'http': 'ncproxy1.uk.net.intra:8080',
    'https': 'ncproxy1.uk.net.intra:8080'
}
auth = HTTPProxyAuth('uname', 'pass')
r = requests.get("https://www.google.com", proxies=proxyDict, auth=auth)
print r.text
I obtain the following message:
Traceback (most recent call last):
File "D:\SRC\NuffieldLogger\NuffieldLogger\nuffieldrequests.py", line 10, in <module>
r = requests.get("https://www.google.com", proxies=proxyDict, auth=auth)
File "C:\Python27\lib\site-packages\requests\api.py", line 55, in get
return request('get', url, **kwargs)
File "C:\Python27\lib\site-packages\requests\api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 335, in request
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 438, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 331, in send
raise SSLError(e)
requests.exceptions.SSLError: [Errno 1] _ssl.c:504: error:140770FC:SSL routines:SSL23_GET_SERVER_HELLO:unknown protocol
For the requests module you can use this (note the credentials are separated from the host with @, not #):
#!/usr/bin/env python3
import requests

proxy_dict = {
    'http': 'http://user:passwd@proxy_ip:port',
    'https': 'https://user:passwd@proxy_ip:port'
}
r = requests.get('https://google.com', proxies=proxy_dict)
print(r.text)
I have tested this.
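To confirm the proxy is actually in use, here is a quick sketch reusing the placeholder credentials above (ipinfo.io is the echo service from an earlier question on this page):
import requests

proxy_dict = {
    'http': 'http://user:passwd@proxy_ip:port',
    'https': 'https://user:passwd@proxy_ip:port'
}

# ipinfo.io reports the IP the request arrived from; with a working proxy
# this prints the proxy's IP rather than your own.
r = requests.get('https://ipinfo.io/json', proxies=proxy_dict)
print(r.json().get('ip'))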
