check_hostname requires server_hostname - python

I was using this code with Python 3.9 and it worked fine, but I updated to an alpha version of Python 3.10 and now I get this exception:
proxy = {
    "http": "http://" + proxy_ip,
    "https": "https://" + proxy_ip
}
requests.get("https://www.url.org/", proxies=proxy, timeout=10)
Error:
check_hostname requires server_hostname
I changed the code to this, as suggested in another question, but got a different error:
proxy = {
    "http": proxy_ip,
    "https": proxy_ip
}
requests.get("https://www.url.org/", proxies=proxy, timeout=10)
Error:
Proxy URL had no scheme, should start with http:// or https://
Any ideas besides downgrading the Python version?
Full code:
import requests
from os import getcwd
from tqdm import tqdm

# Check proxy
def checkProxy(proxy_ip):
    proxy = {
        "http": proxy_ip,
        "https": proxy_ip
    }
    try:
        requests.get("https://www.url.org/", proxies=proxy, timeout=10)
        return True
    except:
        return False

# Main
if __name__ == "__main__":
    with open('proxy.txt', encoding='utf-8') as f:
        proxies = f.readlines()
    proxies = [x.strip() for x in proxies]
    f = open("proxy-valid.txt", 'a+')
    for proxy in tqdm(proxies):
        if checkProxy(proxy):
            f.write(proxy + "\n")
            f.flush()
    f.close()
Traceback:
Traceback (most recent call last):
  File "C:\Users\User\Desktop\NG\utils\proxy-checker.py", line 39, in <module>
    if checkProxy(proxy):
  File "C:\Users\User\Desktop\NG\utils\proxy-checker.py", line 12, in checkProxy
    requests.get("https://www.url.org/", proxies=proxy, timeout=10)
  File "C:\Users\User\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\api.py", line 76, in get
    return request('get', url, params=params, **kwargs)
  File "C:\Users\User\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\api.py", line 61, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Users\User\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\sessions.py", line 542, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Users\User\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\sessions.py", line 655, in send
    r = adapter.send(request, **kwargs)
  File "C:\Users\User\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\adapters.py", line 414, in send
    raise InvalidURL(e, request=request)
requests.exceptions.InvalidURL: Proxy URL had no scheme, should start with http:// or https://
Thanks
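
[No answer is included in this thread. For reference, the workaround commonly suggested for Python 3.10 with an older requests/urllib3 — an assumption on my part, not confirmed here — is to keep a scheme on the proxy URLs but use http:// for both entries, since the proxy itself is normally reached over plain HTTP and HTTPS traffic is tunneled through it with CONNECT:]

import requests

proxy_ip = "1.2.3.4:8080"  # hypothetical ip:port, as in the question
# Both entries use the http:// scheme: the proxy itself speaks plain
# HTTP, and HTTPS requests are tunneled through it via CONNECT.
proxy = {
    "http": "http://" + proxy_ip,
    "https": "http://" + proxy_ip
}
requests.get("https://www.url.org/", proxies=proxy, timeout=10)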

Related

sending a request with a proxy causes InvalidURL: Failed to parse in Python

Using Python, I wrote a function that sends a simple request through a proxy and returns the response. This is the code:
def testSingleProxy(ip, port, user, passw):
    url = 'https://stackoverflow.com/'  # example
    proxy = str(ip) + ":" + str(port) + ":" + "@" + str(user) + ":" + str(passw)
    http_proxy = 'http://' + proxy
    https_proxy = 'https://' + proxy
    proxies = {
        'http': http_proxy,
        'https': https_proxy,
    }
    response = requests.get(str(url), proxies=proxies, timeout=10)
    return response
The proxy IP, port, username and password are passed as parameters to the function. Each time I try to run it, this error is generated:
Traceback (most recent call last):
  File "/opt/homebrew/lib/python3.10/site-packages/requests/adapters.py", line 456, in send
    conn = self.get_connection(request.url, proxies)
  File "/opt/homebrew/lib/python3.10/site-packages/requests/adapters.py", line 345, in get_connection
    proxy = prepend_scheme_if_needed(proxy, "http")
  File "/opt/homebrew/lib/python3.10/site-packages/requests/utils.py", line 988, in prepend_scheme_if_needed
    parsed = parse_url(url)
  File "/opt/homebrew/lib/python3.10/site-packages/urllib3/util/url.py", line 397, in parse_url
    return six.raise_from(LocationParseError(source_url), None)
  File "<string>", line 3, in raise_from
urllib3.exceptions.LocationParseError: Failed to parse: https://***.***.***.***:****:@*****:*****

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/luca/Desktop/stack", line 28, in <module>
    testSingleProxy("***.***.***.***","****","*****","*****")
  File "/Users/luca/Desktop/stack", line 23, in testSingleProxy
    response = requests.get(str(url), proxies=proxies, timeout=10)
  File "/opt/homebrew/lib/python3.10/site-packages/requests/api.py", line 73, in get
    return request("get", url, params=params, **kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/requests/api.py", line 59, in request
    return session.request(method=method, url=url, **kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/requests/sessions.py", line 587, in request
    resp = self.send(prep, **send_kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/requests/sessions.py", line 701, in send
    r = adapter.send(request, **kwargs)
  File "/opt/homebrew/lib/python3.10/site-packages/requests/adapters.py", line 458, in send
    raise InvalidURL(e, request=request)
requests.exceptions.InvalidURL: Failed to parse: https://***.***.***.***:****:@*****:*****
Note: I have already tried changing both the proxy and the URL, I tested my proxies with another tester and they work, and I also tried removing any invisible characters from the URL, but nothing helped. (In the code I masked my proxy's numbers and letters with *.)
How can I resolve this?
The cause of the error
Sorry to say it, but the problem, as is often the case, is a trivial typo right there:
proxy = str(ip) + ":" + str(port) + ":" + "@" + str(user) + ":" + str(passw)
                                     ^
You were just using the wrong template. The colon should not come before the @, which is why the parser raised an error. Likewise, the credentials should be specified first, with the address and port after the @ symbol (as in ssh root@1.1.1.1).
Solution
I can offer this version with an f-string template, which I use all the time:
def testSingleProxy(ip, port, user, passw):
    url = 'https://stackoverflow.com/'  # example
    proxies = {
        "https": f"https://{user}:{passw}@{ip}:{port}",
        "http":  f"http://{user}:{passw}@{ip}:{port}",
    }
    response = requests.get(str(url), proxies=proxies, timeout=10)
    return response
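
[One caveat worth adding, not raised in the answer: if the username or password contains reserved characters such as @, : or /, they need to be percent-encoded before being embedded in the proxy URL, for example with urllib.parse.quote. A minimal sketch, with a hypothetical helper name:]

from urllib.parse import quote

def proxy_url(scheme, user, passw, ip, port):
    # quote(..., safe='') percent-encodes '@', ':' and '/' so the
    # credentials cannot break URL parsing
    return f"{scheme}://{quote(user, safe='')}:{quote(passw, safe='')}@{ip}:{port}"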

Why do I get "no_proxy = proxies.get('no_proxy') if proxies is not None else None AttributeError: 'str' object has no attribute 'get'"

I have checked other posts but they do not seem to resolve this. This is my code:
def main():
    while True == True:
        random_proxy = random.choice(read)
        proxies = {
            proxy_type: random_proxy
        }
        response = requests.get('https://manacube.com/play/', cookies=cookies, proxies=proxies, headers=headers)
        if response.status_code == 200:
            print("Page View Sent (Code 200)")
        else:
            print("Error")

for x in range(int(t)):
    Thread(target=main).start()
The error I'm getting is:
Traceback (most recent call last):
  File "C:\Users\name\AppData\Local\Programs\Python\Python38\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "C:\Users\name\AppData\Local\Programs\Python\Python38\lib\threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "bot.py", line 49, in main
    response = requests.get('https://url.com/play/', cookies=cookies, proxies=random_proxy, headers=headers)
  File "C:\Users\name\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "C:\Users\name\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\api.py", line 61, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Users\mazin\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 519, in request
    settings = self.merge_environment_settings(
  File "C:\Users\name\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\sessions.py", line 700, in merge_environment_settings
    no_proxy = proxies.get('no_proxy') if proxies is not None else None
AttributeError: 'str' object has no attribute 'get'
I am not sure if this is a dumb error, but I am new to Python, so please do not make fun of me. Please help if you can: I want to choose a random proxy from proxies.txt and use it for every request.
So:
request 1: 111.68.31.155:8080
request 2: 203.210.84.198:8080
request 3: 150.136.120.227:3128
I tried this and it worked for me. It uses:
pro = {
    'http://': random_proxy,
    'https://': random_proxy
}
Instead of:
proxies = {
    proxy_type: random_proxy
}
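
[Note that the traceback also points at the real problem: on the failing line the raw string random_proxy was passed as proxies, and requests expects a dict mapping schemes to proxy URLs. A minimal sketch of a corrected version, assuming proxies.txt holds one ip:port entry per line (names taken from the question):]

import random
import requests

# Assumption: proxies.txt holds one ip:port entry per line
with open('proxies.txt', encoding='utf-8') as f:
    read = [line.strip() for line in f if line.strip()]

def main():
    random_proxy = random.choice(read)
    # requests expects a mapping of scheme -> proxy URL, not a bare string
    proxies = {
        'http': 'http://' + random_proxy,
        'https': 'http://' + random_proxy,  # HTTP proxy tunnels HTTPS via CONNECT
    }
    response = requests.get('https://manacube.com/play/', proxies=proxies, timeout=10)
    print(response.status_code)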

Python 2.7.0 - requests problem sending xlsx file

I am trying to send an .xlsx file from software written in Python 2.7 to a Telegram chat via HTTPS using the requests library.
If I send an .xlsx without data (only with columns) I get no error, while if I send an .xlsx with data inside I get the following error:
Traceback (most recent call last):
  File "<module:project.TelegramBot>", line 68, in SendDocument
  File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\api.py", line 109, in post
    return request('post', url, data=data, json=json, **kwargs)
  File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\api.py", line 50, in request
    response = session.request(method=method, url=url, **kwargs)
  File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\sessions.py", line 465, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\sessions.py", line 573, in send
    r = adapter.send(request, **kwargs)
  File "C:\Users\SimoneMaffei\.ignition\cache\gwlocalhost_8088\C0\pylib\requests\adapters.py", line 415, in send
    raise ConnectionError(err, request=request)
ConnectionError: ('Connection aborted.', BadStatusLine("''",))
This is the code:
import traceback
import json
import requests

apiToken = "12345"
chatID = "12345"

def SendDocument():
    result = {'isValid': False, 'result': None}
    try:
        params = {}
        params['chat_id'] = chatID
        params['document'] = 'attach://file'
        files = {'file': open("C:\\Users\\SimoneMaffei\\Desktop\\report.xlsx", "rb")}
        finalURL = "https://api.telegram.org/bot" + apiToken + "/sendDocument"
        httpResult = requests.post(finalURL, data=params, files=files)
        result["isValid"] = True
        result["result"] = httpResult
    except:
        print(traceback.format_exc())
    return result

print(SendDocument())
With Python 3.x I do not have this problem, but I cannot use it.
Can someone help me and tell me why I have this problem?

requests.get crashes on certain urls

import requests

r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye')
This returns the following error:
Traceback (most recent call last):
  File "C:\Users\thoma\Downloads\RealisticMellowProfile\Python\New folder\Term project demo.py", line 8, in <module>
    r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye')
  File "c:\users\thoma\miniconda3\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "c:\users\thoma\miniconda3\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "c:\users\thoma\miniconda3\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "c:\users\thoma\miniconda3\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "c:\users\thoma\miniconda3\lib\site-packages\requests\adapters.py", line 498, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
You can change the user agent so the server does not close the connection:
import requests
headers = {"User-Agent": "Mozilla/5.0"}
r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye', headers=headers)
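[For context, an inference not stated in the answer: the default User-Agent sent by requests identifies itself as python-requests/x.y.z, and some servers refuse such clients by closing the connection without a response, which is exactly the RemoteDisconnected error shown above. Any browser-like User-Agent avoids this.]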
The URL is broken (or the server serving this URL is).
Try to get it with
wget https://www.whosampled.com/search/?q=marvin+gaye
or with
curl https://www.whosampled.com/search/?q=marvin+gaye
Use try/except to handle such situations.
However, you won't be able to get data from it (same as with wget or curl):
import requests

try:
    r = requests.get('https://www.whosampled.com/search/?q=marvin+gaye')
except requests.exceptions.ConnectionError:
    print("can't get data from this server")
    r = None

if r is not None:
    pass  # handle successful request
else:
    pass  # handle error situation

Web scraping using Python and HTTPS proxies

Is there currently something in Python that supports HTTPS proxies for web scraping? I am currently using Python 2.7 on Windows, but I could use Python 3 if it supports the HTTPS proxy protocol.
I tried using mechanize and requests, but both failed on the HTTPS proxy protocol.
This bit is using mechanize:
import mechanize

br = mechanize.Browser()
br.set_debug_http(True)
br.set_handle_robots(False)
br.set_proxies({
    "http": "ncproxy1.uk.net.intra:8080",
    "https": "ncproxy1.uk.net.intra:8080",
})
br.add_proxy_password("uname", "pass")
br.open("http://www.google.co.jp/")   # OK
br.open("https://www.google.co.jp/")  # Proxy Authentication Required
or using requests:
import requests
from requests.auth import HTTPProxyAuth

proxyDict = {
    'http': 'ncproxy1.uk.net.intra:8080',
    'https': 'ncproxy1.uk.net.intra:8080'
}
auth = HTTPProxyAuth('uname', 'pass')
r = requests.get("https://www.google.com", proxies=proxyDict, auth=auth)
print r.text
I obtain the following message:
Traceback (most recent call last):
  File "D:\SRC\NuffieldLogger\NuffieldLogger\nuffieldrequests.py", line 10, in <module>
    r = requests.get("https://www.google.com", proxies=proxyDict, auth=auth)
  File "C:\Python27\lib\site-packages\requests\api.py", line 55, in get
    return request('get', url, **kwargs)
  File "C:\Python27\lib\site-packages\requests\api.py", line 44, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Python27\lib\site-packages\requests\sessions.py", line 335, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Python27\lib\site-packages\requests\sessions.py", line 438, in send
    r = adapter.send(request, **kwargs)
  File "C:\Python27\lib\site-packages\requests\adapters.py", line 331, in send
    raise SSLError(e)
requests.exceptions.SSLError: [Errno 1] _ssl.c:504: error:140770FC:SSL routines:SSL23_GET_SERVER_HELLO:unknown protocol
For the requests module you can use this:
#!/usr/bin/env python3
import requests

proxy_dict = {
    'http': 'http://user:passwd@proxy_ip:port',
    'https': 'https://user:passwd@proxy_ip:port'
}
r = requests.get('https://google.com', proxies=proxy_dict)
print(r.text)
I have tested this.
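
[A side note, an assumption not made in the answer: with many corporate HTTP proxies the 'https' entry should still use the http:// scheme, because the proxy itself is reached over plain HTTP and HTTPS traffic is tunneled through it with CONNECT. A sketch with the same placeholder values:]

import requests

# user, passwd, proxy_ip and port are placeholders from the answer above;
# both entries use http:// because the proxy itself speaks plain HTTP.
proxy_dict = {
    'http': 'http://user:passwd@proxy_ip:port',
    'https': 'http://user:passwd@proxy_ip:port',
}
r = requests.get('https://google.com', proxies=proxy_dict)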
