IDNA label empty or too long

IDNA label empty or too long - python

I'm trying to run a script of an online course and had to change the source code from python 2 to python 3 syntax. In this script, there is a download of an archive, which I already transformed into:
url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tgz"
urllib.request.urlretrieve(url, filename="../enron_mail_20150507.tgz")
However, something seems to be wrong with the URL, since it gives me the following error:
Traceback (most recent call last):
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/encodings/idna.py", line 165, in encode
raise UnicodeError("label empty or too long")
UnicodeError: label empty or too long
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "ud120_code_py35fork/tools/startup.py", line 37, in <module>
data = urllib.request.urlretrieve("http://...")
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 187, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 465, in open
response = self._open(req, data)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 483, in _open
'_open', req)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 443, in _call_chain
result = func(*args)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 1268, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 1240, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 1083, in request
self._send_request(method, url, body, headers)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 1128, in _send_request
self.endheaders(body)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 1079, in endheaders
self._send_output(message_body)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 911, in _send_output
self.send(msg)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 854, in send
self.connect()
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 826, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/socket.py", line 693, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/socket.py", line 732, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
UnicodeError: encoding with 'idna' codec failed (UnicodeError: label empty or too long)
I already tried replacing the ~ and the . in the URL with %7E and %2E, but that didn't help at all.
What's wrong with the URL and how can I fix this?
exact python version: 3.5.1

Related

Web Scraping in HTML to using py-script

<body class="white-vertion black-bg">
<!-- Start Loader -->
<p>
<py-script>
import ssl
from urllib.request import urlopen
from bs4 import BeautifulSoup
context = ssl._create_unverified_context()
result = urlopen("https://blog.naver.com/PostList.naver?blogId=woong3164&categoryNo=0&from=postList", context=context)
bsObj = BeautifulSoup(result.read(), "html.parser")
</py-script>
</p>
I used py-script in HTML to do web scraping. However, this error occurred.
'JsException(PythonError: Traceback (most recent call last):
File "/lib/python3.10/urllib/request.py", line 1348, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "/lib/python3.10/http/client.py", line 1282, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/lib/python3.10/http/client.py", line 1328, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/lib/python3.10/http/client.py", line 1277, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/lib/python3.10/http/client.py", line 1037, in _send_output
self.send(msg)
File "/lib/python3.10/http/client.py", line 975, in send
self.connect()
File "/lib/python3.10/http/client.py", line 1447, in connect
super().connect()
File "/lib/python3.10/http/client.py", line 941, in connect
self.sock = self._create_connection(
File "/lib/python3.10/socket.py", line 845, in create_connection
raise err
File "/lib/python3.10/socket.py", line 833, in create_connection
sock.connect(sa)
BlockingIOError: [Errno 26] Operation in progress
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/lib/python3.10/site-packages/_pyodide/_base.py", line 429, in eval_code
.run(globals, locals)
File "/lib/python3.10/site-packages/_pyodide/_base.py", line 300, in run
coroutine = eval(self.code, globals, locals)
File "", line 6, in
File "/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/lib/python3.10/urllib/request.py", line 519, in open
response = self._open(req, data)
File "/lib/python3.10/urllib/request.py", line 536, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/lib/python3.10/urllib/request.py", line 1391, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "/lib/python3.10/urllib/request.py", line 1351, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 26] Operation in progress>)'
I think this error was caused by ssl.
How can I solve this error?

Your problem is caused by using unsupported Python packages. The package urllib uses APIs (TCP Sockets) that do not exist in the browser. This is not a limitation of PyScript, no browser application can use socket-based APIs.
The solution is to use supported APIs such as fetch or pyfetch.

error occurs when I use you-get project on Github

I wanted to use you-get to download videos, but when I ran it after installing it, an error occurred:
My Python 3.9 is installed correctly and I have checked the system path. I have also tried many ways of installing you-get, including using pip3 and downloading the zip from GitHub, and this error always occurs. I thought the problem could be caused by my VPN, but when I turned the VPN off, nothing changed.
I opened cmd with administrator as well.
I typed this command: you-get 'https://www.youtube.com/watch?v=nWQwrU1qUrc' --debug
[DEBUG] get_location: http://'https://www.youtube.com/watch?v=nWQwrU1qUrc'
[DEBUG] get_location: http://'https://www.youtube.com/watch?v=nWQwrU1qUrc'
you-get: version 0.4.1500, a tiny downloader that scrapes the web.
you-get: Namespace(version=False, help=False, info=False, url=False, json=False, no_merge=False, no_caption=False, force=False, skip_existing_file_size_check=False, format=None, output_filename=None, output_dir='.', player=None, cookies=None, timeout=600, debug=True, input_file=None, password=None, playlist=False, auto_rename=False, insecure=False, http_proxy=None, extractor_proxy=None, no_proxy=False, socks_proxy=None, stream=None, itag=None, URL=["'https://www.youtube.com/watch?v=nWQwrU1qUrc'"])
Traceback (most recent call last):
File "C:\Python39\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Python39\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Python39\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Python39\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Python39\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Python39\lib\http\client.py", line 950, in send
self.connect()
File "C:\Python39\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Python39\lib\socket.py", line 822, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Python39\lib\socket.py", line 953, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 1777, in url_to_module
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 393, in get_location
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 408, in urlopen_with_retry
File "C:\Python39\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "C:\Python39\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "C:\Python39\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Python39\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "C:\Python39\lib\urllib\request.py", line 1375, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Python39\lib\urllib\request.py", line 1349, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Python39\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Python39\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Python39\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Python39\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Python39\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Python39\lib\http\client.py", line 950, in send
self.connect()
File "C:\Python39\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Python39\lib\socket.py", line 822, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Python39\lib\socket.py", line 953, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
Traceback (most recent call last):
File "C:\Python39\Scripts\you-get-script.py", line 33, in <module>
sys.exit(load_entry_point('you-get==0.4.1500', 'console_scripts', 'you-get')())
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\__main__.py", line 92, in main
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 1798, in main
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 1680, in script_main
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 1327, in download_main
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 1788, in any_download
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 1779, in url_to_module
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 393, in get_location
File "C:\Python39\lib\site-packages\you_get-0.4.1500-py3.9.egg\you_get\common.py", line 408, in urlopen_with_retry
File "C:\Python39\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "C:\Python39\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "C:\Python39\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Python39\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "C:\Python39\lib\urllib\request.py", line 1375, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Python39\lib\urllib\request.py", line 1349, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>
could someone help me pls, thank you so much

If you're intent on using cmd, you should replace ' with " in your command, or since the website contains no spaces anyway, just remove the quotes. So try running:
you-get "https://www.youtube.com/watch?v=nWQwrU1qUrc"
OR
you-get https://www.youtube.com/watch?v=nWQwrU1qUrc
However, in the future I'd suggest you just use Windows PowerShell; it's much better, in my opinion.

Getting UrlError While running Python code for extracting Url from in Ubuntu

Below is the stack trace at the terminal end in Ubuntu
Even my Anaconda is taking too much time to open (around 20 minutes).
Traceback (most recent call last):
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 1317, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1244, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1290, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1239, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1026, in _send_output
self.send(msg)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 966, in send
self.connect()
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1406, in connect
super().connect()
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 938, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/home/narendra/anaconda3/lib/python3.7/socket.py", line 727, in create_connection
raise err
File "/home/narendra/anaconda3/lib/python3.7/socket.py", line 716, in create_connection
sock.connect(sa)
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "TASK_1.py", line 23, in <module>
response = urllib.request.urlopen(line,context=gcontext)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 543, in _open
'_open', req)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 1360, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 1319, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 110] Connection timed out>
Below is my code.
This code extracts the data from each URL into a file:
it picks the URLs from url.txt one by one,
then it fetches all of the page data from that particular URL.
import urllib.request, urllib.error, urllib.parse
import io
import ssl
#localhost, 127.0.0.0/8, ::1, 10.0.0.0/8
# using readline() that reads file line by line.
file1 = open("url.txt", "r")
count = 0
gcontext = ssl.SSLContext()
for i in range(18):
count += 1
# Getting the next line from file
line = file1.readline()
# if line is empty
# end of file is reached
if not line:
break
response = urllib.request.urlopen(line,context=gcontext)
webContent = response.read()
with io.open("file_" + str(i) + ".txt", 'w', encoding='utf-8') as f:
f.write(webContent)
f.close()

Comparing two URLs to see if path traversal is available by reading the content of page in python

I am trying to compare two URLs using python. Basically I want to test if directory traversal can be performed or not.
I am trying to compare the content of the page if it redirects to the same page, then it is not vulnerable and if it's different then it's vulnerable.
I have tried the following code. After the input is taken, it shows an error.
Code:
import urllib
def pywalker():
target_url= input("Enter url :")
res = urllib.request.urlopen('http://'+target_url+"../../../etc/password")
res2 = urllib.request.urlopen('http://'+target_url)
if res.read() == res2.read():
print("Same webpage")
else:
print("Not same ")
pywalker()
Error:
Traceback (most recent call last):
File "<ipython-input-63-e0e37c2314c8>", line 11, in <module>
pywalker()
File "<ipython-input-63-e0e37c2314c8>", line 4, in pywalker
res = urllib.request.urlopen('http://'+target_url+"../../../etc/password")
File "C:\Users\user\Anaconda3\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\user\Anaconda3\lib\urllib\request.py", line 525, in open
response = self._open(req, data)
File "C:\Users\user\Anaconda3\lib\urllib\request.py", line 543, in _open
'_open', req)
File "C:\Users\user\Anaconda3\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\user\Anaconda3\lib\urllib\request.py", line 1345, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\user\Anaconda3\lib\urllib\request.py", line 1317, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "C:\Users\user\Anaconda3\lib\http\client.py", line 1229, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\user\Anaconda3\lib\http\client.py", line 1275, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\user\Anaconda3\lib\http\client.py", line 1224, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\user\Anaconda3\lib\http\client.py", line 1016, in _send_output
self.send(msg)
File "C:\Users\user\Anaconda3\lib\http\client.py", line 956, in send
self.connect()
File "C:\Users\user\Anaconda3\lib\http\client.py", line 928, in connect
(self.host,self.port), self.timeout, self.source_address)
File "C:\Users\user\Anaconda3\lib\socket.py", line 707, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "C:\Users\user\Anaconda3\lib\socket.py", line 748, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
UnicodeError: encoding with 'idna' codec failed (UnicodeError: label empty or too long)

urlopen error [Errno 11001] getaddrinfo failed

I'm new to python and I'm following a video tutorial.
So here's the code snippet
from urllib.request import urlopen
with urlopen('http://sixty-north/c/t.txt') as story:
story_words = []
for line in story:
line_words = line.decode('utf-8').split()
for word in line_words:
story_words.append(word)
I'm able to access http://sixty-north.com/c/t.txt in my browser.
However when I type this into command prompt: python words.py I get this error:
C:\New folder>python words.py
Traceback (most recent call last):
File "C:\Python33\lib\urllib\request.py", line 1248, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "C:\Python33\lib\http\client.py", line 1065, in request
self._send_request(method, url, body, headers)
File "C:\Python33\lib\http\client.py", line 1103, in _send_request
self.endheaders(body)
File "C:\Python33\lib\http\client.py", line 1061, in endheaders
self._send_output(message_body)
File "C:\Python33\lib\http\client.py", line 906, in _send_output
self.send(msg)
File "C:\Python33\lib\http\client.py", line 844, in send
self.connect()
File "C:\Python33\lib\http\client.py", line 822, in connect
self.timeout, self.source_address)
File "C:\Python33\lib\socket.py", line 417, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "words.py", line 2, in <module>
with urlopen('http://sixty-north/c/t.txt') as story:
File "C:\Python33\lib\urllib\request.py", line 156, in urlopen
return opener.open(url, data, timeout)
File "C:\Python33\lib\urllib\request.py", line 469, in open
response = self._open(req, data)
File "C:\Python33\lib\urllib\request.py", line 487, in _open
'_open', req)
File "C:\Python33\lib\urllib\request.py", line 447, in _call_chain
result = func(*args)
File "C:\Python33\lib\urllib\request.py", line 1274, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Python33\lib\urllib\request.py", line 1251, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 11001] getaddrinfo failed>

There is no such host: sixty-north. Replace sixty-north with sixty-north.com (notice: .com at the end)

Change the host from sixty-north to sixty-north.com.
Also, restart your Internet connection.
Your Internet connection may not be working properly.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

IDNA label empty or too long - python

Related

Web Scraping in HTML to using py-script

error occurs when I use you-get project on Github

Getting UrlError While running Python code for extracting Url from in Ubuntu

Comparing two URLs to see if path traversal is available by reading the content of page in python

urlopen error [Errno 11001] getaddrinfo failed

Categories

Resources