Web scraping in HTML using py-script - python

<body class="white-vertion black-bg">
<!-- Start Loader -->
<p>
<py-script>
import ssl
from urllib.request import urlopen
from bs4 import BeautifulSoup
context = ssl._create_unverified_context()
result = urlopen("https://blog.naver.com/PostList.naver?blogId=woong3164&categoryNo=0&from=postList", context=context)
bsObj = BeautifulSoup(result.read(), "html.parser")
</py-script>
</p>
I used py-script in HTML to do web scraping. However, this error occurred.
'JsException(PythonError: Traceback (most recent call last):
File "/lib/python3.10/urllib/request.py", line 1348, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "/lib/python3.10/http/client.py", line 1282, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/lib/python3.10/http/client.py", line 1328, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/lib/python3.10/http/client.py", line 1277, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/lib/python3.10/http/client.py", line 1037, in _send_output
self.send(msg)
File "/lib/python3.10/http/client.py", line 975, in send
self.connect()
File "/lib/python3.10/http/client.py", line 1447, in connect
super().connect()
File "/lib/python3.10/http/client.py", line 941, in connect
self.sock = self._create_connection(
File "/lib/python3.10/socket.py", line 845, in create_connection
raise err
File "/lib/python3.10/socket.py", line 833, in create_connection
sock.connect(sa)
BlockingIOError: [Errno 26] Operation in progress
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/lib/python3.10/site-packages/_pyodide/_base.py", line 429, in eval_code
.run(globals, locals)
File "/lib/python3.10/site-packages/_pyodide/_base.py", line 300, in run
coroutine = eval(self.code, globals, locals)
File "", line 6, in
File "/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/lib/python3.10/urllib/request.py", line 519, in open
response = self._open(req, data)
File "/lib/python3.10/urllib/request.py", line 536, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/lib/python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/lib/python3.10/urllib/request.py", line 1391, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "/lib/python3.10/urllib/request.py", line 1351, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 26] Operation in progress>)'
I think this error was caused by ssl.
How can I solve this error?

Your problem is caused by using unsupported Python packages. The urllib package uses APIs (TCP sockets) that do not exist in the browser. This is not a limitation of PyScript; no browser application can use socket-based APIs.
The solution is to use supported APIs such as the browser's fetch or Pyodide's pyfetch.

Related

Slack unable to connect to send a message

I am using the Python slack module to send a message to a Slack channel, and I have installed all the required modules (slack, slackClient, openssl) as well, but I am facing an SSL violation. I am not sure whether it is related to a proxy or not. Any help would be appreciated.
code:
import slack
import ssl
SLACK_API_TOKEN = "xoxb-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
client = slack.WebClient(token=SLACK_API_TOKEN)
response = client.chat_postMessage(
channel='#channelName',
text="testing")
if (response["ok"]):
print("Notification sent to Slack")
Error:
Traceback (most recent call last):
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py", line 1285, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py", line 1331, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py", line 1280, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py", line 1040, in _send_output
self.send(msg)
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py", line 980, in send
self.connect()
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/http/client.py", line 1454, in connect
self.sock = self._context.wrap_socket(self.sock,
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py", line 500, in wrap_socket
return self.sslsocket_class._create(
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py", line 1040, in _create
self.do_handshake()
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ssl.py", line 1309, in do_handshake
self._sslobj.do_handshake()
ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:1129)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/sai/Downloads/Gen3/POC_TEST.py", line 19, in <module>
response = client.chat_postMessage(
File "/usr/local/lib/python3.9/site-packages/slack_sdk/web/legacy_client.py", line 2091, in chat_postMessage
return self.api_call("chat.postMessage", json=kwargs)
File "/usr/local/lib/python3.9/site-packages/slack_sdk/web/legacy_base_client.py", line 167, in api_call
return self._sync_send(api_url=api_url, req_args=req_args)
File "/usr/local/lib/python3.9/site-packages/slack_sdk/web/legacy_base_client.py", line 258, in _sync_send
return self._urllib_api_call(
File "/usr/local/lib/python3.9/site-packages/slack_sdk/web/legacy_base_client.py", line 370, in _urllib_api_call
response = self._perform_urllib_http_request(url=url, args=request_args)
File "/usr/local/lib/python3.9/site-packages/slack_sdk/web/legacy_base_client.py", line 535, in _perform_urllib_http_request
raise err
File "/usr/local/lib/python3.9/site-packages/slack_sdk/web/legacy_base_client.py", line 496, in _perform_urllib_http_request
resp = opener.open(req, timeout=self.timeout) # skipcq: BAN-B310
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py", line 517, in open
response = self._open(req, data)
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py", line 494, in _call_chain
result = func(*args)
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py", line 1389, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "/usr/local/Cellar/python#3.9/3.9.10/Frameworks/Python.framework/Versions/3.9/lib/python3.9/urllib/request.py", line 1349, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error EOF occurred in violation of protocol (_ssl.c:1129)>
Thanks

Python script crashes when fail to connect to server

I'm testing my Python script, and when the server is turned off my script crashes. How can I change this so that it tries to connect again? Right now, my script crashes when it fails to connect to the server. Here is my script:
import urllib.request
import json
def connectToServer():
with urllib.request.urlopen("http://localhost:5000/user/connect") as url:
data = json.loads(url.read().decode())
Here is the error:
Traceback (most recent call last):
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 1346, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1255, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1301, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1250, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 1010, in _send_output
self.send(msg)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 950, in send
self.connect()
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\http\client.py", line 921, in connect
self.sock = self._create_connection(
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\socket.py", line 843, in create_connection
raise err
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\socket.py", line 831, in create_connection
sock.connect(sa)
ConnectionRefusedError: [WinError 10061] Подключение не установлено, т.к. конечный компьютер отверг запрос на подключение
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "e:\Projects\Hacker_Pro Python\main.py", line 4, in <module>
server.connectToServer()
File "e:\Projects\Hacker_Pro Python\server.py", line 8, in connectToServer
with urllib.request.urlopen("http://localhost:5000/user/connect") as url:
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 214, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 517, in open
response = self._open(req, data)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 534, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 494, in _call_chain
result = func(*args)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 1375, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "C:\Users\Rostik\AppData\Local\Programs\Python\Python39\lib\urllib\request.py", line 1349, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [WinError 10061] Подключение не установлено, т.к. конечный компьютер отверг запрос на подключение>
Python's try statement may help you: wrap the request in it and handle the failure instead of letting the exception crash the script.
def connectToServer():
try:
with urllib.request.urlopen("http://localhost:5000/user/connect") as url:
data = json.loads(url.read().decode())
except ConnectionRefusedError as error:
print('connection was refused:\n{}'.format(error))
except:
print('could not connect to server')

Getting URLError while running Python code for extracting URL data in Ubuntu

Below is the stack trace from the terminal in Ubuntu.
Even my Anaconda is taking too much time to open (around 20 minutes).
Traceback (most recent call last):
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 1317, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1244, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1290, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1239, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1026, in _send_output
self.send(msg)
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 966, in send
self.connect()
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 1406, in connect
super().connect()
File "/home/narendra/anaconda3/lib/python3.7/http/client.py", line 938, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/home/narendra/anaconda3/lib/python3.7/socket.py", line 727, in create_connection
raise err
File "/home/narendra/anaconda3/lib/python3.7/socket.py", line 716, in create_connection
sock.connect(sa)
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "TASK_1.py", line 23, in <module>
response = urllib.request.urlopen(line,context=gcontext)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 543, in _open
'_open', req)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 1360, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/home/narendra/anaconda3/lib/python3.7/urllib/request.py", line 1319, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 110] Connection timed out>
Below is my code.
This code extracts the URL data into files.
It picks URLs one by one from url.txt,
then it extracts all of the page data from that particular URL.
import urllib.request, urllib.error, urllib.parse
import io
import ssl
#localhost, 127.0.0.0/8, ::1, 10.0.0.0/8
# using readline() that reads file line by line.
file1 = open("url.txt", "r")
count = 0
gcontext = ssl.SSLContext()`
for i in range(18):
count += 1
# Getting the next line from file
line = file1.readline()
# if line is empty
# end of file is reached
if not line:
break
response = urllib.request.urlopen(line,context=gcontext)
webContent = response.read()
with io.open("file_" + str(i) + ".txt", 'w', encoding='utf-8') as f:
f.write(webContent)
f.close()

IDNA label empty or too long

I'm trying to run a script from an online course and had to change the source code from Python 2 to Python 3 syntax. In this script, there is a download of an archive, which I already transformed into:
url = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tgz"
urllib.request.urlretrieve(url, filename="../enron_mail_20150507.tgz")
However, something seems to be wrong with the URL, since it gives me the following error:
Traceback (most recent call last):
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/encodings/idna.py", line 165, in encode
raise UnicodeError("label empty or too long")
UnicodeError: label empty or too long
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "ud120_code_py35fork/tools/startup.py", line 37, in <module>
data = urllib.request.urlretrieve("http://...")
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 187, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 465, in open
response = self._open(req, data)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 483, in _open
'_open', req)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 443, in _call_chain
result = func(*args)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 1268, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/urllib/request.py", line 1240, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 1083, in request
self._send_request(method, url, body, headers)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 1128, in _send_request
self.endheaders(body)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 1079, in endheaders
self._send_output(message_body)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 911, in _send_output
self.send(msg)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 854, in send
self.connect()
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/http/client.py", line 826, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/socket.py", line 693, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
File "/home/xiaolong/development/Python/udacity_intro_to_machine_learning/localpython/lib/python3.5/socket.py", line 732, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
UnicodeError: encoding with 'idna' codec failed (UnicodeError: label empty or too long)
I already tried replacing the ~ and the . in the URL with %7E and %2E, but that didn't help at all.
What's wrong with the URL and how can I fix this?
exact python version: 3.5.1

Python 3 get HTML content

I'm using this code to get a website's HTML content:
import urllib.request
import lxml.html as lh
req= urllib.request.Request("http://www.ip-adress.com/ip_tracer/157.123.22.11",
headers={'User-Agent' : "Magic Browser"})
html = urllib.request.urlopen(req).read()
doc = lh.fromstring(html)
print (''.join(doc.xpath('.//*[#class="odd"]')[-1].text_content().split()))
I want to get the Organization: Zenith Data Systems,
but it shows some errors:
Traceback (most recent call last):
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 1135, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 967, in request
self._send_request(method, url, body, headers)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 1005, in _send_request
self.endheaders(body)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 963, in endheaders
self._send_output(message_body)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 808, in _send_output
self.send(msg)
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 746, in send
self.connect()
File "/usr/local/python3.2.3/lib/python3.2/http/client.py", line 724, in connect
self.timeout, self.source_address)
File "/usr/local/python3.2.3/lib/python3.2/socket.py", line 404, in create_connection
raise err
File "/usr/local/python3.2.3/lib/python3.2/socket.py", line 395, in create_connection
sock.connect(sa)
socket.error: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "ext.py", line 4, in <module>
html = urllib.request.urlopen(req).read()
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 138, in urlopen
return opener.open(url, data, timeout)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 369, in open
response = self._open(req, data)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 387, in _open
'_open', req)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 347, in _call_chain
result = func(*args)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 1155, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/usr/local/python3.2.3/lib/python3.2/urllib/request.py", line 1138, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 111] Connection refused>
How can I solve it? Thanks.
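Basically, "Connection refused" means the remote host rejected the connection attempt: for example, nothing is listening on that port, the server is down or under heavy maintenance, or access is restricted (e.g. only registered users or allowed addresses may reach the page).
If you want to handle such errors in your code above, you can use try and except, as in the code below: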
try:
req= urllib.request.Request("http://www.ip-adress.com/ip_tracer/157.123.22.11",headers={'User-Agent' : "Magic Browser"})
html = urllib.request.urlopen(req).read()
doc = lh.fromstring(html)
print (''.join(doc.xpath('.//*[#class="odd"]')[-1].text_content().split()))
except urllib.error.URLError as e:
print(e.reason)

Categories