I work on windows 7
this is my code
def Rup(x, y, w):
    """Return the residuals and the halved mean squared error of a linear model.

    Parameters:
        x: 2-D array-like of samples; its first dimension is the sample count.
        y: target values, one per row of ``x``.
        w: weight vector multiplied with ``x`` via ``np.dot``.

    Returns:
        A ``(residuals, cost)`` tuple where ``residuals = x . w - y`` and
        ``cost = sum(residuals ** 2) / (2 * number_of_samples)``.
    """
    residuals = np.dot(x, w) - y
    n_samples = np.shape(x)[0]
    cost = np.sum(residuals ** 2) / (2 * n_samples)
    return residuals, cost
def REG(data_1, data_2, data_3, Tu, cou):
    """Fit linear-regression weights with batch gradient descent.

    Parameters:
        data_1: 2-D NumPy array of samples (one row per sample).
        data_2: target values, one per row of ``data_1``.
        data_3: initial weight vector; it is not modified in place.
        Tu: learning rate applied to the gradient at every step.
        cou: number of gradient-descent iterations (an integer).

    Returns:
        The weight vector after ``cou`` updates.

    The current cost is printed every 200 iterations.
    """
    # The sample count does not change between iterations, so read it once.
    n_samples = np.shape(data_1)[0]
    for i in range(cou):
        dif, cost = Rup(data_1, data_2, data_3)
        grad = np.dot(data_1.transpose(), dif) / n_samples
        data_3 = data_3 - Tu * grad
        if i % 200 == 0:
            print('Wyliczony error w ' + str(i) + " iteracji: ", cost)
    return data_3
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# Download the Iris data set; the file has no header row, so column names are
# supplied explicitly.
_DANE = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = pd.read_csv(_DANE, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label'])

# Replace the textual label with integer codes so it can be used as a feature.
le = LabelEncoder()
iris['label'] = le.fit_transform(iris['label'])

# Regress petal_width on every remaining column.
X = np.array(iris.drop(['petal_width'], axis=1))
y = np.array(iris['petal_width'])
# A bare `iris.head()` only displays something in a notebook cell; print it so
# the preview is also visible when this runs as a script.
print(iris.head())

cros = 1/1000  # learning rate
coun = 10000  # number of gradient-descent iterations
_, features = np.shape(X)
wagi = np.zeros(features)  # start from all-zero weights
wektor = REG(X, y, wagi, cros, coun)
print('--------------------------------------------------')
print(wektor)
print('--------------------------------------------------')
# Cost of the fitted weights on the training data.
dif, cost = Rup(X, y, wektor)
print('Szukany Error', cost)
the error message looks as follows
Traceback (most recent call last):
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1319, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1230, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1276, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1225, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1004, in _send_output
self.send(msg)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 944, in send
self.connect()
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\http\client.py", line 1399, in connect
self.sock = self._context.wrap_socket(self.sock,
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\ssl.py", line 500, in wrap_socket
return self.sslsocket_class._create(
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\ssl.py", line 1040, in _create
self.do_handshake()
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\ssl.py", line 1309, in do_handshake
self._sslobj.do_handshake()
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1108)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/lukasz/PycharmProjects/miw/test.py", line 26, in <module>
iris = pd.read_csv(_DANE, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label'])
File "C:\Users\lukasz\PycharmProjects\miw\venv\lib\site-packages\pandas\io\parsers.py", line 685, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\lukasz\PycharmProjects\miw\venv\lib\site-packages\pandas\io\parsers.py", line 439, in _read
fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
File "C:\Users\lukasz\PycharmProjects\miw\venv\lib\site-packages\pandas\io\common.py", line 196, in get_filepath_or_buffer
req = urlopen(filepath_or_buffer)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 525, in open
response = self._open(req, data)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1362, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "C:\Users\lukasz\AppData\Local\Programs\Python\Python38-32\lib\urllib\request.py", line 1322, in do_open
raise URLError(err)
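urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1108)>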
Process finished with exit code 1
The code itself should be all right, because it works correctly in an online compiler.
I don't know how to deal with this problem
please help me
You can provide an SSL context and disable the verification.
import ssl
# NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, i.e. certificate verification is switched off for every
# HTTPS request that relies on the default context in this process. Both
# names are private (underscore-prefixed) members of `ssl`. Confirm this is
# acceptable before using it for anything beyond a trusted host.
ssl._create_default_https_context = ssl._create_unverified_context
Related
This is my code:-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('https://raw.githubusercontent.com/mk- gurucharan/Classification/master /SocialNetworkAds.csv')X = dataset.iloc[:, [0, 1]].values
y = dataset.iloc[:, 2].valuesdataset.head(5)
The error it shows is:-
/Users/apple/PycharmProjects/pythonProject4 /venv/bin/python /Users/apple/PycharmProjects /pythonProject4/main.py
File "/Users/apple/PycharmProjects /pythonProject4/main.py", line 4
dataset = pd.read_csv('https://raw.githubusercontent.com/mk- gurucharan/Classification/master /SocialNetworkAds.csv')X = dataset.iloc[:, [0, 1]].values
^
SyntaxError: invalid syntax
Process finished with exit code 1
Why is it not able to read the data from GitHub?
Edit: This is my new code after reading the comments:-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('https://raw.githubusercontent.com/mk- gurucharan/Classification/master/SocialNetworkAds.csv')
X = dataset.iloc[:, [0, 1]].values
y = dataset.iloc[:, 2].values
print(dataset)
and the error it shows is:-
/Users/apple/PycharmProjects/pythonProject4/venv/bin/python /Users/apple/PycharmProjects/pythonProject4/main.py
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/urllib/request.py", line 1348, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/http/client.py", line 1282, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/http/client.py", line 1328, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/http/client.py", line 1277, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/http/client.py", line 1037, in _send_output
self.send(msg)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/http/client.py", line 975, in send
self.connect()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/http/client.py", line 1447, in connect
super().connect()
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/http/client.py", line 941, in connect
self.sock = self._create_connection(
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/socket.py", line 845, in create_connection
raise err
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/socket.py", line 833, in create_connection
sock.connect(sa)
TimeoutError: [Errno 60] Operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/apple/PycharmProjects/pythonProject4/main.py", line 4, in <module>
dataset = pd.read_csv('https://raw.githubusercontent.com /mk-gurucharan/Classification/master/SocialNetworkAds.csv')
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib /python3.10/site-packages/pandas/util/_decorators.py", line 211, in wrapper
return func(*args, **kwargs)
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib /python3.10/site-packages/pandas/util/_decorators.py", line 331, in wrapper
return func(*args, **kwargs)
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib/python3.10/site-packages/pandas/io/parsers/readers.py", line 950, in read_csv
return _read(filepath_or_buffer, kwds)
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib /python3.10/site-packages/pandas/io/parsers/readers.py", line 605, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib /python3.10/site-packages/pandas/io/parsers/readers.py", line 1442, in __init__
self._engine = self._make_engine(f, self.engine)
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib /python3.10/site-packages/pandas/io/parsers/readers.py", line 1735, in _make_engine
self.handles = get_handle(
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib/python3.10/site-packages/pandas/io/common.py", line 713, in get_handle
ioargs = _get_filepath_or_buffer(
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib/python3.10/site-packages/pandas/io/common.py", line 363, in _get_filepath_or_buffer
with urlopen(req_info) as req:
File "/Users/apple/PycharmProjects/pythonProject4/venv/lib /python3.10/site-packages/pandas/io/common.py", line 265, in urlopen
return urllib.request.urlopen(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/urllib/request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/urllib/request.py", line 519, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/urllib/request.py", line 536, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/urllib/request.py", line 496, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/urllib/request.py", line 1391, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "/Library/Frameworks/Python.framework/Versions/3.10/lib /python3.10/urllib/request.py", line 1351, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 60] Operation timed out>
Process finished with exit code 1
How do I tackle this? I guess it is still not reading the data correctly. What should I do, and how do I get rid of these errors? As far as I can tell, everything is correct.
Remove whitespace from url?
# The URL below has no embedded spaces. The question's versions contained a
# space after "mk-" and, in the first version, another before
# "/SocialNetworkAds.csv".
dataset = pd.read_csv('https://raw.githubusercontent.com/mk-gurucharan/Classification/master/SocialNetworkAds.csv')
# Columns 0 and 1 are taken as the feature matrix, column 2 as the target.
X = dataset.iloc[:, [0, 1]].values
y = dataset.iloc[:, 2].values
print(dataset)
# Output
Age EstimatedSalary Purchased
0 19 19000 0
1 35 20000 0
2 26 43000 0
3 27 57000 0
4 19 76000 0
.. ... ... ...
395 46 41000 1
396 51 23000 1
397 50 20000 1
398 36 33000 0
399 49 36000 1
[400 rows x 3 columns]
I currently work on a project to collect stock data and to export them in an excel worksheet as it is simply annoying to copy paste them.
Basically, when I run the script, everything is working well until .to_excel function.
I wondered if this library was working in Repl.it as I am working on it.
Just take a look at the code below:
from yahoofinancials import YahooFinancials
import yfinance as yf
from datetime import date, timedelta
from tabulate import tabulate
import pandas as pd
import numpy as np
import openpyxl
# Latest closing price of each processed ticker, appended by volatility().
price = list()
# Annualised volatility of each processed ticker, appended by volatility().
vol = list()
# Symbols handed to YahooFinancials / yfinance, in processing order.
tickers = [
    'AI.PA', 'AIR.PA', 'AMZN', 'AAPL', 'MT.AS', 'CS.PA', 'BNP.PA', '^FCHI',
    'CA.PA', 'CO.PA', 'ACA.PA', 'BN.PA', '^GDAXI', 'KER.PA', 'OR.PA', 'MC.PA',
    '^IXIC', 'NFLX', 'ORA.PA', 'RI.PA', 'RNO.PA', '^GSPC', 'SAN.PA', 'GLE.PA',
    'STLA.PA', 'STM.MI', 'TSLA', 'TTE.PA', 'FR.PA', 'BZ=F', 'AC.PA', 'AF.PA',
    'SAF.PA',
]
def volatility(ticker):
    """Record the latest close and the annualised volatility of ``ticker``.

    One year of daily prices is requested through ``YahooFinancials``; the
    standard deviation of the daily log returns is scaled by ``sqrt(252)``.
    The latest close comes from ``yfinance``.

    Parameters:
        ticker: symbol understood by both ``YahooFinancials`` and ``yfinance``.

    Side effects:
        Appends the latest close to the module-level ``price`` list and the
        annualised volatility to the module-level ``vol`` list.
    """
    end_time = date.today()
    start_time = end_time - timedelta(days=365)
    end = end_time.strftime('%Y-%m-%d')
    start = start_time.strftime('%Y-%m-%d')

    json_prices = YahooFinancials(ticker).get_historical_price_data(
        start, end, 'daily')
    prices = pd.DataFrame(
        json_prices[ticker]['prices'])[['formatted_date', 'close']]
    # Reverse the row order so that shift(-1) pairs every close with the row
    # that originally preceded it (assumes the API returns rows oldest-first
    # -- TODO confirm).
    prices = prices.sort_index(ascending=False)
    prices['returns'] = np.log(prices.close / prices.close.shift(-1))
    daily_std = np.std(prices.returns)
    std = daily_std * 252 ** 0.5

    # Select the last row by position: a plain `[-1]` on a Series relies on
    # the deprecated integer fallback, which newer pandas treats as a label.
    last_price = yf.Ticker(ticker).history(period='1d')['Close'].iloc[-1]
    price.append(last_price)
    vol.append(std)
# Fill `price` and `vol` with one entry per ticker, in ticker order.
for stock in tickers:
    volatility(stock)

# Pass the lists themselves as columns so the frame gets one row per ticker.
# Wrapping each list in another list (`[tickers]`) produced a single row whose
# cells were whole lists.
tb = {'TICKER': tickers, 'Price': price, '52w_vol': vol}
df2 = pd.DataFrame(tb)
df2.to_excel(r'path\Inliner_pricer.xlsm', sheet_name='Export_python', index=False)
Following is the error message :
Traceback (most recent call last):
File "main.py", line 47, in <module>
df2.to_excel(r'path/Inliner_pricer.xlsm', sheet_name='Export_python', index = False)
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/core/generic.py", line 2345
Traceback (most recent call last):
File "main.py", line 47, in <module>
df2.to_excel(r'path/Inliner_pricer.xlsm', sheet_name='Export_python', index = False)
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/core/generic.py", line 2345, in to_excel
formatter.write(
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/io/formats/excel.py", line 888, in write
writer = ExcelWriter( # type: ignore[abstract]
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/io/excel/_openpyxl.py", line 53, in __init__
super().__init__(
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/io/excel/_base.py", line 1106, in __init__
self.handles = get_handle(
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/io/common.py", line 670, in get_handle
ioargs = _get_filepath_or_buffer(
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/io/common.py", line 339, in _get_filepath_or_buffer
with urlopen(req_info) as req:
File "/home/runner/LovableLuxuriousCircle/venv/lib/python3.8/site-packages/pandas/io/common.py", line 239, in urlopen
return urllib.request.urlopen(*args, **kwargs)
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/urllib/request.py", line 1397, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/urllib/request.py", line 1354, in do_open
h.request(req.get_method(), req.selector, req.data, headers,
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/http/client.py", line 1256, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/http/client.py", line 1267, in _send_request
self.putrequest(method, url, **skips)
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/http/client.py", line 1101, in putrequest
self._validate_path(url)
File "/nix/store/p21fdyxqb3yqflpim7g8s1mymgpnqiv7-python3-3.8.12/lib/python3.8/http/client.py", line 1201, in _validate_path
raise InvalidURL(f"URL can't contain control characters. {url!r} "
http.client.InvalidURL: URL can't contain control characters. 'path\Inliner_pricer.xlsm' (found at least ' ')
Thanks in advance.
Here is a class I wrote to get billboard hot 100 songs by date.
The class uses requests to get website html text
It then uses beautifulsoup to parse the html
The parsing works well
the problem is the intermittent connection errors
import json
import time
from bs4 import BeautifulSoup
import requests
import datetime as DT
class BillBoardScraper():
    """Download one Billboard Hot 100 chart page and archive it as JSON."""

    def __init__(self) -> None:
        # Chart position -> {"track", "artist", "date"} for the last parsed chart.
        self.top_100 = None
        # Result-row elements of the last downloaded chart page.
        self.scraped_chart = None

    def _scrape_chart(self, date, retries=3, backoff=5, timeout=30):
        """Fetch the chart page for ``date`` and return its result-row elements.

        Parameters:
            date: date string appended to the chart URL.
            retries: total number of attempts for the HTTP request.
            backoff: base delay in seconds; attempt ``k`` waits ``backoff * k``.
            timeout: per-request timeout in seconds passed to ``requests.get``.

        Raises:
            requests.exceptions.ConnectionError or requests.exceptions.Timeout
            if every attempt fails.
        """
        url = 'https://www.billboard.com/charts/hot-100'
        headers = {
            "User-Agent": "Mozilla/5.0"
        }
        # The server sporadically drops the connection without answering, so
        # retry transient network failures instead of aborting the whole run.
        transient_errors = (requests.exceptions.ConnectionError,
                            requests.exceptions.Timeout)
        attempts = max(1, retries)
        for attempt in range(1, attempts + 1):
            try:
                r = requests.get(
                    f'{url}/{date}', headers=headers, timeout=timeout)
                break
            except transient_errors:
                if attempt == attempts:
                    raise
                time.sleep(backoff * attempt)
        bill_board_100_soup = BeautifulSoup(r.text, 'html.parser')
        return bill_board_100_soup.find_all(
            "div", "o-chart-results-list-row-container")

    def _get_song_and_artist(self, idx):
        """Return the non-empty strings of the song/artist cell of row ``idx``.

        The cell is the first ``li`` of the row whose tag children are exactly
        an ``h3`` followed by a ``span``. Returns ``None`` when the row has no
        such cell.
        """
        for result_item in self.scraped_chart[idx].find_all('li'):
            segment_struct = [tag.name for tag in result_item if tag.name]
            if segment_struct == ['h3', 'span']:
                song_and_artist = []
                for tag in result_item:
                    # `.string` is None for an element with several children;
                    # treat that as empty text instead of failing on .strip().
                    text = (tag.string or '').strip()
                    if text:
                        song_and_artist.append(text)
                return song_and_artist
        return None

    def run_parser_and_archive_data(self, date):
        """Scrape the chart for ``date`` and write it to ``data_billboard/``.

        The result is also kept in ``self.top_100``. The output file holds the
        chart as a JSON object keyed by chart position.
        """
        self.top_100 = {}
        self.scraped_chart = self._scrape_chart(date)
        for i in range(0, 100):
            song, artist = self._get_song_and_artist(i)
            self.top_100[i+1] = {"track": song, "artist": artist, "date": date}
        with open(f'data_billboard/billboard_hot100_{date}.json', 'w') as outfile:
            # Dump the dict directly: serialising it to a string first and then
            # dumping that string wrote a JSON-encoded string, not an object.
            json.dump(self.top_100, outfile)
date = DT.date(2010, 3, 19)
n_weeks = 520
# Walk backwards one week per iteration for exactly `n_weeks` charts. The
# original `while c_week <= n_weeks` never advanced `c_week`, so it never ended.
for c_week in range(1, n_weeks + 1):
    print(str(date))
    top100 = BillBoardScraper()
    top100.run_parser_and_archive_data(str(date))
    date = date - DT.timedelta(days=7)
    # Pause between requests.
    time.sleep(10)
Sporadically I receive the following error. Why does this happen? What can I do to mitigate this? Any feedback is appreciated
Traceback (most recent call last):
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
chunked=chunked,
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 306, in begin
version, status, reason = self._read_status()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 275, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 720, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\util\retry.py", line 400, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\packages\six.py", line 734, in reraise
raise value.with_traceback(tb)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
chunked=chunked,
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 306, in begin
version, status, reason = self._read_status()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 275, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "d:\airflow\plugins\api_billboard100.py", line 55, in <module>
top100.run_parser_and_archive_data(str(date))
File "d:\airflow\plugins\api_billboard100.py", line 39, in run_parser_and_archive_data
self.scraped_chart = self._scrape_chart(date)
File "d:\airflow\plugins\api_billboard100.py", line 20, in _scrape_chart
f'{url}/{date}', headers=headers)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
I am currently working on a project for which I need to download a few thousand citations from PubMed. I am currently using BioPython and have written this code:
from Bio import Entrez
from Bio import Medline
from pandas import *
from sys import argv
import os
Entrez.email = "email"
df = read_csv("/Users/.../Desktop/sr_dataset/adhd/excluded/adhdExcluded.csv")
i = 0
withoutMesh = 0
withoutMeshID = ""
withoutAbstract = 0
withoutAbstractID = ""
path = '/Users/.../Desktop/sr_dataset/adhd/excluded'


def _fetch_medline(pubmed_id, attempts=3, delay=15):
    """Open a MEDLINE text handle for ``pubmed_id``, retrying network errors.

    Entrez requests fail intermittently (e.g. "Connection reset by peer"), so
    a failed request is repeated up to ``attempts`` times with ``delay``
    seconds in between; the last failure is re-raised.
    """
    import time
    from urllib.error import URLError

    attempts = max(1, attempts)
    for attempt in range(1, attempts + 1):
        try:
            return Entrez.efetch(db="pubmed", rettype="medline",
                                 retmode="text", id=pubmed_id)
        except URLError:
            if attempt == attempts:
                raise
            time.sleep(delay)


for index, row in df.iterrows():
    print(row.id)
    handle = _fetch_medline(str(row.id))
    try:
        # Read everything before closing so the connection is always released.
        records = list(Medline.parse(handle))
    finally:
        handle.close()
    for record in records:
        # A field that is absent from the record raises KeyError on lookup.
        try:
            abstract = str(record["AB"])
        except KeyError:
            abstract = "none"
            withoutAbstract = withoutAbstract + 1
            withoutAbstractID = withoutAbstractID + str(row.id) + "\n"
        try:
            title = str(record["TI"])
        except KeyError:
            title = "none"
        try:
            mesh = str(record["MH"])
        except KeyError:
            mesh = "none"
            withoutMesh = withoutMesh + 1
            withoutMeshID = withoutMeshID + str(row.id) + "\n"
        filename = str(row.id) + '.txt'
        filename = os.path.join(path, filename)
        output = "title: "+str(title) + "\n\n" + "abstract: "+str(abstract) + "\n\n" + "mesh: "+str(mesh) + "\n\n"
        with open(filename, "w") as file:
            file.write(output)
    print(i)
    i = i + 1

# Summary of the citations that lacked MeSH terms or an abstract.
filename = os.path.join(path, "overview.txt")
output = "Without MeSH terms:" + str(withoutMesh) + "\n" + "ID's: "+str(withoutMeshID) + "\n\n" + "Without abstract: "+str(withoutAbstract) + "\n" + "ID's: "+str(withoutAbstractID)
with open(filename, "w") as file:
    file.write(output)
The code works for the first few hundred rows in the table but then stops executing and the error I receive is:
Traceback (most recent call last):
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 1254, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1106, in request
self._send_request(method, url, body, headers)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1151, in _send_request
self.endheaders(body)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1102, in endheaders
self._send_output(message_body)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 934, in _send_output
self.send(msg)
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 877, in send
self.connect()
File "/Users/.../anaconda/lib/python3.5/http/client.py", line 1260, in connect
server_hostname=server_hostname)
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 377, in wrap_socket
_context=self)
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 752, in __init__
self.do_handshake()
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 988, in do_handshake
self._sslobj.do_handshake()
File "/Users/.../anaconda/lib/python3.5/ssl.py", line 633, in do_handshake
self._sslobj.do_handshake()
ConnectionResetError: [Errno 54] Connection reset by peer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/.../Desktop/sr_dataset/ace_inhibitor/excluded/pumbedMedline.py", line 18, in <module>
handle = Entrez.efetch(db="pubmed",rettype="medline",retmode="text", id=str(row.id))
File "/Users/.../anaconda/lib/python3.5/site-packages/biopython-1.68-py3.5-macosx-10.6-x86_64.egg/Bio/Entrez/__init__.py", line 180, in efetch
return _open(cgi, variables, post=post)
File "/Users/.../anaconda/lib/python3.5/site-packages/biopython-1.68-py3.5-macosx-10.6-x86_64.egg/Bio/Entrez/__init__.py", line 524, in _open
handle = _urlopen(cgi)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 466, in open
response = self._open(req, data)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 484, in _open
'_open', req)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 444, in _call_chain
result = func(*args)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 1297, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/Users/.../anaconda/lib/python3.5/urllib/request.py", line 1256, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 54] Connection reset by peer>
Here are the first few columns of the CSV file:
id
10029645
10073846
10078088
10080457
10088066
...
Biopython does follow the "up to three queries per second" rule to avoid abusing the NCBI servers, but you may have missed the first bullet point of the guidelines in our tutorial (http://biopython.org/DIST/docs/tutorial/Tutorial.html):
"For any series of more than 100 requests, do this at weekends or
outside USA peak times. This is up to you to obey."
That said, sometimes you will get intermittent errors from Entrez, and using a try/except block to handle this with a retry is suggested. There is an example in the tutorial.
Every request to this one particular domain now ends in an `<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:645)>` error.
I am not sure what caused this. I've been searching for an answer since last night, trying to figure out how to fix it, but somehow I can't get it running.
I tried pip uninstall -y certifi && pip install certifi==2015.04.28 but it did not help.
Here is my code:
# Requests pages 0-19 of the getyourguide.de search.json endpoint for the query
# held in `Region`, parses the markup carried in the JSON 'activities' field,
# and prints summary lines (header, price, deeplink, partner/location IDs,
# language) that have not been printed before.
# NOTE(review): all indentation was lost in this copy, so the nesting of the
# loops and conditionals below cannot be determined from the text alone.
# NOTE(review): `max_pages` is never read; the page range is fixed at 0..19.
# NOTE(review): `Region`, `urllib`, `json` and `BeautifulSoup` are not defined
# in this block -- presumably module-level names; verify against the full file.
def trade_spider(max_pages):
# Overwritten by the `for page in ...` loop below before it is ever read.
page = -1
partner_ID = 2
location_ID = 25
# Every header, deeplink and summary line seen so far, used for de-duplication.
already_printed = set()
for page in range(0,20):
# NOTE(review): plain http:// URL; the traceback quoted after this code shows
# the request going through a 302 redirect into an HTTPS handshake.
response = urllib.request.urlopen("http://www.getyourguide.de/s/search.json?q=" + str(Region) +"&page=" + str(page))
jsondata = json.loads(response.read().decode("utf-8"))
# NOTE(review): `format` shadows the builtin of the same name.
format = (jsondata['activities'])
# Trim quote/bracket characters from both ends and turn single quotes into
# double quotes before parsing.
g_data = format.strip("'<>()[]\"` ").replace('\'', '\"')
# No parser is named here, so the choice is left to BeautifulSoup.
soup = BeautifulSoup(g_data)
hallo = soup.find_all("article", {"class": "activity-card activity-card-horizontal "})
for item in hallo:
headers = item.find_all("h3", {"class": "activity-card-title"})
for header in headers:
header_final = header.text.strip()
if header_final not in already_printed:
already_printed.add(header_final)
prices = item.find_all("span", {"class": "price"})
for price in prices:
#itemStr += ("\t" + price.text.strip().replace(",","")[2:])
# Remove commas and drop the first two characters (presumably a currency
# prefix -- confirm against the page markup).
price_final = price.text.strip().replace(",","")[2:]
#if itemStr2 not in already_printed:
#print(itemStr2)
#already_printed.add(itemStr2)
deeplinks = item.find_all("a", {"class": "activity-card-link"})
# Iterate over the distinct href values of the card's links.
for t in set(t.get("href") for t in deeplinks):
#itemStr += "\t" + t
deeplink_final = t
if deeplink_final not in already_printed:
#print(itemStr3)
already_printed.add(deeplink_final)
Language = "Deutsch"
end_final = "Header: " + header_final + " | " + "Price: " + str(price_final) + " | " + "Deeplink: " + deeplink_final + " | " + "PartnerID: " + str(partner_ID) + " | " + "LocationID: " + str(location_ID)+ " | " + "Language: " + Language
# Print each summary line only once.
if end_final not in already_printed:
print(end_final)
already_printed.add(end_final)
# Script entry point. NOTE(review): `Spider` is not defined in the visible
# source, and trade_spider never reads the argument it receives.
trade_spider(int(Spider))
This is the output:
Traceback (most recent call last):
File "C:\Python34\lib\urllib\request.py", line 1240, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "C:\Python34\lib\http\client.py", line 1083, in request
self._send_request(method, url, body, headers)
File "C:\Python34\lib\http\client.py", line 1128, in _send_request
self.endheaders(body)
File "C:\Python34\lib\http\client.py", line 1079, in endheaders
self._send_output(message_body)
File "C:\Python34\lib\http\client.py", line 911, in _send_output
self.send(msg)
File "C:\Python34\lib\http\client.py", line 854, in send
self.connect()
File "C:\Python34\lib\http\client.py", line 1237, in connect
server_hostname=server_hostname)
File "C:\Python34\lib\ssl.py", line 376, in wrap_socket
_context=self)
File "C:\Python34\lib\ssl.py", line 747, in __init__
self.do_handshake()
File "C:\Python34\lib\ssl.py", line 983, in do_handshake
self._sslobj.do_handshake()
File "C:\Python34\lib\ssl.py", line 628, in do_handshake
self._sslobj.do_handshake()
ssl.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:645)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/Rj/Desktop/ crawling scripts/GetyourGuide_International_Final.py", line 84, in <module>
trade_spider(int(Spider))
File "C:/Users/Raju/Desktop/scripts/GetyourGuide_International_Final.py", line 36, in trade_spider
response = urllib.request.urlopen("http://www.getyourguide.com/s/search.json?q=" + str(Region) +"&page=" + str(page))
File "C:\Python34\lib\urllib\request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "C:\Python34\lib\urllib\request.py", line 471, in open
response = meth(req, response)
File "C:\Python34\lib\urllib\request.py", line 581, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python34\lib\urllib\request.py", line 503, in error
result = self._call_chain(*args)
File "C:\Python34\lib\urllib\request.py", line 443, in _call_chain
result = func(*args)
File "C:\Python34\lib\urllib\request.py", line 686, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Python34\lib\urllib\request.py", line 465, in open
response = self._open(req, data)
File "C:\Python34\lib\urllib\request.py", line 483, in _open
'_open', req)
File "C:\Python34\lib\urllib\request.py", line 443, in _call_chain
result = func(*args)
File "C:\Python34\lib\urllib\request.py", line 1283, in https_open
context=self._context, check_hostname=self._check_hostname)
File "C:\Python34\lib\urllib\request.py", line 1242, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:645)>
Can someone help me out? Any feedback is appreciated :)
I would investigate further by checking if openssl can verify the certificate:
openssl s_client -showcerts -connect www.getyourguide.de:443