I am running a google.py scraping script to get data. The script reads a CSV file and, for each line of the file, scrapes a page. After the scraping is done, the script saves the result to the same CSV file.
The dataframe is several thousand lines long.
After it started getting captcha results on several lines, I interrupted the scraping with Ctrl+C.
I re-ran the script right afterwards, and the dataframe read from the CSV file was 3929 lines shorter.
This is the output of the Ctrl+C:
^CTraceback (most recent call last):
File "google.py", line 255, in <module>
Scraping().scrape()
File "google.py", line 239, in scrape
self.write_dataframe(df_psys,psy,tel_list, mail_list)
File "google.py", line 143, in write_dataframe
df_psys.to_csv('psychologues.csv',sep=';',index=False)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/generic.py", line 3563, in to_csv
return DataFrameRenderer(formatter).to_csv(
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/io/formats/format.py", line 1180, in to_csv
csv_formatter.save()
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/io/formats/csvs.py", line 261, in save
self._save()
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/io/formats/csvs.py", line 266, in _save
self._save_body()
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/io/formats/csvs.py", line 304, in _save_body
self._save_chunk(start_i, end_i)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/io/formats/csvs.py", line 311, in _save_chunk
res = df._mgr.to_native_types(**self._number_format)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/internals/managers.py", line 473, in to_native_types
return self.apply("to_native_types", **kwargs)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/internals/managers.py", line 304, in apply
applied = getattr(b, f)(**kwargs)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/internals/blocks.py", line 634, in to_native_types
result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/internals/blocks.py", line 2207, in to_native_types
mask = isna(values)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/dtypes/missing.py", line 143, in isna
return _isna(obj)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/dtypes/missing.py", line 172, in _isna
return _isna_array(obj, inf_as_na=inf_as_na)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/dtypes/missing.py", line 254, in _isna_array
result = _isna_string_dtype(values, inf_as_na=inf_as_na)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/pandas/core/dtypes/missing.py", line 278, in _isna_string_dtype
result = libmissing.isnaobj2d(values, inf_as_na=inf_as_na)
KeyboardInterrupt
It seems the interrupt happened during the to_csv call, so I am wondering whether the missing data comes from that or from a hack / a physical intervention on my computer. I have another keyboard interrupt from a previous run of the script, and there is no to_csv in it:
^CTraceback (most recent call last):
File "google.py", line 255, in <module>
Scraping().scrape()
File "google.py", line 238, in scrape
(tel_list, mail_list) = self.google_scraping(psy, counter)
File "google.py", line 166, in google_scraping
if a.text == "Que s'est-il passé ?":
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/selenium/webdriver/remote/webelement.py", line 77, in text
return self._execute(Command.GET_ELEMENT_TEXT)['value']
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/selenium/webdriver/remote/webelement.py", line 740, in _execute
return self._parent.execute(command, params)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 428, in execute
response = self.command_executor.execute(driver_command, params)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/selenium/webdriver/remote/remote_connection.py", line 347, in execute
return self._request(command_info[0], url, body=data)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/selenium/webdriver/remote/remote_connection.py", line 369, in _request
response = self._conn.request(method, url, body=body, headers=headers)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/urllib3/request.py", line 74, in request
return self.request_encode_url(
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/urllib3/request.py", line 96, in request_encode_url
return self.urlopen(method, url, **extra_kw)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/urllib3/poolmanager.py", line 376, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/urllib3/connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/urllib3/connectionpool.py", line 449, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/Users/macbook/.test_requests/lib/python3.8/site-packages/urllib3/connectionpool.py", line 444, in _make_request
httplib_response = conn.getresponse()
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/http/client.py", line 1337, in getresponse
response.begin()
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/http/client.py", line 307, in begin
version, status, reason = self._read_status()
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/http/client.py", line 268, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
KeyboardInterrupt
I didn't notice the missing lines until recently, and I ran the script
3 times after the missing lines appeared. I tried to look into my terminal's command history with history -E 1 | grep google.py, but it doesn't show the times I ran the command; only one command shows up, which is the last one I ran.
So I don't know exactly when the deletion of data happened (in the last 24 hours for sure). I would like to check other system log files, but if the deletion is explained by a bug in pandas, I won't look further into my logs...
What do you think?
Is there a way to prevent the Ctrl+C interrupt from causing this error?
This is write_dataframe:
def write_dataframe(self,df,psy,tel_list, mail_list):
# Store the scraped phone and mail lists on the row matching `psy`, then
# rewrite the whole frame to the CSV file.
# Label of the first row of `df` whose 'psy' column equals `psy`
# (indexing with [0] raises IndexError when no row matches).
index=df[df['psy'] == psy].index.values[0]
print('writing dataframes')
# NOTE(review): the lines below use `df_psys`, not the `df` parameter;
# presumably both refer to the same DataFrame -- confirm against the caller.
df_psys.loc[index,'tel_google']=tel_list
df_psys.loc[index, 'mail_google'] = mail_list
# The full frame is written to 'file.csv' on every call.
# NOTE(review): the first traceback shows Ctrl+C arriving inside to_csv;
# presumably an interrupt mid-write leaves a partially written file -- verify.
df_psys.to_csv('file.csv',sep=';',index=False)
If I do
try:
write_dataframes(args)
except KeyboardInterrupt:
sys.exit()
Will that be enough to prevent data loss on a keyboard interrupt?
Thank you
Reading the comments and the downvotes, the answer is yes.
Reading this post How to prevent a block of code from being interrupted by KeyboardInterrupt in Python?,
I have implemented the method that seemed the most reliable for preventing a keyboard interrupt:
import signal
s = signal.signal(signal.SIGINT, signal.SIG_IGN)
My code not to be interrupted
signal.signal(signal.SIGINT, s)
Related
I'm setting up OpenCV for Python, following the official guide at https://github.com/opencv/opencv-python.
I updated pip and entered pip install opencv-contrib-python in the command cell.
At first, the command ran smoothly. However, after about 10 minutes, an error occurred.
The first part of it is below:
ERROR: Exception:
Traceback (most recent call last):
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\urllib3\response.py", line 435, in _error_catcher
yield
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\urllib3\response.py", line 516, in read
data = self._fp.read(amt) if not fp_closed else b""
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\cachecontrol\filewrapper.py", line 90, in read
data = self.__fp.read(amt)
File "D:\app\Anaconda3\lib\http\client.py", line 462, in read
n = self.readinto(b)
File "D:\app\Anaconda3\lib\http\client.py", line 506, in readinto
n = self.fp.readinto(b)
File "D:\app\Anaconda3\lib\socket.py", line 704, in readinto
return self._sock.recv_into(b)
File "D:\app\Anaconda3\lib\ssl.py", line 1241, in recv_into
return self.read(nbytes, buffer)
File "D:\app\Anaconda3\lib\ssl.py", line 1099, in read
return self._sslobj.read(len, buffer)
socket.timeout: The read operation timed out
I also attach the second ERROR part.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\cli\base_command.py", line 167, in exc_logging_wrapper
status = run_func(*args)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\cli\req_command.py", line 247, in wrapper
return func(self, options, args)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\commands\install.py", line 369, in run
requirement_set = resolver.resolve(
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\resolver.py", line 92, in resolve
result = self._result = resolver.resolve(
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\resolvelib\resolvers.py", line 481, in resolve
state = resolution.resolve(requirements, max_rounds=max_rounds)
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\resolvelib\resolvers.py", line 348, in resolve
self._add_to_criteria(self.state.criteria, r, parent=None)
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\resolvelib\resolvers.py", line 172, in _add_to_criteria
if not criterion.candidates:
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\resolvelib\structs.py", line 151, in __bool__
return bool(self._sequence)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\found_candidates.py", line 155, in __bool__
return any(self)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\found_candidates.py", line 143, in <genexpr>
return (c for c in iterator if id(c) not in self._incompatible_ids)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\found_candidates.py", line 47, in _iter_built
candidate = func()
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\factory.py", line 206, in _make_candidate_from_link
self._link_candidate_cache[link] = LinkCandidate(
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\candidates.py", line 297, in __init__
super().__init__(
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\candidates.py", line 162, in __init__
self.dist = self._prepare()
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\candidates.py", line 231, in _prepare
dist = self._prepare_distribution()
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\resolution\resolvelib\candidates.py", line 308, in _prepare_distribution
return preparer.prepare_linked_requirement(self._ireq, parallel_builds=True)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\operations\prepare.py", line 438, in prepare_linked_requirement
return self._prepare_linked_requirement(req, parallel_builds)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\operations\prepare.py", line 483, in _prepare_linked_requirement
local_file = unpack_url(
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\operations\prepare.py", line 165, in unpack_url
file = get_http_url(
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\operations\prepare.py", line 106, in get_http_url
from_path, content_type = download(link, temp_dir.path)
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\network\download.py", line 147, in __call__
for chunk in chunks:
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\cli\progress_bars.py", line 53, in _rich_progress_bar
for chunk in iterable:
File "D:\app\Anaconda3\lib\site-packages\pip\_internal\network\utils.py", line 63, in response_chunks
for chunk in response.raw.stream(
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\urllib3\response.py", line 573, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\urllib3\response.py", line 538, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "D:\app\Anaconda3\lib\contextlib.py", line 137, in __exit__
self.gen.throw(typ, value, traceback)
File "D:\app\Anaconda3\lib\site-packages\pip\_vendor\urllib3\response.py", line 440, in _error_catcher
raise ReadTimeoutError(self._pool, None, "Read timed out.")
pip._vendor.urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='files.pythonhosted.org', port=443): Read timed out.
Here, "D:\app\Anaconda3" is where my Anaconda is installed.
I'm wondering why so many errors occurred. Besides, I would also like to get OpenCV working correctly with my Python installation.
I would appreciate it if someone could clear up my confusion and give some advice on setting it up.
Thank you !
I solved the problem! It was due to my region: it takes more time to download packages from abroad. I re-downloaded the package successfully.
try:
r = requests.get(url)
except requests.exceptions.Timeout:
_LOG.info(f"Loop {i} timeout")
continue
I got the error below:
return request(\'get\', url, params=params, **kwargs)\n
File "/venv/lib/python3.6/site-packages/requests/api.py", line 61, in request\n
return session.request(method=method, url=url, **kwargs)\n
File "/venv/lib/python3.6/site-packages/requests/sessions.py", line 542, in request\n resp = self.send(prep, **send_kwargs)\n
File "/venv/lib/python3.6/site-packages/requests/sessions.py", line 655, in send\n r = adapter.send(request, **kwargs)\n
File "/venv/lib/python3.6/site-packages/requests/adapters.py", line 449, in send\n timeout=timeout\n
File "/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 706, in urlopen\n chunked=chunked,\n
File "/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 382, in _make_request\n self._validate_conn(conn)\n
File "/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 1010, in _validate_conn\n conn.connect()\n
File "/venv/lib/python3.6/site-packages/urllib3/connection.py", line 421, in connect\n tls_in_tls=tls_in_tls,\n
File "/venv/lib/python3.6/site-packages/urllib3/util/ssl_.py", line 450, in ssl_wrap_socket\n sock, context, tls_in_tls, server_hostname=server_hostname\n
File "/venv/lib/python3.6/site-packages/urllib3/util/ssl_.py", line 493, in _ssl_wrap_socket_impl\n
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)\n
File "/usr/local/lib/python3.6/ssl.py", line 401, in wrap_socket\n _context=self, _session=session)\n
File "/usr/local/lib/python3.6/ssl.py", line 808, in __init__\n self.do_handshake()\n
File "/usr/local/lib/python3.6/ssl.py", line 1061, in do_handshake\n self._sslobj.do_handshake()\n File "/usr/local/lib/python3.6/ssl.py", line 683, in do_handshake\n self._sslobj.do_handshake()\n
File "/venv/lib/python3.6/site-packages/billiard/common.py", line 125, in _shutdown_cleanup\n
sys.exit(-(256 - signum))\n
File "/venv/lib/python3.6/site-packages/billiard/pool.py", line 280, in exit\n
return _exit()\nSystemExit\n'
Can anyone tell me what the issue above is? I am using Celery and requests, and I got the error shown above. I am not sure whether this is a Celery error or a requests error. Thanks.
My SQLAlchemy code also hit the same issue:
File "/venv/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 976, in execute\n
return self._execute_text(object_, multiparams, params)\n
File "/venv/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1151, in _execute_text\n parameters,\n
File "/venv/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1288, in _execute_context\n e, statement, parameters, cursor, context\n
File "/venv/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1485, in _handle_dbapi_exception\n util.raise_(exc_info[1], with_traceback=exc_info[2])\n
File "/venv/lib/python3.6/site-packages/sqlalchemy/util/compat.py", line 178, in raise_\n raise exception\n
File "/venv/lib/python3.6/site-packages/sqlalchemy/engine/base.py", line 1248, in _execute_context\n cursor, statement, parameters, context\n
File "/venv/lib/python3.6/site-packages/sqlalchemy/engine/default.py", line 588, in do_execute\n cursor.execute(statement, parameters)\n
File "/venv/lib/python3.6/site-packages/MySQLdb/cursors.py", line 247, in execute\n res = self._query(query)\n
File "/venv/lib/python3.6/site-packages/MySQLdb/cursors.py", line 411, in _query\n rowcount = self._do_query(q)\n
File "/venv/lib/python3.6/site-packages/MySQLdb/cursors.py", line 375, in _do_query\n self._do_get_result()\n
File "/venv/lib/python3.6/site-packages/MySQLdb/cursors.py", line 184, in _do_get_result\n self._result = self._get_result()\n
File "/venv/lib/python3.6/site-packages/MySQLdb/cursors.py", line 408, in _get_result\n return self._get_db().store_result()\n
File "/venv/lib/python3.6/site-packages/billiard/common.py", line 125, in _shutdown_cleanup\n sys.exit(-(256 - signum))\n
File "/venv/lib/python3.6/site-packages/billiard/pool.py", line 280, in exit\n
return _exit()\nSystemExit\n'
If you hit this issue, try updating your Celery version; sometimes it is caused by an outdated Celery release.
I successfully solved this issue after updating to celery==4.4.2.
Also, the message queue retry sometimes needs to be disabled for SQL work, since an unexpected Celery retry might cause lock wait timeout issues.
I am attempting to upgrade my tensorflow via conda but am having some issues. I am following the Anaconda installation procedure here.
I am getting an error when I execute the conda create -n tensorflow command.
Here is the error:
Traceback (most recent call last):
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/exceptions.py", line 479, in conda_exception_handler
return_value = func(*args, **kwargs)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/cli/main.py", line 145, in _main
exit_code = args.func(args, p)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/cli/main_create.py", line 68, in execute
install(args, parser, 'create')
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/cli/install.py", line 238, in install
prefix=prefix)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/api.py", line 24, in get_index
index = fetch_index(channel_urls, use_cache=use_cache, unknown=unknown)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/fetch.py", line 300, in fetch_index
repodatas = [(u, f.result()) for u, f in zip(urls, futures)]
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/concurrent/futures/_base.py", line 403, in result
return self.__get_result()
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/concurrent/futures/thread.py", line 55, in run
result = self.fn(*self.args, **self.kwargs)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/fetch.py", line 75, in func
res = f(*args, **kwargs)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/conda/fetch.py", line 117, in fetch_repodata
timeout=(6.1, 60))
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/sessions.py", line 480, in get
return self.request('GET', url, **kwargs)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/sessions.py", line 576, in send
r = adapter.send(request, **kwargs)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/adapters.py", line 376, in send
timeout=timeout
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py", line 559, in urlopen
body=body, headers=headers)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py", line 345, in _make_request
self._validate_conn(conn)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py", line 784, in _validate_conn
conn.connect()
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/packages/urllib3/connection.py", line 252, in connect
ssl_version=resolved_ssl_version)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/requests/packages/urllib3/contrib/pyopenssl.py", line 296, in ssl_wrap_socket
cnx.set_tlsext_host_name(server_hostname)
File "/Users/madhavthaker/Downloads/anaconda/lib/python2.7/site-packages/OpenSSL/SSL.py", line 1232, in set_tlsext_host_name
raise TypeError("name must be a byte string")
TypeError: name must be a byte string
I've done some googling and haven't been able to find a useful response. Any help would be much appreciated.
Your conda is using python 2.7, you may have to upgrade it to python 3.x
I say this because in the anaconda installation guide you linked, it shows a call to print with parentheses, which is python 3 syntax
We're using Buildout as a way to build our packages here, and we're consistently getting really frustrating timeouts when trying to download eggs by running bin/buildout:
We have no distributions for python-dateutil that satisfies 'python-dateutil>=1.5'.
While:
Installing python_section.
Getting distribution for 'python-dateutil>=1.5'.
An internal error occured due to a bug in either zc.buildout or in a
recipe being used:
Traceback (most recent call last):
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/buildout.py", line 1921, in main
getattr(buildout, command)(args)
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/buildout.py", line 602, in install
installed_files = self[part]._call(recipe.install)
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/buildout.py", line 1356, in _call
return f()
File "/vagrant/eggs/zc.recipe.egg-2.0.0-py2.7.egg/zc/recipe/egg/egg.py", line 126, in install
reqs, ws = self.working_set()
File "/vagrant/eggs/zc.recipe.egg-2.0.0-py2.7.egg/zc/recipe/egg/egg.py", line 84, in working_set
allow_hosts=self.allow_hosts)
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/easy_install.py", line 782, in install
return installer.install(specs, working_set)
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/easy_install.py", line 626, in install
for dist in self._get_dist(requirement, ws):
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/easy_install.py", line 448, in _get_dist
dist, avail = self._satisfied(requirement)
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/easy_install.py", line 204, in _satisfied
return None, self._obtain(req, source)
File "/vagrant/eggs/zc.buildout-2.1.1-py2.7.egg/zc/buildout/easy_install.py", line 372, in _obtain
if index.obtain(requirement) is None:
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 340, in obtain
self.prescan(); self.find_packages(requirement)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 325, in find_packages
self.scan_url(self.index_url + requirement.unsafe_name+'/')
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 668, in scan_url
self.process_url(url, True)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 223, in process_url
page = self.process_index(url, page)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 300, in process_index
self.scan_url(new_url)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 668, in scan_url
self.process_url(url, True)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 201, in process_url
f = self.open_url(url, "Download error on %s: %%s -- Some packages may not be found!" % url)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 610, in open_url
return open_with_auth(url)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 753, in _socket_timeout
return func(*args, **kwargs)
File "/usr/lib/python2.7/dist-packages/setuptools/package_index.py", line 779, in open_with_auth
fp = urllib2.urlopen(request)
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 400, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 418, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1207, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "/usr/lib/python2.7/urllib2.py", line 1180, in do_open
r = h.getresponse(buffering=True)
File "/usr/lib/python2.7/httplib.py", line 1030, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 407, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 365, in _read_status
line = self.fp.readline()
File "/usr/lib/python2.7/socket.py", line 447, in readline
data = self._sock.recv(self._rbufsize)
timeout: timed out
Is there a way to prevent these timeouts? why do they happen?
The index server failed to respond, the connection timed out. This happens when PyPI is down, for example. By default, the index server is http://pypi.python.org/simple, but you can set your own with the [buildout] option index.
We use an egg proxy (set up with collective.eggproxy) to create a local cache automatically, to avoid such problems. Our buildouts use that server as the index, and the proxy will download any missing packages from PyPI and cache them locally. So, for any packages that we have used in the past, we can continue to serve even when PyPI is (temporarily) down.
I'm trying to open a secure (https) website using mechanize library in Python. When I try to access the website, the server closes the connection and exception BadStatusLine is raised.
I have tried to modify the headers using the addheaders property, but no response.
import mechanize
br = mechanize.Browser()
print 'opening page ...'
resp = br.open('https://onlineservices.tin.nsdl.com/etaxnew/tdsnontds.jsp') #this one works fine
print 'ok'
print 'opening page 2 ...'
resp = br.open('https://incometaxindiaefiling.gov.in/portal/index.do') #exception raised
print 'ok'
Exception:
Traceback (most recent call last): File
pydev_imports.execfile(file, globals, locals) #execute the script File "Z:\pyTax\app_test.py", line 22, in
resp=br.open('https://incometaxindiaefiling.gov.in/portal/index.do')
File "build\bdist.win32\egg\mechanize_mechanize.py", line 203, in
open File "build\bdist.win32\egg\mechanize_mechanize.py", line 230,
in _mech_open File "build\bdist.win32\egg\mechanize_opener.py",
line 188, in open File "build\bdist.win32\egg\mechanize_http.py",
line 316, in http_request File
"build\bdist.win32\egg\mechanize_http.py", line 242, in read File
"build\bdist.win32\egg\mechanize_mechanize.py", line 203, in open
File "build\bdist.win32\egg\mechanize_mechanize.py", line 230, in
_mech_open File "build\bdist.win32\egg\mechanize_opener.py", line 193, in open File
"build\bdist.win32\egg\mechanize_urllib2_fork.py", line 344, in _open
File "build\bdist.win32\egg\mechanize_urllib2_fork.py", line 332, in
_call_chain File "build\bdist.win32\egg\mechanize_urllib2_fork.py", line 1170, in https_open File
"build\bdist.win32\egg\mechanize_urllib2_fork.py", line 1116, in
do_open File "D:\Python27\lib\httplib.py", line 1031, in getresponse
response.begin() File "D:\Python27\lib\httplib.py", line 407, in begin
version, status, reason = self._read_status() File "D:\Python27\lib\httplib.py", line 371, in _read_status
raise BadStatusLine(line) httplib.BadStatusLine: ''
httplib.BadStatusLine is a subclass of HTTPException, raised if a server responds with an HTTP status code that we don't understand. That's what's causing your problem. I am not entirely sure about the fix, though, as your code works fine on my computer.