While scanning bunch of websites using the below function I received an error (see below). Would there be any except step I should add to the function below to handle such error or there is something wrong with my try / except part in my function?
function:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import io
import requests.exceptions
import time
import asyncio
from concurrent.futures import ProcessPoolExecutor, as_completed
import io
df = pd.read_csv('myScan.csv')
urls = df.T.values.tolist()[2]
results = {}
status = {}
async def scrape(url):
try:
r = requests.get(url, timeout=(3, 6))
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')
if soup.body:
data = {
"coming soon": soup.body.findAll(text = re.compile("coming soon", re.I)),
"Opening Soon": soup.body.findAll(text = re.compile("Opening Soon", re.I)),
"Under Construction": soup.body.findAll(text = re.compile("Under Construction", re.I)),
"Currently Unavailable": soup.body.findAll(text = re.compile("Currently Unavailable", re.I)),
"button_2": soup.findAll(text = re.compile('button_2.js'))}
results[url] = data
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.MissingSchema):
status[url] = "Connection Error"
except (requests.exceptions.HTTPError):
status[url] = "Http Error"
except (requests.exceptions.TooManyRedirects):
status[url] = "Redirects"
except (requests.exceptions.RequestException) as err:
status[url] = "Fatal Error: " + err + url
else:
status[url] = "OK"
async def main():
await asyncio.wait([scrape(url) for url in urls])
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
comingList= []
openingList = []
underList = []
button_2 = []
statusList = []
for url in urls:
if(not results.get(url)):
statusList.append(status.get(url))
comingList.append("-")
openingList.append("-")
underList.append("-")
button_2.append("-")
else:
statusList.append(status.get(url))
comingList.append("x" if len(results[url].get("coming soon")) > 0 else "-")
openingList.append("x" if len(results[url].get("Opening Soon")) > 0 else "-")
underList.append("x" if len(results[url].get("Under Construction")) > 0 else "-")
button_2.append("x" if len(results[url].get("button_2")) > 0 else "-")
df["comingSoon"] = pd.DataFrame(comingList, columns=['comingSoon'])
df["openingSoon"] = pd.DataFrame(openingList, columns=['openingSoon'])
df["underConstruction"] = pd.DataFrame(underList, columns=['underConstruction'])
df["button_2"] = pd.DataFrame(button_2, columns=['button_2'])
df['status'] = pd.DataFrame(statusList, columns=['Status'])
df.to_csv('myScanCompleted.csv', index=False)
Error:
Task exception was never retrieved
future: <Task finished name='Task-43943' coro=<scrape() done, defined at crawler.py:69> exception=TypeError('can only concatenate str (not "ChunkedEncodingError") to str')>
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/urllib3/response.py", line 697, in _update_chunk_length
self.chunk_left = int(line, 16)
ValueError: invalid literal for int() with base 16: b''
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/urllib3/response.py", line 438, in _error_catcher
yield
File "/usr/local/lib/python3.9/site-packages/urllib3/response.py", line 764, in read_chunked
self._update_chunk_length()
File "/usr/local/lib/python3.9/site-packages/urllib3/response.py", line 701, in _update_chunk_length
raise InvalidChunkLength(self, line)
urllib3.exceptions.InvalidChunkLength: InvalidChunkLength(got length b'', 0 bytes read)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/requests/models.py", line 753, in generate
for chunk in self.raw.stream(chunk_size, decode_content=True):
File "/usr/local/lib/python3.9/site-packages/urllib3/response.py", line 572, in stream
for line in self.read_chunked(amt, decode_content=decode_content):
File "/usr/local/lib/python3.9/site-packages/urllib3/response.py", line 793, in read_chunked
self._original_response.close()
File "/usr/local/Cellar/python#3.9/3.9.0_5/Frameworks/Python.framework/Versions/3.9/lib/python3.9/contextlib.py", line 135, in __exit__
self.gen.throw(type, value, traceback)
File "/usr/local/lib/python3.9/site-packages/urllib3/response.py", line 455, in _error_catcher
raise ProtocolError("Connection broken: %r" % e, e)
urllib3.exceptions.ProtocolError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "crawler.py", line 71, in scrape
r = requests.get(url, timeout=(3, 6))
File "/usr/local/lib/python3.9/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python3.9/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python3.9/site-packages/requests/sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python3.9/site-packages/requests/sessions.py", line 697, in send
r.content
File "/usr/local/lib/python3.9/site-packages/requests/models.py", line 831, in content
self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
File "/usr/local/lib/python3.9/site-packages/requests/models.py", line 756, in generate
raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "crawler.py", line 89, in scrape
status[url] = "Fatal Error: " + err + url
TypeError: can only concatenate str (not "ChunkedEncodingError") to str
I encountered the same error. Can't speak as to why, but switching from the html.parser parser to lxml fixed it for me.
Might be useful: difference between parsers
Related
Here is a class I wrote to get billboard hot 100 songs by date.
The class uses requests to get website html text
It then uses beautifulsoup to parse the html
The parsing works well
the problem is the intermittent connection errors
import json
import time
from bs4 import BeautifulSoup
import requests
import datetime as DT
class BillBoardScraper():
def __init__(self) -> None:
self.top_100 = None
self.scraped_chart = None
def _scrape_chart(self, date):
url = 'https://www.billboard.com/charts/hot-100'
headers = {
"User-Agent": "Mozilla/5.0"
}
r = requests.get(
f'{url}/{date}', headers=headers)
bill_board_100_soup = BeautifulSoup(r.text, 'html.parser')
r = None
bill_board_100_results_soup = bill_board_100_soup.find_all(
"div", "o-chart-results-list-row-container")
return bill_board_100_results_soup
def _get_song_and_artist(self, idx):
for result_item in self.scraped_chart[idx].find_all('li'):
segment_struct = [tag.name for tag in result_item if tag.name]
if segment_struct == ['h3', 'span']:
song_and_artist = []
for tag in result_item:
if tag.string.strip():
song_and_artist.append(tag.string.strip())
return song_and_artist
def run_parser_and_archive_data(self, date):
self.top_100 = {}
self.scraped_chart = self._scrape_chart(date)
for i in range(0, 100):
song, artist = self._get_song_and_artist(i)
self.top_100[i+1] = {"track": song, "artist": artist, "date": date}
json_string = json.dumps(self.top_100)
with open(f'data_billboard/billboard_hot100_{date}.json', 'w') as outfile:
json.dump(json_string, outfile)
date = DT.date(2010, 3, 19)
n_weeks = 520
c_week = 1
while c_week <= n_weeks:
print(str(date))
top100 = BillBoardScraper()
top100.run_parser_and_archive_data(str(date))
date = date - DT.timedelta(days=7)
time.sleep(10)
Sporadically I receive the following error. Why does this happen? What can I do to mitigate this? Any feedback is appreciated
Traceback (most recent call last):
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
chunked=chunked,
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 306, in begin
version, status, reason = self._read_status()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 275, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 720, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\util\retry.py", line 400, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\packages\six.py", line 734, in reraise
raise value.with_traceback(tb)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
chunked=chunked,
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 306, in begin
version, status, reason = self._read_status()
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 275, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
urllib3.exceptions.ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "d:\airflow\plugins\api_billboard100.py", line 55, in <module>
top100.run_parser_and_archive_data(str(date))
File "d:\airflow\plugins\api_billboard100.py", line 39, in run_parser_and_archive_data
self.scraped_chart = self._scrape_chart(date)
File "d:\airflow\plugins\api_billboard100.py", line 20, in _scrape_chart
f'{url}/{date}', headers=headers)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\{USER_NAME}\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
I got a Flask app calling different modules. One of them eventually raises a NoPrice exception but the except NoPrice doesn't catch it as a NoPrice but except Exceptioncatches it...
class NoPrice(Exception):
pass
somewhere in a module
raise NoPrice('error')
in my flask app
try:
raise NoPrice('main')
except NoPrice as e:
print('main')
except Exception as e:
print('main2')
try:
resp = alicia.create_predictions(select)
except NoPrice as e:
print('main3')
except Exception as e:
print('main4')
print(repr(e))
print(e)
print(traceback.format_exc())
The output of this is
main
main4
NoPrice('No price found for material_reference 0148054b-e681-4fb6-9722-946f3cfa8529 the weight 1.7471266917293233', 'occurred at index 0')
('No price found for material_reference 0148054b-e681-4fb6-9722-946f3cfa8529 the weight 1.7471266917293233', 'occurred at index 0')
Traceback (most recent call last):
File "c/main.py", line 71, in create_predictions_api
resp = alicia.create_predictions(select)
File "/absolutepath/app/alicia.py", line 703, in create_predictions
self.set_machines_and_format()
File "/absolutepath/app/alicia.py", line 574, in set_machines_and_format
x["is_screenprinting_option"]), axis = 1)
File "/absolutepath/venv/lib/python3.7/site-packages/pandas/core/frame.py", line 6913, in apply
return op.get_result()
File "/absolutepath/venv/lib/python3.7/site-packages/pandas/core/apply.py", line 186, in get_result
return self.apply_standard()
File "/absolutepath/venv/lib/python3.7/site-packages/pandas/core/apply.py", line 292, in apply_standard
self.apply_series_generator()
File "/absolutepath/venv/lib/python3.7/site-packages/pandas/core/apply.py", line 321, in apply_series_generator
results[i] = self.f(v)
File "/absolutepath/app/alicia.py", line 574, in <lambda>
x["is_screenprinting_option"]), axis = 1)
File "/absolutepath/app/alicia.py", line 359, in predict_price_relying_on_format
output = Jerome(self.product, self.hipe_client_config).run(self.quote_input)
File "/absolutepath/app/jerome/jerome.py", line 235, in run
pliant = pliant_obj.price(quote_input, self.material_reference)
File "/absolutepath/app/jerome/options_construction/pliant.py", line 66, in price
quote_input['matter_margin_percentage'])
File "/absolutepath/app/jerome/options_construction/pliant.py", line 52, in pliant
raise NoPrice(f"No price found for material_reference {material_reference.id} the weight {x1}")
exceptions.NoPrice: ('No price found for material_reference 0148054b-e681-4fb6-9722-946f3cfa8529 the weight 1.7471266917293233', 'occurred at index 0')
Why doesn't the except NoPrice catch my exception ?
Shouldn't the ouput be
main
main3
My goal is ultimately to handle only my NoPrice exception
my_function must not be raising the NoPrice exception, and therefore it isn't caught in the 'main 3' section.
So basically this all stems from a previous question I had, so I'll post that question & my edit in its entirely below:
So I have a script I've been working with for a few days trying to get a list of emails from a csv I have, but now I've run into this roadblock. Here is the code:
import sys
try:
import urllib.request as urllib2
except ImportError:
import urllib2
import re
import csv
list1 = []
list2 = []
list3 = []
def addList():
with open('file.csv', 'rt') as f:
reader = csv.reader(f)
for row in reader:
for s in row:
list2.append(s)
def getAddress(url):
http = "http://"
https = "https://"
if http in url:
return url
elif https in url:
return url
else:
url = "http://" + url
return url
def parseAddress(url):
global list3
try:
website = urllib2.urlopen(getAddress(url))
html = website.read()
addys = re.findall('''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''', html, flags=re.IGNORECASE)
global list1
list1.append(addys)
except urllib2.HTTPError as err:
print ("Cannot retrieve URL: HTTP Error Code: "), err.code
list3.append(url)
except urllib2.URLError as err:
print ("Cannot retrive URL: ") + err.reason[1]
list3.append(url)
def execute():
global list2
addList()
totalNum = len(list2)
atNum = 1
for s in list2:
parseAddress(s)
print ("Processing ") + str(atNum) + (" out of ") + str(totalNum)
atNum = atNum + 1
print ("Completed. Emails parsed: ") + str(len(list1)) + "."
### MAIN
def main():
global list2
execute()
global list1
myFile = open("finishedFile.csv", "w+")
wr = csv.writer(myFile, quoting=csv.QUOTE_ALL)
for s in list1:
wr.writerow(s)
myFile.close
global list3
failFile = open("failedSites.csv", "w+")
write = csv.writer(failFile, quoting=csv.QUOTE_ALL)
for j in list3:
write.writerow(j)
failFile.close
main()
and when I run it I get this error:
Traceback (most recent call last):
File "pagescanner.py", line 85, in <module>
main()
File "pagescanner.py", line 71, in main
execute()
File "pagescanner.py", line 60, in execute
parseAddress(s)
File "pagescanner.py", line 42, in parseAddress
addys = re.findall('''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*#(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?''', html, flags=re.IGNORECASE)
File "/usr/lib/python3.5/re.py", line 213, in findall
return _compile(pattern, flags).findall(string)
TypeError: cannot use a string pattern on a bytes-like object
So I've figured out that I need to figure out how to encode the html string into bytes for the encoding, and Tyler's answer below helped me do so but now I'm getting this error:
Traceback (most recent call last):
File "/usr/lib/python3.5/urllib/request.py", line 1254, in do_open
h.request(req.get_method(), req.selector, req.data, headers)
File "/usr/lib/python3.5/http/client.py", line 1107, in request
self._send_request(method, url, body, headers)
File "/usr/lib/python3.5/http/client.py", line 1152, in _send_request
self.endheaders(body)
File "/usr/lib/python3.5/http/client.py", line 1103, in endheaders
self._send_output(message_body)
File "/usr/lib/python3.5/http/client.py", line 934, in _send_output
self.send(msg)
File "/usr/lib/python3.5/http/client.py", line 877, in send
self.connect()
File "/usr/lib/python3.5/http/client.py", line 849, in connect
(self.host,self.port), self.timeout, self.source_address)
File "/usr/lib/python3.5/socket.py", line 712, in create_connection
raise err
File "/usr/lib/python3.5/socket.py", line 703, in create_connection
sock.connect(sa)
OSError: [Errno 22] Invalid argument
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "pagescanner.py", line 39, in parseAddress
website = urllib2.urlopen(getAddress(url))
File "/usr/lib/python3.5/urllib/request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.5/urllib/request.py", line 466, in open
response = self._open(req, data)
File "/usr/lib/python3.5/urllib/request.py", line 484, in _open
'_open', req)
File "/usr/lib/python3.5/urllib/request.py", line 444, in _call_chain
result = func(*args)
File "/usr/lib/python3.5/urllib/request.py", line 1282, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/usr/lib/python3.5/urllib/request.py", line 1256, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 22] Invalid argument>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "pagescanner.py", line 85, in <module>
main()
File "pagescanner.py", line 71, in main
execute()
File "pagescanner.py", line 60, in execute
parseAddress(s)
File "pagescanner.py", line 51, in parseAddress
print ("Cannot retrive URL: ") + err.reason[1]
TypeError: 'OSError' object is not subscriptable
Does this mean that one of the urls from the list isn't a valid url? I thought I had finally removed all fo the bad urls from my csv file but I may need to take another look
I'm trying to write a small python 3 utility script that checks to see if a file exists on my server.
So I have the code below that has a big array of string values that I pass to a simple function that returns the url and the response code.
However, when I run it I get all these errors I don't even know where to start:
$ python ReturnPath.py
Traceback (most recent call last):
File "ReturnPath.py", line 86, in <module>
checkResponse(u)
File "ReturnPath.py", line 5, in checkResponse
code = urllib.request.urlopen(url).getcode()
File "C:\Program Files\Python37\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python37\lib\urllib\request.py", line 510, in open
req = Request(fullurl, data)
File "C:\Program Files\Python37\lib\urllib\request.py", line 328, in __init__
self.full_url = url
File "C:\Program Files\Python37\lib\urllib\request.py", line 354, in full_url
self._parse()
File "C:\Program Files\Python37\lib\urllib\request.py", line 383, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: '"https://myserver.org/Media/CharacterAvatarImages/ae275ecb-183e-4e8d-8465-9d6d36c1323f.jpg"'
Here is my code:
import urllib.request
def checkResponse(url):
code = urllib.request.urlopen(url).getcode()
print(url + " = " + code)
return
arrCases = []
arrCases.extend([
"https://myserver.org/Media/CharacterAvatarImages/ae275ecb-183e-4e8d-8465-9d6d36c1323f.jpg",
"https://myserver.org/Media/CharacterAvatarImages/3ea92fa3-1ef0-4358-b38d-bb04e653aa53.jpg",
"https://myserver.org/Media/CharacterAvatarImages/7958a0e3-171b-46b5-875e-970368389bdf.jpg",
"https://myserver.org/Media/CharacterAvatarImages/e9a6cb00-6811-4b47-9aac-88480578dd44.jpg",
"https://myserver.org/Media/CharacterAvatarImages/73df88c3-b829-4519-9523-2bbe1f2c8549.jpg",
"https://myserver.org/Media/CharacterAvatarImages/61aa614b-5c95-487c-b4e3-783231b43677.jpg",
"https://myserver.org/Media/CharacterAvatarImages/8be7811f-18dc-4a81-a557-8b81605e3452.jpg",
"https://myserver.org/Media/CharacterAvatarImages/56539acb-2b1b-4410-a4bc-ac2eb0dc00fa.jpg",
"https://myserver.org/Media/CharacterAvatarImages/8bcf93fc-b435-4fd4-9c82-4aba78c58529.jpg",
])
for u in arrCases:
checkResponse(u)
What am I doing wrong?
You have to catch errors from broken URLs. I also increased speed through multiprocessing.Pool.
import urllib.request
from urllib.error import HTTPError, URLError
import multiprocessing
def checkResponse(url):
try:
code = urllib.request.urlopen(url, timeout=1).getcode()
except (HTTPError, URLError) as error:
print(url, " = ", error)
else:
print(url, " = ", code)
return
arrCases = []
arrCases.extend([
"https://i.stack.imgur.com/DsNOB.jpg",
"https://myserver.org/Media/CharacterAvatarImages/ae275ecb-183e-4e8d-8465-9d6d36c1323f.jpg",
"https://myserver.org/Media/CharacterAvatarImages/3ea92fa3-1ef0-4358-b38d-bb04e653aa53.jpg",
"https://myserver.org/Media/CharacterAvatarImages/7958a0e3-171b-46b5-875e-970368389bdf.jpg",
"https://myserver.org/Media/CharacterAvatarImages/e9a6cb00-6811-4b47-9aac-88480578dd44.jpg",
"https://myserver.org/Media/CharacterAvatarImages/73df88c3-b829-4519-9523-2bbe1f2c8549.jpg",
"https://myserver.org/Media/CharacterAvatarImages/61aa614b-5c95-487c-b4e3-783231b43677.jpg",
"https://myserver.org/Media/CharacterAvatarImages/8be7811f-18dc-4a81-a557-8b81605e3452.jpg",
"https://myserver.org/Media/CharacterAvatarImages/56539acb-2b1b-4410-a4bc-ac2eb0dc00fa.jpg",
"https://myserver.org/Media/CharacterAvatarImages/8bcf93fc-b435-4fd4-9c82-4aba78c58529.jpg",
])
with multiprocessing.Pool(processes=4) as pool:
pool.map(checkResponse, arrCases)
This error occurs on pynsq 0.4.2
STACKTRACE
2013-07-30 15:03:43,205 ERROR [ip-10-114-195-89:4150:nsq_msg_handler] failed to handle_message()
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/nsq/Reader.py", line 367, in _data_callback
self._handle_message(conn, message)
File "/usr/local/lib/python2.7/dist-packages/nsq/Reader.py", line 291, in _handle_message
return message.requeue()
File "/usr/local/lib/python2.7/dist-packages/nsq/nsq.py", line 47, in requeue
self.respond(REQ, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/nsq/Reader.py", line 211, in _message_responder
self._requeue(conn, message, time_ms=kwargs.get('time_ms', -1))
File "/usr/local/lib/python2.7/dist-packages/nsq/Reader.py", line 222, in _requeue
return self.finish(conn, message)
AttributeError: 'Reader' object has no attribute 'finish'
CODE
def connect_nsq(self):
r = nsq.Reader(message_handler=self.nsq_msg_handler, lookupd_http_addresses=["127.0.0.1:4161"], topic="test_topic", channel="test_channel", max_in_flight=500)
nsq.run()
# callback
def nsq_msg_handler(self, message):
try:
before_ts = get_utc_now_ts()
json_data = json.loads(message.body)
my_data = json_data["key1"]
my_data = json_data["key2"]
my_data = json_data["key3"]
after_ts = get_utc_now_ts()
delta = after_ts - before_ts
logger.debug("took %f seconds for json_data _id: %s" % (delta, json_data["_id"]))
except Exception as reason:
print reason, traceback.format_exc()
return False
return true
This issue was fixed in the latest release of pynsq which is 0.5.0
Here's the bug report: https://github.com/bitly/pynsq/issues/44