Exception not caught in multiprocessing - python

I'm using the multiprocessing module to process files in parallel, which works perfectly fine almost every time.
I've also wrapped the processing in a try/except block to catch any exception.
However, I've come across a situation where the except block doesn't catch the exception.
Since the code is huge, I'm only posting the relevant block that is causing the problem.
def reader(que, ip, start, end, filename):
""" Reader function checks each line of the file
and if the line contains any of the ip addresses which are
being scanned, then it writes to its buffer.
If the line field doesn't match date string it skips the line.
"""
logging.info("Processing : %s" % os.path.basename(filename))
ip_pat = re.compile("(\d+\.\d+\.\d+\.\d+\:\d+)")
chunk = 10000000 # taking chunk of 10MB data
buff = ""
with bz2.BZ2File(filename,"rb", chunk) as fh: # open the compressed file
for line in fh:
output = []
fields = line.split()
try:
ts = fields[1].strip() + "/" +fields[0]+"/"+fields[3].split("-")[0]+" "+fields[2]
times = da.datetime.strptime(ts,"%d/%b/%Y %H:%M:%S")
if times < start:
continue
if times > end:
break
ips = re.findall(ip_pat,line)
if len(ips) < 3:
continue
if ips[0].split(":")[0] == ip:
output.append(times.strftime("%d/%m/%Y %H:%M:%S"))
status = "SESSION_OPEN" if "SESSION_OPEN" in line or "CREATE" in line else "SESSION_CLOSE"
protocol = "TCP" if "TCP" in line else "UDP"
output.append(status)
output.append(protocol)
ips[1], ips[2] = ips[2], ips[1]
output.extend(ips)
res = "|".join(output)
buff += res + "\n"
except IndexError, ValueError:
continue
logging.info("Processed : %s of size [ %d ]" % (os.path.basename(filename), os.path.getsize(filename)))
if buff:
que.put((ip,buff))
return buff
And this is what is received as error.
File "/usr/lib64/python2.7/multiprocessing/pool.py", line 554, in get
raise self._value
ValueError: time data '2/Dec/20 10:59:59' does not match format '%d/%b/%Y %H:%M:%S'
What I don't understand is why the exception is not caught, even though I've listed ValueError in the except clause.
What's the best way to solve this problem?

Provide the multiple exceptions as a tuple:
except (IndexError, ValueError):
continue
The relevant doc is https://docs.python.org/2/tutorial/errors.html#handling-exceptions
Snippet from the page:
Note that the parentheses around this tuple are required, because except ValueError, e: was the syntax used for what is normally written as except ValueError as e: in modern Python (described below). The old syntax is still supported for backwards compatibility. This means except RuntimeError, TypeError is not equivalent to except (RuntimeError, TypeError): but to except RuntimeError as TypeError: which is not what you want.

Related

How to Continue Loop after exception

from textblob import TextBlob
line = """ अशा किंमतीवर खरोखरच चांगला कुकर आहे ok are you sure how are you आधुनिक तंत्रज्ञानासह हे सुलभ आणि सुरक्षित are you आहे இது அற்புதமான தரம் மற்றும் சூப்பர் தயாரிப்பு."""
def split_line(in_line):
    """Split *in_line* on single spaces and regroup the words three at a time.

    :param in_line: The text to split.
    :type in_line: str
    :return: A list of strings, each holding up to three consecutive
        space-separated words of *in_line* joined by a single space.
    """
    # Use the parameter, not the module-level ``line`` variable, so the
    # function works for whatever text the caller passes in.
    words = in_line.split(" ")
    return [" ".join(words[i:i + 3]) for i in range(0, len(words), 3)]
#print(split_line(line))
try:
for i in split_line(line):
blob = TextBlob(i)
print (blob.translate(to = 'en'))
except:
print ("same language found not translated")
This is language translation code. Sometimes TextBlob throws an error, so I used a try/except block, but my code stops after printing the error message. I want the loop to continue after the exception is caught.
Just put the try/except block inside the for loop (if the error is raised by TextBlob(i)):
for i in split_line(line):
try:
blob = TextBlob(i)
print (blob.translate(to = 'en'))
except:
print ("same language found not translated")
This will make the for loop run to completion even if an error is raised inside it.

Indentation Error Python Not Working

I'm trying to run my code and I get the following error:
File "C:/trcrt/trcrt.py", line 42
def checkInternet():
^
IndentationError: unexpected unindent
The code is supposed to trace the route to a website... I know it's not very smart, but it's what I was told to do.
I've checked the code using pep8 and everything seems to be fine...
'''
Developer: Roei Edri
File name: trcrt.py
Date: 24.11.17
Version: 1.1.0
Description: Get an url as an input and prints the traceroute to it.
'''
import sys
import urllib2
i, o, e = sys.stdin, sys.stdout, sys.stderr
from scapy.all import *
from scapy.layers.inet import *
sys.stdin, sys.stdout, sys.stderr = i, o, e
def trcrt(dst):
"""
Check for the route for the given destination
:param dst: Final destination, in a form of a website.
:type dst: str
"""
try:
pckt = IP(dst=dst)/ICMP() # Creates the
# packet
ip = [p for p in pckt.dst] # Gets the ip
print "Tracerouting for {0} : {1}".format(dst, ip[0])
for ttl in range(1, 40):
pckt = IP(ttl=ttl, dst=dst)/ICMP()
timeBefore = time.time()
reply = sr1(pckt, verbose=0, timeout=5)
timeAfter = time.time()
timeForReply = (timeAfter - timeBefore)*1000
if reply is not None:
print "{0} : {1} ; Time for reply: {2}".format(ttl,
reply.src, timeForReply)
if reply.type == 0:
print "Tracerout Completed"
break
else:
print "{0} ... Request Time Out".format(ttl)
def checkInternet():
"""
Checks if there is an internet connection
:return: True if there is an internet connection
"""
try:
urllib2.urlopen('http://45.33.21.159', timeout=1)
return True
except urllib2.URLError as IntError:
return False
Thanks for any help...
Btw pep8 says
"module level import not at top of file"
for lines 12,13
The try block is missing its except clause.
try:
pckt = IP(dst=dst)/ICMP() # Creates the
# packet
ip = [p for p in pckt.dst] # Gets the ip
print "Tracerouting for {0} : {1}".format(dst, ip[0])
for ttl in range(1, 40):
pckt = IP(ttl=ttl, dst=dst)/ICMP()
timeBefore = time.time()
reply = sr1(pckt, verbose=0, timeout=5)
timeAfter = time.time()
timeForReply = (timeAfter - timeBefore)*1000
if reply is not None:
print "{0} : {1} ; Time for reply: {2}".format(ttl,
reply.src, timeForReply)
if reply.type == 0:
print "Tracerout Completed"
break
else:
print "{0} ... Request Time Out".format(ttl)
except: # Here : Add the exception you wish to catch
pass # handle this exception appropriately
As a general rule, do not use catch-all except clauses, and do not simply pass on a caught exception: doing so lets the code fail silently.
If this is your full code, there are two things to check:
1) Have you mixed tabs and spaces? Make sure that all tabs are converted to spaces (I recommend 4 spaces per tab) for indentation. A good IDE will do this for you.
2) The try: in trcrt(dst) does not have a matching except block.
PEP8 will, by the way, also tell you that function names should be lowercase:
check_internet instead of checkInternet, ...
I will give you the same recommendation that I give to everyone working with me: start using an IDE that marks PEP8 and other errors for you; there are several around. It helps a lot with spotting those errors and trains you to write clean Python code that is easily readable and (if you put comments in it) also reusable and understandable a few years later.

Compare lines from streaming API - Python

I am lost here, I have an API that streams prices, I am trying to compare the second to last price with the last price, for instance, if x > y then do something. I cannot figure out how to compare the last to the second to the last price when the prices are streaming. Could someone please shed some light on how this may work? Thanks in advance!
my stream:
def stream_to_queue(self):
response = self.connect_to_stream()
if response.status_code != 200:
return
for line in response.iter_lines(1):
if line:
try:
msg = json.loads(line)
except Exception as e:
print "Caught exception when converting message into json\n" + str(e)
return
if msg.has_key("instrument") or msg.has_key("tick"):
price = msg["tick"]["ask"]
print price
This prints a price like 1.23004 and then continues to loop and print more prices. I have tried to save the current price in a variable outside the loop and then reference it when a new price comes in, but it's not working.
my attempt:
def stream_to_queue(self):
response = self.connect_to_stream()
if response.status_code != 200:
return
oldLine = ''
for line in response.iter_lines(1):
if line:
try:
msg = json.loads(line)
except Exception as e:
print "Caught exception when converting message into json\n" + str(e)
return
if msg.has_key("instrument") or msg.has_key("tick"):
price = msg["tick"]["ask"]
oldLine = price
newLine = oldLine
if newLine > oldLine:
print newLine
Couple of things:
1- Your indentation is a bit off as the comparison should be done inside the 'for' loop. In your case, the comparison is only being made when the streaming is complete.
2- You are comparing oldLine with newLine which are equal, so nothing will happen. Instead you should compare newLine with price.
Consider the following code:
for line in response.iter_lines(1):
if line:
try:
msg = json.loads(line)
except Exception as e:
print "Caught exception when converting message into json\n" + str(e)
return
if msg.has_key("instrument") or msg.has_key("tick"):
price = msg["tick"]["ask"]
oldLine = price
newLine = oldLine
if newLine > price:
print newLine

Inputting a String Argument as a Variable in function giving a NameError

I have this code block that it should give out the CIK number when the stock ticker is supplied:
def lookup_cik(ticker, name=None):
good_read = False
ticker = ticker.strip().upper()
url = 'http://www.sec.gov/cgi-bin/browse-edgar?action+getcompany&CIK=(cik)&count=10&output=xml'.format(cik=ticker)
try:
xmlFile = urlopen ( url )
try:
xmlData = xmlFile.read()
good_read = True
finally:
xmlFile.close()
except HTTPError as e:
print( "HTTP Error:", e.code )
except URLError as e:
print( "URL Error:", e.reason )
except TimeoutError as e:
print( "Timeout Error:", e.reason )
except socket.timeout:
print( "Socket Timeout Error" )
if not good_read:
print( "Unable to lookup CIK for ticker:", ticker )
return
try:
root = ET.fromstring(xmlData)
except ET.ParseError as perr:
print( "XML Parser Error:", perr )
try:
cikElement = list(root.iter( "CIK" ))[0]
return int(cikElement.text)
except StopIteration:
pass
However when it try to input a Stock ticker i get
>>> lookup_cik(BDX)
Traceback (most recent call last):
File "<pyshell#34>", line 1, in <module>
lookup_cik(BDX)
NameError: name 'BDX' is not defined
I know that it is a NameError, but I have never come across a case where a function does not recognize the argument passed to it, which in our example is the stock ticker BDX.
Your function expects a string, so pass in one:
lookup_cik("BDX")
Without the quotes Python parses that as a name, but you never bound anything to that name (assigned to it).
Note that you'll also get an UnboundLocalError: local variable 'root' referenced before assignment exception if there was a parse error. You probably want to exit the function at that point:
try:
root = ET.fromstring(xmlData)
except ET.ParseError as perr:
print( "XML Parser Error:", perr )
return
You'll most likely get a parse error, because you never actually interpolate the ticker anywhere in the string; you are missing a {cik} placeholder:
url = 'http://www.sec.gov/cgi-bin/browse-edgar?action+getcompany&CIK=(cik)&count=10&output=xml'.format(cik=ticker)
You probably meant to use CIK={cik} there. A quick experiment directly calling the site also shows you need to use action=getcompany (= instead of +):
url = 'http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&count=10&output=xml'.format(cik=ticker)
Because you use list() on root.iter(), the whole expression will not raise StopIteration (list() catches that). Instead, the expression could raise an IndexError.
I'd use next() there instead:
cikElement = next(root.iter("CIK"), None)
return cikElement and int(cikElement.text)
or better still, just use Element.find():
cikElement = root.find("CIK")
return cikElement and int(cikElement.text)

python "local variable referenced before assignment" with hundreds of threads

I am having a problem with a piece of code that is executed inside a thread in python. Everything works fine until I start using more than 100 or 150 threads, then I get the following error in several threads:
resp.read(1)
UnboundLocalError: local variable 'resp' referenced before assignment.
The code is the following:
try:
resp = self.opener.open(request)
code = 200
except urllib2.HTTPError as e:
code = e.code
#print e.reason,_url
#sys.stdout.flush()
except urllib2.URLError as e:
resp = None
code = None
try:
if code:
# ttfb (time to first byte)
resp.read(1)
ttfb = time.time() - start
# ttlb (time to last byte)
resp.read()
ttlb = time.time() - start
else:
ttfb = 0
ttlb = 0
except httplib.IncompleteRead:
pass
As you can see, if "resp" is not assigned due to an exception, the exception should be raised and "code" couldn't be assigned, so execution couldn't reach "resp.read(1)".
Does anybody have a clue why it is failing? I guess it is related to scopes, but I don't know how to avoid this or how to implement it differently.
Thanks and regards.
Basic python:
If there is an HTTPError during the open call, resp will not be set, but code will be set to e.code in the exception handler.
Then code is tested and resp.read(1) is called.
This has nothing to do with threads directly, but maybe the high number of threads caused the HTTPError.
The resp variable is defined and used in different code blocks: one is in a try/except block, the other is in another try/except block. Try to merge them:
Edited:
ttfb = 0
ttlb = 0
try:
resp = self.opener.open(request)
code = 200
resp.read(1)
ttfb = time.time() - start
resp.read()
ttlb = time.time() - start
except urllib2.HTTPError as e:
code = e.code
#print e.reason,_url
#sys.stdout.flush()
except urllib2.URLError as e:
pass
except httplib.IncompleteRead:
pass

Categories