Try, except not working in Python

I am trying to add error handling to this program by adding try and except blocks, in case something does not work, so that the program does not shut down if there is an error in handling the data. (This is a dumbed-down version of my code.) When I run it this way (given that the scheduled times are accurate), nothing seems to work: the functions in report_scheduler never actually run.
Here is the code I am looking at:
import os      # used by os.getcwd() below (missing from the posted snippet)
import time    # used by time.strftime() / time.sleep() below
import schedule

def forex_data_report():
    from forex_python.converter import CurrencyRates
    import csv
    current_dir = os.getcwd()
    date_time = time.strftime('%m-%d-%Y_at_%I-%M-%S-%p')
    c = CurrencyRates()
    usd_eur = c.get_rate('EUR', 'USD')
    usd_gbp = c.get_rate('GBP', 'USD')
    usd_yen = c.get_rate('JPY', 'USD')
    usd_aud = c.get_rate('AUD', 'USD')
    eur_gbp = c.get_rate('GBP', 'EUR')
    clean_file_location = current_dir + '\\Reports\\Forex_Data\\Forex_Data.csv'
    with open(clean_file_location, 'a', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([date_time, usd_eur, usd_gbp, usd_yen, usd_aud, eur_gbp])
    send_outlook_w_attach('Key Currencies', clean_file_location)
    print('Updated Key Currencies Data.')

def competitor_stock_data_report():
    import datetime
    import pandas_datareader.data as web
    import csv
    current_dir = os.getcwd()
    date_print = time.strftime('%m-%d-%Y_at_%I-%M-%S-%p')
    date_time = datetime.datetime.now()
    date = date_time.date()
    stocklist = ['LAZ', 'AMG', 'BEN', 'LM', 'EVR', 'GHL', 'HLI', 'MC', 'PJT', 'MS', 'GS', 'JPM', 'AB']
    start = datetime.datetime(date.year - 1, date.month, date.day - 1)
    end = datetime.datetime(date.year, date.month, date.day - 1)
    clean_file_location = current_dir + '\\Reports\\XXX\\Stock_Data.csv'
    for x in stocklist:
        df = web.DataReader(x, 'google', start, end)
        with open(clean_file_location, 'a', newline='') as outfile:
            writer = csv.writer(outfile)
            writer.writerow([date_print, x, df.loc[df.index[0], 'Close'], df.loc[df.index[-1], 'Close']])
    send_outlook_w_attach('Competitor Stock Data vs. XXX', clean_file_location)
    print('Updated XXX Competitor Stock Performance Data.')

def report_scheduler():
    try:
        schedule.every().day.at("00:00").do(forex_data_report)
    except:
        pass
    try:
        schedule.every().friday.at("00:01").do(competitor_stock_data_report)
    except:
        pass
    while True:
        schedule.run_pending()
        time.sleep(1)

if __name__ == '__main__':
    print('Starting background - HANDLER - process...')
    report_scheduler()
I understand that pass is not real error handling, but I do need some way to tell the program to continue, even if the data is not being updated or an error occurs.
Thanks.

Without actually getting deep into your code: probably an exception is being raised and then caught, and the pass statement means you don't get any output.
Have you checked that it runs without the try/except blocks?
Also, this might help:
except Exception as e:
    print("Exception raised: {}".format(e))
At least then you will get a printout of your exception. You might also want to look into logging the exception.
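For instance, a minimal sketch of logging the exception rather than only printing it (the logging configuration and the file name here are just placeholder assumptions):
import logging

# assumption: log to a local file; adjust the handler and level to taste
logging.basicConfig(filename='scheduler_errors.log', level=logging.ERROR)

try:
    schedule.every().day.at("00:00").do(forex_data_report)
except Exception as e:
    # logging.exception records the message plus the full traceback
    logging.exception("Scheduling forex_data_report failed: %s", e)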

I'm not familiar with the libraries you are using - it would be very helpful if you would post a complete program that we could tinker with ourselves, and name all of the third-party libraries involved - but I suspect you want the try-except blocks inside forex_data_report and competitor_stock_data_report. You aren't concerned with exceptions thrown by the act of scheduling the periodic task, are you? You want to swallow exceptions from the periodic tasks themselves. Experiment with code structured like this:
def forex_data_report():
    ...  # body unchanged

def forex_data_report_wrapper():
    try:
        forex_data_report()
    except Exception as e:
        logger.exception(e)

# similarly for competitor_stock_data_report

def report_scheduler():
    schedule.every().day.at("00:00").do(forex_data_report_wrapper)
    schedule.every().friday.at("00:01").do(competitor_stock_data_report_wrapper)
    while True:
        schedule.run_pending()
        time.sleep(1)
Note also that I am using except Exception instead of except. A bare except catches things you almost certainly don't want to catch, such as KeyboardInterrupt and StopIteration.
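As a small illustration (not from the original code), a bare except swallows Ctrl+C, while except Exception lets it propagate:
try:
    time.sleep(60)
except:                 # also catches KeyboardInterrupt, so Ctrl+C appears to do nothing
    pass

try:
    time.sleep(60)
except Exception as e:  # KeyboardInterrupt derives from BaseException, so it still propagates
    print('task failed:', e)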

Related

How can I test concurrency using ThreadPoolExecutor?

I have written a program that I would like to run concurrently, and I have implemented it, but I am having a hard time testing whether it will actually spin up more than one thread. Does anyone have any suggestions? I just want to see whether this code will ever use 2, 3, 4, or 5 workers.
# imports implied by the snippet (not shown in the original post)
import concurrent.futures
import glob
import logging
import os
import time
import urllib.error
import pandas as pd
from pandas.errors import ParserError

def read_files():
    t0 = time.process_time()
    cols = ['fname', ' lname', ' age']
    path = 'data'
    files = glob.glob(os.path.join(path, "*.csv"))
    # with open('data/url') as f:
    #     for line in f:
    #         files.append(line.rstrip('\n'))
    bad_files = []
    df_list = []
    for file in files:
        try:
            temp = pd.read_csv(file)
            if temp.columns.to_list() == cols:
                df_list.append(temp)
            else:
                bad_files.append(file)
        except ParserError as pe:
            bad_files.append(file)
            logging.error(f'Parsing Error on {file}. Error: {pe}')
        except ValueError as ve:
            logging.error(f'Value error on reading the csv: {temp}, error: {ve}')
            bad_files.append(file)
        except urllib.error.HTTPError as he:
            bad_files.append(file)
            logging.error(f'Http Error {he}, Code {he.code}')
        except Exception as e:
            bad_files.append(file)
            logging.error(f'Error grabbing data from given {file} possible HTTP error. Error: {e}')
    print(f'Files that were not read {bad_files}')
    df = pd.concat(df_list)
    t1 = time.process_time()
    print(f'It took {t1 - t0} seconds, to read and fill the dataframe.')
    return df

def run_calculations(df):
    if len(df.index) % 2 == 0:
        print(f'Even number of entries, pandas median() method will add both middle numbers and find the average.')
    average = round(df[' age'].mean())
    median = df[' age'].median()
    names_arr = df[df[' age'] == median].values[0]
    fname = names_arr[0]
    lname = names_arr[1]
    print(f'The Average Age is {int(average)}, The Median Age is {int(median)}. {fname} {lname} is {int(median)}')

if __name__ == '__main__':
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.submit(run_calculations(read_files()))
        print(f'I have used {len(executor._threads)} thread(s) for processing')
The short answer is: no, your code will not use more than one worker.
The reason is that you passed the wrong kind of argument to executor.submit: it accepts a callable, while you are effectively passing it None (run_calculations(read_files()) is evaluated first, in the main thread, and its return value is what gets submitted).
A quick fix would be to replace executor.submit(run_calculations(read_files())) with executor.submit(lambda: run_calculations(read_files())).
The following snippet will help to explain how to submit a callable to executor:
import time
import threading
from concurrent.futures import ThreadPoolExecutor

def task(time_to_sleep):
    time.sleep(time_to_sleep)
    print(id(threading.current_thread()))

def use_single_worker():
    print("in use single worker")
    with ThreadPoolExecutor(max_workers=5) as executor:
        # a single thread id will get dumped multiple times
        futures = [executor.submit(task(i)) for i in range(10)]
        for future in futures:
            try:
                future.result()
            except Exception:
                pass

def use_multiple_workers():
    print("in use multiple workers")
    with ThreadPoolExecutor(max_workers=5) as executor:
        # different thread ids will get dumped
        futures = [executor.submit(lambda: task(i)) for i in range(10)]
        for future in futures:
            try:
                future.result()
            except Exception:
                pass

if __name__ == '__main__':
    use_single_worker()
    use_multiple_workers()
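A side note beyond the snippet above: executor.submit also accepts the callable's arguments directly, which both defers the call and avoids routing the loop variable through a lambda. A sketch using the same task function:
def use_workers_with_args():
    with ThreadPoolExecutor(max_workers=5) as executor:
        # submit(task, i) schedules task(i) for later execution, with i bound per call
        futures = [executor.submit(task, i) for i in range(10)]
        for future in futures:
            future.result()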

Indentation Error Python Not Working

I'm trying to run my code and I get the following error:
File "C:/trcrt/trcrt.py", line 42
def checkInternet():
^
IndentationError: unexpected unindent
The code is supposed to check the traceroute to a website... I know, it's not very smart, but it's what I was told to do.
I've checked the code using pep8 and everything seems to be fine...
'''
Developer: Roei Edri
File name: trcrt.py
Date: 24.11.17
Version: 1.1.0
Description: Get an url as an input and prints the traceroute to it.
'''
import sys
import urllib2

i, o, e = sys.stdin, sys.stdout, sys.stderr
from scapy.all import *
from scapy.layers.inet import *
sys.stdin, sys.stdout, sys.stderr = i, o, e

def trcrt(dst):
    """
    Check for the route for the given destination
    :param dst: Final destination, in a form of a website.
    :type dst: str
    """
    try:
        pckt = IP(dst=dst)/ICMP()   # Creates the packet
        ip = [p for p in pckt.dst]  # Gets the ip
        print "Tracerouting for {0} : {1}".format(dst, ip[0])
        for ttl in range(1, 40):
            pckt = IP(ttl=ttl, dst=dst)/ICMP()
            timeBefore = time.time()
            reply = sr1(pckt, verbose=0, timeout=5)
            timeAfter = time.time()
            timeForReply = (timeAfter - timeBefore)*1000
            if reply is not None:
                print "{0} : {1} ; Time for reply: {2}".format(ttl, reply.src, timeForReply)
                if reply.type == 0:
                    print "Tracerout Completed"
                    break
            else:
                print "{0} ... Request Time Out".format(ttl)

def checkInternet():
    """
    Checks if there is an internet connection
    :return: True if there is an internet connection
    """
    try:
        urllib2.urlopen('http://45.33.21.159', timeout=1)
        return True
    except urllib2.URLError as IntError:
        return False
Thanks for any help...
Btw pep8 says
"module level import not at top of file"
for lines 12,13
The try block is missing its except clause.
try:
    pckt = IP(dst=dst)/ICMP()   # Creates the packet
    ip = [p for p in pckt.dst]  # Gets the ip
    print "Tracerouting for {0} : {1}".format(dst, ip[0])
    for ttl in range(1, 40):
        pckt = IP(ttl=ttl, dst=dst)/ICMP()
        timeBefore = time.time()
        reply = sr1(pckt, verbose=0, timeout=5)
        timeAfter = time.time()
        timeForReply = (timeAfter - timeBefore)*1000
        if reply is not None:
            print "{0} : {1} ; Time for reply: {2}".format(ttl, reply.src, timeForReply)
            if reply.type == 0:
                print "Tracerout Completed"
                break
        else:
            print "{0} ... Request Time Out".format(ttl)
except:  # Here: add the exception you wish to catch
    pass  # handle this exception appropriately
As a general rule, do not use catch-all except clauses, and do not just pass on a caught exception; that lets errors fail silently.
If this is your full code, there are two things to check:
1) Have you mixed tabs and spaces? Make sure that all tabs are converted to spaces (I recommend 4 spaces per tab) for indentation. A good IDE will do this for you.
2) The try: in trcrt(dst) does not have a matching except block.
PEP8 will, by the way, also tell you that function names should be lowercase:
check_internet instead of checkInternet, ...
I will give you the same recommendation that I give to everyone working with me: start using an IDE that marks PEP8 and other errors for you; there are multiple around. It helps a lot in spotting those errors and trains you to write clean Python code that is easily readable and (if you put comments in it) also reusable and understandable a few years later.

Python Exception Handling within a loop

An exception occurs when my program can't find the element it's looking for. I want to log the event in the CSV, display a message that the error occurred, and continue. I have successfully logged the event in the CSV and displayed the message, but then my program jumps out of the loop and stops. How can I instruct Python to continue? Please check out my code.
import csv
import time
from selenium import webdriver  # imports implied by the snippet

sites = ['TCF00670', 'TCF00671', 'TCF00672', 'TCF00674', 'TCF00675', 'TCF00676', 'TCF00677']

with open('list4.csv', 'wb') as f:
    writer = csv.writer(f)
    try:
        for s in sites:
            adrs = "http://turnpikeshoes.com/shop/" + str(s)
            driver = webdriver.PhantomJS()
            driver.get(adrs)
            time.sleep(5)
            LongDsc = driver.find_element_by_class_name("productLongDescription").text
            print "Working.." + str(s)
            writer.writerows([[LongDsc]])
    except:
        writer.writerows(['Error'])
        print ("Error Logged..")
        pass

driver.quit()
print "Complete."
Just put the try/except block inside the loop. And there is no need for that pass statement at the end of the except block.
with open('list4.csv', 'wb') as f:
    writer = csv.writer(f)
    for s in sites:
        try:
            adrs = "http://turnpikeshoes.com/shop/" + str(s)
            driver = webdriver.PhantomJS()
            driver.get(adrs)
            time.sleep(5)
            LongDsc = driver.find_element_by_class_name("productLongDescription").text
            print "Working.." + str(s)
            writer.writerows([[LongDsc]])
        except:
            writer.writerows(['Error'])
            print ("Error Logged..")
NOTE: It's generally bad practice to use except without a particular exception class; at the very least, do except Exception:.
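For example, if the missing element is what raises here, you could catch selenium's NoSuchElementException rather than everything. A sketch under that assumption:
from selenium.common.exceptions import NoSuchElementException

for s in sites:
    try:
        adrs = "http://turnpikeshoes.com/shop/" + str(s)
        driver = webdriver.PhantomJS()
        driver.get(adrs)
        time.sleep(5)
        LongDsc = driver.find_element_by_class_name("productLongDescription").text
        print "Working.." + str(s)
        writer.writerows([[LongDsc]])
    except NoSuchElementException:
        # only the "element not found" case is logged; other errors still surface
        writer.writerows([['Error']])
        print "Error Logged.."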

Learning python and threading. I think my code runs infinitely. Help me find bugs?

So I've started learning python now, and I absolutely am in love with it.
I'm building a small scale facebook data scraper. Basically, it will use the Graph API and scrape the first names of the specified number of users. It works fine in a single thread (or no thread I guess).
I used online tutorials to come up with the following multithreaded version (updated code):
import requests
import json
import time
import threading
import Queue

GraphURL = 'http://graph.facebook.com/'
first_names = {}  # will store first names and their counts
queue = Queue.Queue()

def getOneUser(url):
    http_response = requests.get(url)  # open the request URL
    if http_response.status_code == 200:
        data = http_response.text.encode('utf-8', 'ignore')  # Get the text of response, and encode it
        json_obj = json.loads(data)  # load it as a json object
        # name = json_obj['name']
        return json_obj['first_name']
        # last = json_obj['last_name']
    return None

class ThreadGet(threading.Thread):
    """ Threaded name scraper """
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            #print 'thread started\n'
            url = GraphURL + str(self.queue.get())
            first = getOneUser(url)  # get one user's first name
            if first is not None:
                if first_names.has_key(first):  # if name has been encountered before
                    first_names[first] = first_names[first] + 1  # increment the count
                else:
                    first_names[first] = 1  # add the new name
            self.queue.task_done()
            #print 'thread ended\n'

def main():
    start = time.time()
    for i in range(6):
        t = ThreadGet(queue)
        t.setDaemon(True)
        t.start()
    for i in range(100):
        queue.put(i)
    queue.join()
    for name in first_names.keys():
        print name + ': ' + str(first_names[name])
    print '----------------------------------------------------------------'
    print '================================================================'
    # Print top first names
    for key in first_names.keys():
        if first_names[key] > 2:
            print key + ': ' + str(first_names[key])
    print 'It took ' + str(time.time()-start) + 's'

main()
To be honest, I don't understand some of the parts of the code but I get the main idea. The output is nothing. I mean the shell has nothing in it, so I believe it keeps on running.
So what I am doing is filling the queue with integers that are the user IDs on FB. Then each ID is used to build the API call URL. getOneUser returns the name of one user at a time. That task (ID) is marked as 'done' and it moves on.
What is wrong with the code above?
Your usage of first_names is not thread-safe. You could add a lock to protect the increment. Otherwise the code should work. You might be hitting some Facebook API limit, i.e., you should limit your request rate.
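For example, a minimal sketch of protecting the shared dict with a lock inside ThreadGet.run (names taken from the question's code):
lock = threading.Lock()  # module-level, shared by all workers

# inside ThreadGet.run(), guard the shared-dict update:
if first is not None:
    with lock:
        first_names[first] = first_names.get(first, 0) + 1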
You could simplify your code by using a thread pool and counting the names in the main thread:
#!/usr/bin/env python
import json
import urllib2
from collections import Counter
from multiprocessing.dummy import Pool  # use threads

def get_name(url):
    try:
        return json.load(urllib2.urlopen(url))['first_name']
    except Exception:
        return None  # error

urls = ('http://graph.facebook.com/%d' % i for i in xrange(100))
p = Pool(5)  # 5 concurrent connections
first_names = Counter(p.imap_unordered(get_name, urls))
print first_names.most_common()
To see what errors you get, you could add logging:
#!/usr/bin/env python
import json
import logging
import urllib2
from collections import Counter
from multiprocessing.dummy import Pool  # use threads

logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(threadName)s %(message)s")

def get_name(url):
    try:
        name = json.load(urllib2.urlopen(url))['first_name']
    except Exception as e:
        logging.debug('error: %s url: %s', e, url)
        return None  # error
    else:
        logging.debug('done url: %s', url)
        return name

urls = ('http://graph.facebook.com/%d' % i for i in xrange(100))
p = Pool(5)  # 5 concurrent connections
first_names = Counter(p.imap_unordered(get_name, urls))
print first_names.most_common()
A simple way to limit the number of requests per given time period is to use a semaphore:
#!/usr/bin/env python
import json
import logging
import time
import urllib2
from collections import Counter
from multiprocessing.dummy import Pool  # use threads
from threading import _BoundedSemaphore as BoundedSemaphore, Timer

logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(threadName)s %(message)s")

class RatedSemaphore(BoundedSemaphore):
    """Limit to 1 request per `period / value` seconds (over long run)."""
    def __init__(self, value=1, period=1):
        BoundedSemaphore.__init__(self, value)
        t = Timer(period, self._add_token_loop,
                  kwargs=dict(time_delta=float(period) / value))
        t.daemon = True
        t.start()

    def _add_token_loop(self, time_delta):
        """Add token every time_delta seconds."""
        while True:
            try:
                BoundedSemaphore.release(self)
            except ValueError:  # ignore if already max possible value
                pass
            time.sleep(time_delta)  # ignore EINTR

    def release(self):
        pass  # do nothing (only time-based release() is allowed)

def get_name(gid, rate_limit=RatedSemaphore(value=100, period=600)):
    url = 'http://graph.facebook.com/%d' % gid
    try:
        with rate_limit:
            name = json.load(urllib2.urlopen(url))['first_name']
    except Exception as e:
        logging.debug('error: %s url: %s', e, url)
        return None  # error
    else:
        logging.debug('done url: %s', url)
        return name

p = Pool(5)  # 5 concurrent connections
first_names = Counter(p.imap_unordered(get_name, xrange(200)))
print first_names.most_common()
After the initial burst, it should make a single request every 6 seconds.
Consider using batch requests.
Your original run function only processed one item from the queue. In all you've only removed 5 items from the queue.
Usually run functions look like
def run(self):
    while True:
        doUsefulWork()
i.e. they have a loop which causes the recurring work to be done.
[Edit] OP edited code to include this change.
Some other useful things to try:
Add a print statement into the run function: you'll find that it is only called 5 times.
Remove the queue.join() call; this is what is causing the module to block. Then you will be able to probe the state of the queue.
Put the entire body of run into a function, and verify that you can use that function in a single-threaded manner to get the desired results (see the sketch below); then try it with just a single worker thread, and finally go for multiple worker threads.
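As a sketch of that last suggestion (using the names from the question's code), pull the body of run into a helper you can first exercise without any threads:
def process_user(uid):
    """Fetch one user id and update the shared first_names counts."""
    url = GraphURL + str(uid)
    first = getOneUser(url)
    if first is not None:
        first_names[first] = first_names.get(first, 0) + 1

# single-threaded sanity check before involving Queue and worker threads
for i in range(100):
    process_user(i)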

python "local variable referenced before assignment" with hundreds of threads

I am having a problem with a piece of code that is executed inside a thread in python. Everything works fine until I start using more than 100 or 150 threads, then I get the following error in several threads:
resp.read(1)
UnboundLocalError: local variable 'resp' referenced before assignment.
The code is the following:
try:
    resp = self.opener.open(request)
    code = 200
except urllib2.HTTPError as e:
    code = e.code
    #print e.reason,_url
    #sys.stdout.flush()
except urllib2.URLError as e:
    resp = None
    code = None

try:
    if code:
        # ttfb (time to first byte)
        resp.read(1)
        ttfb = time.time() - start
        # ttlb (time to last byte)
        resp.read()
        ttlb = time.time() - start
    else:
        ttfb = 0
        ttlb = 0
except httplib.IncompleteRead:
    pass
As you can see, if "resp" is not assigned due to an exception, the exception should be raised and "code" couldn't be assigned, so it shouldn't reach "resp.read(1)".
Does anybody have a clue about why it is failing? I guess it is related to scopes, but I don't know how to avoid this or how to implement it differently.
Thanks and regards.
Basic Python: if there is an HTTPError during the open call, resp will not be set, but code will be set to e.code in the exception handler.
Then code is tested and resp.read(1) is called.
This has nothing to do with threads directly, but maybe the high number of threads caused the HTTPError.
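One minimal way around it, as a sketch that keeps the two blocks (reusing the question's urllib2/httplib code), is to initialize resp up front and test it instead of code:
resp = None
try:
    resp = self.opener.open(request)
    code = 200
except urllib2.HTTPError as e:
    code = e.code
except urllib2.URLError:
    code = None

try:
    if resp is not None:  # only read when open() actually returned a response
        resp.read(1)
        ttfb = time.time() - start
        resp.read()
        ttlb = time.time() - start
    else:
        ttfb = 0
        ttlb = 0
except httplib.IncompleteRead:
    pass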
The resp variable is defined and used in different code blocks: it is assigned in one try/except and read in another. Try to merge them:
Edited:
ttfb = 0
ttlb = 0
try:
    resp = self.opener.open(request)
    code = 200
    resp.read(1)
    ttfb = time.time() - start
    resp.read()
    ttlb = time.time() - start
except urllib2.HTTPError as e:
    code = e.code
    #print e.reason,_url
    #sys.stdout.flush()
except urllib2.URLError as e:
    pass
except httplib.IncompleteRead:
    pass
