I get an error:
AttributeError: 'Park_Stat_Thread' object has no attribute 'park_stat
when I run the script below. Any idea how to pass back the park_stat variable?
As you can see I use multi-threading to crawl a list of URLs from the web:
class Park_Stat_Thread(Thread):
def __init__(self, thread_num):
super().__init__()
self.thread_num = thread_num
def run(self):
print('starting thread %s.' % self.thread_num)
process_queue(self)
print('exiting thread %s.' % self.thread_num)
def process_queue(self):
while True:
try:
x = my_queue.get(block=False)
except queue.Empty:
return
else:
parking_stat_getter(self,x)
time.sleep(1)
def parking_stat_getter(self,x):
base = 'http://www.ahuzot.co.il/Parking/ParkingDetails/?ID={}'
response = requests.get(base.format(self.thread_num))
soup = BeautifulSoup(response.text, "lxml")
try:
self.park_stat = 0
for img in soup.find_all('img' , attrs={'src': re.compile("ParkingIcons")}):
soup_img = str(img)
if 'male' in soup_img:
self.park_stat=1
elif 'meat' in soup_img:
self.park_stat=2
elif 'panui' in soup_img:
self.park_stat=3
except AttributeError:
self.park_stat = np.nan
return self.park_stat
def get_parkings_Stat(parking_id_lists):
threads = [Park_Stat_Thread(t) for t in range(1, 8)]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
park_details = dict(zip(parking_id_lists, [thread.park_stat for thread in threads]))
return park_details
You are missing a class attribute from the class constructor. You cannot declare a class attribute later, but you can set it to some dummy value.
class Park_Stat_Thread(Thread):
def __init__(self, thread_num):
super().__init__()
self.thread_num = thread_num
self.park_stat = 0
def run(self):
print('starting thread %s.' % self.thread_num)
process_queue(self)
print('exiting thread %s.' % self.thread_num)
def process_queue(self):
while True:
try:
x = my_queue.get(block=False)
except queue.Empty:
return
else:
parking_stat_getter(self,x)
time.sleep(1)
def parking_stat_getter(self,x):
base = 'http://www.ahuzot.co.il/Parking/ParkingDetails/?ID={}'
response = requests.get(base.format(self.thread_num))
soup = BeautifulSoup(response.text, "lxml")
try:
self.park_stat = 0
for img in soup.find_all('img' , attrs={'src': re.compile("ParkingIcons")}):
soup_img = str(img)
if 'male' in soup_img:
self.park_stat=1
elif 'meat' in soup_img:
self.park_stat=2
elif 'panui' in soup_img:
self.park_stat=3
except AttributeError:
self.park_stat = np.nan
return self.park_stat
def get_parkings_Stat(parking_id_lists):
threads = [Park_Stat_Thread(t) for t in range(1, 8)]
for thread in threads:
thread.start()
for thread in threads:
thread.join()
park_details = dict(zip(parking_id_lists, [thread.park_stat for thread in threads]))
return park_details
Related
i have one scraper which initiate the "requestes" session and fetch some data, using a IPV6, i have now 10000 ip list, I have prepared it using threading, but its giving error.
Need support to find out the issue.
import requests, queue,threading, urllib3,jso,pandas as pd, os, time, datetime,inspect
num_threads = 2
root = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
with open (root+ "/ip_list.txt") as ips:
device_ip = list(ips)
class Writer_Worker(threading.Thread):
def __init__(self, queue, df, *args, **kwargs):
if not queue:
print("Device Queue not specified")
exit(1)
self.out_q = queue
self.df = df
super().__init__(*args, **kwargs)
def run(self):
while True:
try:
device_details = self.out_q.get(timeout=3)
except queue.Empty:
return
self.df[device_details[0]] = device_details
self.out_q.task_done()
class Worker(threading.Thread):
def __init__(self, queue, out_queue, device_password, *args, **kwargs):
if not queue:
print("Device Queue not specified")
exit(1)
self.queue = queue
self.pas = device_password
self.out_q = out_queue
super().__init__(*args, **kwargs)
def run(self):
while True:
try:
device_ip = self.queue.get(timeout=3)
except queue.Empty:
return
self.connect_to_device_and_process(device_ip)
self.queue.task_done()
def connect_to_device_and_process(self, device_ip):
st = str("Online")
try:
r = requests.post("https://["+device_ip+"]/?q=index.login&mimosa_ajax=1", {"username":"configure", "password":self.pas}, verify=False)
except requests.exceptions.ConnectionError:
st = str("Offline")
self.out_q.put([device_ip,st,"","","","","","","","","","","","","","","","","",""])
return
finally:
if 'Online' in st:
r = requests.get("https://["+device_ip+"]/cgi/dashboard.php", cookies=r.cookies, verify=False)
if "Response [401]" in str(r):
st2 = str("Password Error")
self.out_q.put([device_ip,st2,"","","","","","","","","","","","","","","","","",""])
else:
data = json.loads(r.content.decode())
output5 = data ['config'] ['Spectrum_Power']
self.out_q.put([device_ip,st,output5['Auto_Power'].replace('2', 'Max Power').replace('1', 'Min Power').replace('0', 'off'),output5['AutoConfig']])
def main():
start = time.time()
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
pas = input("Enter Device Password:")
df =pd.DataFrame(columns = ["IP","Status","Auto_Power","AutoChannel"])
q = queue.Queue(len(device_ip))
for ip in device_ip:
q.put_nowait(ip)
out_q = queue.Queue(len(device_ip))
Writer_Worker(out_q, df).start()
for _ in range(num_threads):
Worker(q, out_q, pas).start()
q.join()
print(df)
df.to_excel('iBridge_C5x_Audit_Report.xlsx', sheet_name='Detail', index = False)
if __name__ == "__main__":
main()
below is the error while running the script, seeps I am unable to login to this device.
Any help is appreciable.
You should use a thread pool that distributes the work between a fixed number of threads. This is a core feature of Python since version 3.2.
from concurrent.futures import ThreadPoolExecutor
Define a function perform(ip) that performs the request for one ip
Set variable numThreads to the number of desired threads
Run the thread-pool executor:
print(f'Using {numThreads} threads')
with ThreadPoolExecutor(max_workers=numThreads) as pool:
success = all(pool.map(perform, ips))
Source: https://docs.python.org/3/library/concurrent.futures.html
On that page you find an example even better tailored to your application: https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example
from threading import Thread
th = Thread(target=self.fill_imdb, args=(movies_info_part, "thread " + str(count)))
th.start()
fill_imdb is my method
I am trying to end a thread execution without directly referencing the thread. because it is not possible to do that in the full program.
for reference the main program is for the Raspberry Pi and I need it to stop executing a function/thread immediately once a button is pressed.
I have tried raising an exception from main but the other do not catch it for some reason.
Here is the scrap program that I have been testing on:
import threading
import time
class Thread_Exception(Exception):
def __init__(self, msg):
return super().__init__(msg)
def thread_function(index):
bool = True
try:
while bool:
print("Print from thread #", index)
time.sleep(4)
except Thread_Exception:
print('Exception thrown, thread #', index)
bool = False
if __name__ == "__main__":
try:
for index in range(3):
x = threading.Thread(target=thread_function, args=(index,))
x.start()
time.sleep(20)
raise Thread_Exception("intr")
while True:
continue
except KeyboardInterrupt:
print('Interrupted main')
an example of how it can be done:
import threading
import ctypes
import time
class thread_with_exception(threading.Thread):
def __init__(self, name):
threading.Thread.__init__(self)
self.name = name
def run(self):
# target function of the thread class
try:
while True:
print('running ' + self.name)
finally:
print('ended')
def get_id(self):
# returns id of the respective thread
if hasattr(self, '_thread_id'):
return self._thread_id
for id, thread in threading._active.items():
if thread is self:
return id
def raise_exception(self):
thread_id = self.get_id()
res = ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id,
ctypes.py_object(SystemExit))
if res > 1:
ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id, 0)
print('Exception raise failure')
t1 = thread_with_exception('Thread 1')
t1.start()
time.sleep(2)
t1.raise_exception()
t1.join()
The article this came from can currently be found here:
https://www.geeksforgeeks.org/python-different-ways-to-kill-a-thread/
Why is not Factory.rdyq visible to inherited classes?
bar.py
import Queue
import threading
class Factory:
name = ""
rdyQ = Queue.Queue()
dispQ = Queue.Queue()
def __init__(self):
self.worker_count = 1
self.worker_thread = None
def worker(self): pass
def initialize(self):
for i in range(self.worker_count):
t = threading.Thread(target=self.worker)
t.daemon = True
t.start()
self.worker_thread = t
#staticmethod
def joinQ():
Factory.rdyQ.join()
Factory.dispQ.join()
return
#staticmethod
def getFactory(factory_name):
if factory_name == "setup":
return SetupFactory()
elif factory_name == "dispatch":
return DispatchFactory()
elif factory_name == "complete":
return CompleteFactory()
else:
return None
class SetupFactory(Factory):
name = "setup"
def worker(self):
while True:
try:
item = Factory.rdyq.get(timeout=1)
print "setup: %s" % item
Factory.rdyq.task_done()
Factory.dispQ.put(item)
except Queue.Empty, msg:
continue
class DispatchFactory(Factory):
name = "dispatch"
def worker(self):
while True:
try:
item = Factory.dispQ.get(timeout=1)
print "dispQ: %s" % item
Factory.dispQ.task_done()
except Queue.Empty, msg:
continue
class CompleteFactory(Factory):
name = "complete"
def worker(self):
for i in range(10):
Factory.rdyq.put(i)
foo.py
import bar as df
setup = df.Factory.getFactory("setup")
dispatch = df.Factory.getFactory("dispatch")
complete = df.Factory.getFactory("complete")
setup.initialize()
dispatch.initialize()
complete.initialize()
df.Factory.joinQ()
setup.worker_thread.join()
dispatch.worker_thread.join()
complete.worker_thread.join()
python foo.py
File "/u/bar.py", line 73, in worker
Factory.rdyq.put(i)
AttributeError: class Factory has no attribute 'rdyq'
You should use a classmethod instead of a staticmethod for joinQ.
You also had a rdyq (bar.py last line) instead of the expected rdyQ and Python is case-sensitive.
bar.py
import Queue
import threading
class Factory:
name = ""
rdyQ = Queue.Queue()
dispQ = Queue.Queue()
def __init__(self):
self.worker_count = 1
self.worker_thread = None
def worker(self): pass
def initialize(self):
for i in range(self.worker_count):
t = threading.Thread(target=self.worker)
t.daemon = True
t.start()
self.worker_thread = t
#classmethod
def joinQ(cls):
cls.rdyQ.join()
cls.dispQ.join()
return
#staticmethod
def getFactory(factory_name):
if factory_name == "setup":
return SetupFactory()
elif factory_name == "dispatch":
return DispatchFactory()
elif factory_name == "complete":
return CompleteFactory()
else:
return None
class SetupFactory(Factory):
name = "setup"
def worker(self):
while True:
try:
item = Factory.rdyq.get(timeout=1)
print "setup: %s" % item
Factory.rdyq.task_done()
Factory.dispQ.put(item)
except Queue.Empty, msg:
continue
class DispatchFactory(Factory):
name = "dispatch"
def worker(self):
while True:
try:
item = Factory.dispQ.get(timeout=1)
print "dispQ: %s" % item
Factory.dispQ.task_done()
except Queue.Empty, msg:
continue
class CompleteFactory(Factory):
name = "complete"
def worker(self):
for i in range(10):
Factory.rdyQ.put(i)
Python variables and attributes are case sensitive, so "rdyq" is not the same as "rdyQ".
You set the name with a capital q, so maybe this will fix it?
Factory.rdyQ.put(i)
I recommend using_underscores_for_variables as you avoid issues camelCase can cause.
Im using the following code to multithread urlib2. However what is the best way to limit the number of threads that it consumes ??
class ApiMultiThreadHelper:
def __init__(self,api_calls):
self.q = Queue.Queue()
self.api_datastore = {}
self.api_calls = api_calls
self.userpass = '#####'
def query_api(self,q,api_query):
self.q.put(self.issue_request(api_query))
def issue_request(self,api_query):
self.api_datastore.update({api_query:{}})
for lookup in ["call1","call2"]:
query = api_query+lookup
request = urllib2.Request(query)
request.add_header("Authorization", "Basic %s" % self.userpass)
f = urllib2.urlopen(request)
response = f.read()
f.close()
self.api_datastore[api_query].update({lookup:response})
return True
def go(self):
threads = []
for i in self.api_calls:
t = threading.Thread(target=self.query_api, args = (self.q,i))
t.start()
threads.append(t)
for t in threads:
t.join()
You should use a thread pool. Here's my implementation I've made years ago (Python 3.x friendly):
import traceback
from threading import Thread
try:
import queue as Queue # Python3.x
except ImportError:
import Queue
class ThreadPool(object):
def __init__(self, no=10):
self.alive = True
self.tasks = Queue.Queue()
self.threads = []
for _ in range(no):
t = Thread(target=self.worker)
t.start()
self.threads.append(t)
def worker(self):
while self.alive:
try:
fn, args, kwargs = self.tasks.get(timeout=0.5)
except Queue.Empty:
continue
except ValueError:
self.tasks.task_done()
continue
try:
fn(*args, **kwargs)
except Exception:
# might wanna add some better error handling
traceback.print_exc()
self.tasks.task_done()
def add_job(self, fn, args=[], kwargs={}):
self.tasks.put((fn, args, kwargs))
def join(self):
self.tasks.join()
def deactivate(self):
self.alive = False
for t in self.threads:
t.join()
You can also find a similar class in multiprocessing.pool module (don't ask me why it is there). You can then refactor your code like this:
def go(self):
tp = ThreadPool(20) # <-- 20 thread workers
for i in self.api_calls:
tp.add_job(self.query_api, args=(self.q, i))
tp.join()
tp.deactivate()
Number of threads is now defined a priori.
I have this
#threads
import thread
import threading
import time
class ThreadTask(threading.Thread):
def __init__(self,name,delay,callback):
threading.Thread.__init__(self)
self.name = name
self.counter = 0
self.delay = delay
self.callback = callback
self.lock = threading.Lock()
def run(self):
while True:
self.counter += 1
print 'running ', self.name , self.counter
time.sleep(self.delay)
if self.counter % 5 == 0:
self.callback(self)
class Barrier(object):
def __init__(self):
self.locks = []
def wait_task(self,task):
print 'lock acquire'
self.locks.append(task.lock)
task.lock.acquire(True)
task.lock.acquire(True)
def notity_task(self,task):
print 'release lock'
for i in self.locks:
try:
i.release()
except Exception, e:
print 'Error', e.message
print 'Lock released'
self.locks = []
try:
barrier = Barrier()
task1 = ThreadTask('Task_1',1,barrier.wait_task)
task4 = ThreadTask('Task_4',1,barrier.wait_task)
task3 = ThreadTask('Task_3',2,barrier.wait_task)
task2 = ThreadTask('Task_2',3,barrier.notity_task)
task2.start()
task1.start()
task3.start()
task4.start()
except Exception as e:
raise e
while 1:
pass
These Thread run ok but if I put 2 task.lock.acquire(True) consecutive otherwise do not work, they stop each 10 when need to be each 5. Any one know what happening?
Thanks
You create a regular, non-reentrant lock here:
self.lock = threading.Lock()
And then try to acquire it twice here
task.lock.acquire(True)
task.lock.acquire(True)
This is illegal, regular locks cannot be acquired twice by same thread.
Perhaps you means to use a threading.RLock()?