Python - NameError: name 'create_workers' is not defined - python

class Main:
    PROJECT_NAME = 'something'
    HOMEPAGE = 'something'
    DOMAIN_NAME = get_domain_name(HOMEPAGE)
    QUEUE_FILE = PROJECT_NAME + '/queue.txt'
    CRAWLED_FILE = PROJECT_NAME + '/crawled.txt'
    DATA_FILE = PROJECT_NAME + '/data.txt'
    NUMBER_OF_THREADS = 20
    queue = Queue()
    Spider(PROJECT_NAME, HOMEPAGE, DOMAIN_NAME)

    # Create worker threads (will die when main exits)
    def create_workers(self):
        for _ in range(self.NUMBER_OF_THREADS):
            t = self.threading.Thread(target=self.work)
            t.daemon = True
            t.start()

    # Do the next job in the queue
    def work(self):
        while True:
            url = self.queue.get()
            Spider.crawl_page(self.threading.current_thread().name, url)
            self.queue.task_done()

    # Each queued link is a new job
    def create_jobs(self):
        for link in self.file_to_set(self.QUEUE_FILE):
            self.queue.put(link)
        self.queue.join()
        self.crawl()

    # Check if there are items in the queue, if so crawl them
    def crawl(self):
        queued_links = self.file_to_set(self.QUEUE_FILE)
        if len(queued_links) > 0:
            print(str(len(queued_links)) + ' links in the queue')
            self.create_jobs()

create_workers()
crawl()
Above is my code. I have been receiving:
NameError: name 'create_workers' is not defined and NameError: name 'crawl' is not defined.
Any help or suggestions for a beginner here?

You must create an instance of the class Main and call its methods through that instance. For this you must change:

create_workers()
crawl()

to:

m = Main()
m.create_workers()
m.crawl()
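
To see why, here is a minimal sketch (the class name Main and the method names are taken from the question; the method bodies are placeholders):

class Main:
    def create_workers(self):
        print('creating workers')

    def crawl(self):
        print('crawling')

# create_workers()  # NameError: the name only exists in Main's namespace

m = Main()
m.create_workers()  # works: Python looks the method up on the instance m
m.crawl()

A def inside a class body becomes an attribute of the class, not a module-level name, so it has to be reached through the class or an instance of it.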

Related

Python Multithreading with requests

I have a scraper that initiates a requests session and fetches some data using IPv6. I now have a list of 10,000 IPs, and I have parallelized the work using threading, but it is giving an error.
I need support to find out the issue.
import requests, queue, threading, urllib3, json, pandas as pd, os, time, datetime, inspect

num_threads = 2
root = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
with open(root + "/ip_list.txt") as ips:
    device_ip = list(ips)

class Writer_Worker(threading.Thread):
    def __init__(self, queue, df, *args, **kwargs):
        if not queue:
            print("Device Queue not specified")
            exit(1)
        self.out_q = queue
        self.df = df
        super().__init__(*args, **kwargs)

    def run(self):
        while True:
            try:
                device_details = self.out_q.get(timeout=3)
            except queue.Empty:
                return
            self.df[device_details[0]] = device_details
            self.out_q.task_done()

class Worker(threading.Thread):
    def __init__(self, queue, out_queue, device_password, *args, **kwargs):
        if not queue:
            print("Device Queue not specified")
            exit(1)
        self.queue = queue
        self.pas = device_password
        self.out_q = out_queue
        super().__init__(*args, **kwargs)

    def run(self):
        while True:
            try:
                device_ip = self.queue.get(timeout=3)
            except queue.Empty:
                return
            self.connect_to_device_and_process(device_ip)
            self.queue.task_done()

    def connect_to_device_and_process(self, device_ip):
        st = str("Online")
        try:
            r = requests.post("https://[" + device_ip + "]/?q=index.login&mimosa_ajax=1", {"username": "configure", "password": self.pas}, verify=False)
        except requests.exceptions.ConnectionError:
            st = str("Offline")
            self.out_q.put([device_ip, st, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""])
            return
        finally:
            if 'Online' in st:
                r = requests.get("https://[" + device_ip + "]/cgi/dashboard.php", cookies=r.cookies, verify=False)
                if "Response [401]" in str(r):
                    st2 = str("Password Error")
                    self.out_q.put([device_ip, st2, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""])
                else:
                    data = json.loads(r.content.decode())
                    output5 = data['config']['Spectrum_Power']
                    self.out_q.put([device_ip, st, output5['Auto_Power'].replace('2', 'Max Power').replace('1', 'Min Power').replace('0', 'off'), output5['AutoConfig']])

def main():
    start = time.time()
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    pas = input("Enter Device Password:")
    df = pd.DataFrame(columns=["IP", "Status", "Auto_Power", "AutoChannel"])
    q = queue.Queue(len(device_ip))
    for ip in device_ip:
        q.put_nowait(ip)
    out_q = queue.Queue(len(device_ip))
    Writer_Worker(out_q, df).start()
    for _ in range(num_threads):
        Worker(q, out_q, pas).start()
    q.join()
    print(df)
    df.to_excel('iBridge_C5x_Audit_Report.xlsx', sheet_name='Detail', index=False)

if __name__ == "__main__":
    main()
Below is the error while running the script; it seems I am unable to log in to this device.
Any help is appreciated.
You should use a thread pool that distributes the work between a fixed number of threads. This is a core feature of Python since version 3.2.
from concurrent.futures import ThreadPoolExecutor
Define a function perform(ip) that performs the request for one ip
Set variable numThreads to the number of desired threads
Run the thread-pool executor:
print(f'Using {numThreads} threads')
with ThreadPoolExecutor(max_workers=numThreads) as pool:
    success = all(pool.map(perform, ips))
Source: https://docs.python.org/3/library/concurrent.futures.html
On that page you find an example even better tailored to your application: https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor-example
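For instance, a minimal sketch of that pattern applied here, assuming a hypothetical perform(ip) that logs in to one device and returns True on success (the URL and payload are taken from the question; the timeout, the placeholder password, and the return values are assumptions):

from concurrent.futures import ThreadPoolExecutor

import requests
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def perform(ip):
    # one request per IP; True when the login call comes back OK
    try:
        r = requests.post("https://[" + ip + "]/?q=index.login&mimosa_ajax=1",
                          {"username": "configure", "password": "secret"},
                          verify=False, timeout=10)
        return r.ok
    except requests.exceptions.ConnectionError:
        return False

with open("ip_list.txt") as f:
    ips = [line.strip() for line in f]

numThreads = 2
print(f'Using {numThreads} threads')
with ThreadPoolExecutor(max_workers=numThreads) as pool:
    success = all(pool.map(perform, ips))
print('all devices reachable:', success)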
from threading import Thread

th = Thread(target=self.fill_imdb, args=(movies_info_part, "thread " + str(count)))
th.start()

where fill_imdb is my method.

Performing the same class method on multiple instances simultaneously

from selenium import webdriver
from selenium.webdriver.common.by import By
from multiprocessing import Process
from multiprocessing import Pool
import time
import json

def json_read(file):
    with open(file, "r") as f:
        return json.loads(f.read())

class item:
    def __init__(self, name, level, location, guild, unit_price, volume, total_cost, last_seen):
        self.name = name
        self.level = level
        self.location = location
        self.guild = guild
        self.unit_price = unit_price
        self.volume = volume
        self.total_cost = total_cost
        self.last_seen = last_seen

class web_page_elements:
    def __init__(self, item=None, max_price=None, depth=None):
        self.items = []
        self.depth = depth
        self.max_price = max_price
        self.url = "https://eu.tamrieltradecentre.com/pc/Trade/"
        self.item = item
        self.current_page = 1
        self.op = op = webdriver.ChromeOptions()
        self.op.add_argument('headless')
        self.driver = webdriver.Chrome('chromedriver.exe')  # options=op

    def search(self):
        self.driver.get(self.url)
        print("on")
        input_field = self.driver.find_element(By.XPATH, "/html/body/div[2]/table/tbody/tr[3]/td[2]/section/div/div[3]/div/form/div[2]/div[2]/div[1]/input[2]")
        input_field.send_keys(self.item)
        self.driver.find_element(By.XPATH, "/html/body/div[2]/table/tbody/tr[3]/td[2]/section/div/div[3]/div/form/div[2]/div[2]/div[1]/span/button").click()
        self.driver.switch_to.window(self.driver.window_handles[-1])

    def data_parse(self, data):
        raw_data = data.text.split('\n')
        return item(raw_data[0], raw_data[1], raw_data[3], raw_data[4], float(raw_data[5].replace(',', '')), int(raw_data[7]), raw_data[9].split()[0], raw_data[9].split(' ')[1])

    def next_page(self):
        if self.current_page == 1:
            self.driver.get(self.driver.current_url + f"&page={self.current_page+1}")
            self.current_page = self.current_page + 1
        else:
            self.driver.get(self.driver.current_url.replace(f"&page={self.current_page}", f"&page={self.current_page+1}"))
            self.current_page = self.current_page + 1

    def data_pull(self):
        # gathers table rows as elements (objects)
        rows = self.driver.find_elements(By.XPATH, '//tr[@class="cursor-pointer"]')
        data = [self.data_parse(item) for item in rows]
        for item in data:
            print(item.__dict__)

    def quit(self):
        self.driver.close()
        self.driver.stop_client()
        self.driver.quit()

    def run(self):
        self.search()
        self.data_pull()
        time.sleep(2)
        self.next_page()
        self.data_pull()

tasks = [Process(target=obj.run()) for obj in objs]
for task in tasks:
    print(task)
    task.start()

if __name__ == '__main__':
    main()
This is the entire code. Essentially it pulls items off a website with all the relevant data, and there are multiple items. Currently I create an instance for each item listed in an external file, and I then wish to take each object in the objs list and call the run method on each, so that they run simultaneously instead of one at a time, for run-time purposes. However, I am unsure whether this is possible in Python.
tasks = [Process(target=obj.run()) for obj in objs]
for task in tasks:
    print(task)
    task.start()

if __name__ == '__main__':
    main()
This is currently how I was trying to do it; however, when I run the code, each one still appears to run only after the one before it has completed, and I was hoping someone has a solution for this.
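
One thing worth checking, sketched below: target=obj.run() calls run immediately in the parent process and hands its return value (None) to Process, so everything executes sequentially before the processes even start. Passing the bound method itself, without parentheses, defers the call to the child process. This is a sketch assuming the objs list from the question:

from multiprocessing import Process

tasks = [Process(target=obj.run) for obj in objs]  # note: obj.run, not obj.run()
for task in tasks:
    task.start()
for task in tasks:
    task.join()

Note that with the spawn start method (e.g. on Windows) the object behind the target must be picklable, and a live Chrome webdriver is not, so the driver may need to be created inside run() rather than in __init__.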

multiprocessing - processes won't join?

TL;DR - the consumer processes finish but do not join, no errors are raised, and the script runs infinitely, stuck in limbo on the join statement.
I am aiming to speed up a data retrieval process; however, I do not know how many 'tasks' (pieces of data to retrieve) there might be. So I made a modified version of the poison pill method, so that the task recognizes when it is no longer retrieving information and triggers the poison pill if statement.
I have posted a proof, which is a working example of my poison pill method, and a full script, which, as the name implies, is the full script. (Both should run as-is.)
proof:
import multiprocessing

class Task:
    def __init__(self, number):
        self.number = number

    def __call__(self):
        """Find officer and company data and combine and save it"""
        try:
            # 'gather some data!'
            self.result = self.number * 2
            print(self.number)
            # 'fake' finding no data
            if self.result >= 8:
                raise NameError
        except NameError:
            # become poison pill once latest is done
            self.result = None

    def output(self):
        return self.result

class Consumer(multiprocessing.Process):
    """Handle process and re-queue complete tasks"""
    def __init__(self, waiting_queue, complete_queue):
        multiprocessing.Process.__init__(self)
        self.waiting_queue = waiting_queue
        self.complete_queue = complete_queue

    def run(self):
        """process tasks until queue is empty"""
        proc_name = self.name
        while True:
            current_task = self.waiting_queue.get()
            current_task()
            if current_task.output() is None:
                print('{}: Exiting, poison pill reached'.format(proc_name))
                self.waiting_queue.task_done()
                break
            self.waiting_queue.task_done()
            self.complete_queue.put(current_task)
            print('{}: complete'.format(proc_name))

class Shepard:
    """Handle life cycle of Consumers, Queues and Tasks"""
    def __init__(self):
        pass

    def __call__(self, start_point):
        # initialize queues
        todo = multiprocessing.JoinableQueue()
        finished = multiprocessing.JoinableQueue()
        # start consumers
        num_consumers = multiprocessing.cpu_count() * 2
        consumers = [Consumer(todo, finished) for i in range(num_consumers)]
        for q in consumers:
            q.start()
        # decide on (max) end limit (make much longer than suspected amount of data to be gathered)
        start = int(start_point)
        max_record_range = 100
        end = start + max_record_range
        # Enqueue jobs
        for i in range(start, end):
            todo.put(Task(i))
        print('Processes joining')
        # wait for processes to join
        for p in consumers:
            p.join()
        print('Processes joined')
        # process results - UNFINISHED
        pass
        # return results - UNFINISHED
        return 'results!'

if __name__ == '__main__':
    # load start points:
    start_points = {'cat1': 1, 'cat2': 3, 'cat3': 4}
    master = Shepard()
    cat1 = master(start_points['cat1'])
    print('cat1 done')
    cat2 = master(start_points['cat2'])
    print('cat2 done')
    cat3 = master(start_points['cat3'])
So here is the full script:
import time
import requests
import sys
import json
import pandas as pd
import multiprocessing
import queue

class CompaniesHouseRequest:
    """Retrieve information from Companies House"""
    def __init__(self, company, catagory_url=''):
        """Example URL: '/officers'"""
        self.company = str(company)
        self.catagory_url = str(catagory_url)

    def retrieve(self, key='Rn7RLDV9Tw9v4ShDCotjDtJFBgp1Lr4d-9GRYZMo'):
        """retrieve data from Companies House"""
        call = 'https://api.companieshouse.gov.uk/company/' + self.company + self.catagory_url
        retrieve_complete = False
        while retrieve_complete is False:
            resp = requests.get(call, auth=requests.auth.HTTPBasicAuth(key, ''))
            code = resp.status_code
            if code == 404:
                print(resp.status_code)
                raise NameError('Company not found')
            elif code == 200:
                try:
                    self.data = json.loads(resp.content.decode('UTF8'))
                    retrieve_complete = True
                except json.decoder.JSONDecodeError:
                    print('Decode Error in Officers!')
            else:
                print("Error:", sys.exc_info()[0])
                print('Retrying')
                time.sleep(5)
        return self.data

class Company:
    """Retrieve and hold company details"""
    def __init__(self, company_number):
        self.company_number = company_number

    def __call__(self):
        """Create request and process data"""
        # make request
        req = CompaniesHouseRequest(self.company_number)
        data = req.retrieve()
        # extract data
        try:
            line = [self.company_number,
                    data['company_name'],
                    data['registered_office_address'].get('premises', ''),
                    data['registered_office_address'].get('address_line_1', ''),
                    data['registered_office_address'].get('address_line_2', ''),
                    data['registered_office_address'].get('country', ''),
                    data['registered_office_address'].get('locality', ''),
                    data['registered_office_address'].get('postal_code', ''),
                    data['registered_office_address'].get('region', '')]
        except KeyError:
            line = ['' for i in range(0, 9)]
        # save as pandas dataframe
        return pd.DataFrame([line], columns=['company_number', 'company_name', 'company_address_premises',
                                             'company_address_line_1', 'company_address_line_2',
                                             'company_address_country', 'company_address_locality',
                                             'company_address_postcode', 'company_address_region'])

def name_splitter(name):
    split = name.split(', ')
    if len(split) > 2:
        return [split[2], split[1], split[0]]
    else:
        return ['', split[1], split[0]]

class Officers:
    """Retrieve and hold officers details"""
    def __init__(self, company_number):
        self.company_number = company_number

    def __call__(self):
        """Create request and process data"""
        # make request
        req = CompaniesHouseRequest(self.company_number, '/officers')
        data = req.retrieve()
        # extract data
        for officer in data['items']:
            if officer['officer_role'] == 'director':
                name = name_splitter(officer['name'])
                line = [name[0],
                        name[1],
                        name[2],
                        officer.get('occupation'),
                        officer.get('country_of_residence'),
                        officer.get('nationality'),
                        officer.get('appointed_on', ''),
                        officer['address'].get('premises', ''),
                        officer['address'].get('address_line_1', ''),
                        officer['address'].get('address_line_2', ''),
                        officer['address'].get('country', ''),
                        officer['address'].get('locality', ''),
                        officer['address'].get('postal_code', ''),
                        officer['address'].get('region', '')]
                break
        director_count = sum(map(lambda x: x['officer_role'] == 'director', data['items']))
        if director_count > 1:
            line += [True]
        elif director_count == 1:
            line += [False]
        else:
            line = ['no directors'] * 3 + [''] * 12
        return pd.DataFrame([line], columns=['title', 'first_name', 'surname', 'occupation', 'country_of_residence',
                                             'nationality', 'appointed_on',
                                             'address_premises', 'address_line_1', 'address_line_2',
                                             'address_country', 'address_locality', 'address_postcode',
                                             'address_region', 'multi_director'])

class Task:
    def __init__(self, prefix, company_number):
        self.prefix = prefix
        self.company_number = company_number

    def __call__(self):
        """Find officer and company data and combine and save it"""
        comp_id = self.prefix + str(self.company_number)
        print(comp_id)
        try:
            # initialise company class
            comp = Company(comp_id)
            # initialise officer class
            off = Officers(comp_id)
            # retrieve and concatenate
            self.result = pd.concat([comp(), off()], axis=1)
        except NameError:
            # become poison pill once latest is done
            self.result = None

    def output(self):
        return self.result

class Consumer(multiprocessing.Process):
    """Handle process and re-queue complete tasks"""
    def __init__(self, waiting_queue, complete_queue):
        multiprocessing.Process.__init__(self)
        self.waiting_queue = waiting_queue
        self.complete_queue = complete_queue

    def run(self):
        """process tasks until queue is empty"""
        proc_name = self.name
        while True:
            current_task = self.waiting_queue.get()
            current_task()
            if current_task.output() is None:
                print('{}: Exiting, poison pill reached'.format(proc_name))
                self.waiting_queue.task_done()
                break
            self.waiting_queue.task_done()
            self.complete_queue.put(current_task)
            print('{}: complete'.format(proc_name))

class Shepard:
    """Handle life of Consumers, Queues and Tasks"""
    def __init__(self):
        pass

    def __call__(self, prefix, start_point):
        # initialize queues
        todo = multiprocessing.JoinableQueue()
        finished = multiprocessing.JoinableQueue()
        # start consumers
        num_consumers = multiprocessing.cpu_count() * 2
        consumers = [Consumer(todo, finished) for i in range(num_consumers)]
        for q in consumers:
            q.start()
        # decide on (max) end limit
        start = int(start_point)
        max_record_range = 1000
        end = start + max_record_range
        # Enqueue jobs
        for i in range(start, end):
            todo.put(Task(prefix, i))
        print('Processes joining')
        # wait for processes to join
        for p in consumers:
            p.join()
        print('Processes joined')
        # process results - UNFINISHED
        pass
        # return results - UNFINISHED
        return 'results!'

if __name__ == '__main__':
    # paths to data
    data_directory = r'C:\Users\hdewinton\OneDrive - Advanced Payment Solutions\Python\Corporate DM\data'
    base = r'\base'
    # load start points:
    init = {"England": 10926071, "Scotland": 574309, "Ireland": 647561}
    # gather data for each category
    master = Shepard()
    ireland = master('NI', init['Ireland'])
    scotland = master('SC', init['Scotland'])
    england = master('', init['England'])
TL;DR - the consequence (getting stuck in limbo while the consumers fail to join) can be fixed by changing this:

finished = multiprocessing.JoinableQueue()

to this:

manager = multiprocessing.Manager()
finished = manager.Queue()
Details - "When an object is put on a queue, the object is pickled and a background thread later flushes the pickled data to an underlying pipe. This has some consequences which are a little surprising, but should not cause any practical difficulties – if they really bother you then you can instead use a queue created with a manager." from the documentation
The second queue, of finished items, triggers one of the aforementioned surprising consequences if a certain number of tasks are added to it. Below the limit there are no problems; above the limit, the consequence occurs. This does not happen in the proof because the second queue, while present, is not used. The limit depends on the size and complexity of the Task objects, so I reckon this has something to do with the flushing of pickled data only occurring after a certain volume of data is reached - that volume of data is what triggers this consequence.
Addendum - another error appears once the fix has been implemented: a pipe error occurs because the consumers of the todo queue are terminated before the queue is empty, leaving the pipe within the queue object with no connection object to send data to. This triggers a WinError 232. Not to worry though: the pipe error can be fixed by emptying the queue before exiting the consumers.
Simply add this to the Consumer class's run method:

while not self.waiting_queue.empty():
    try:
        self.waiting_queue.get(timeout=0.001)
    except:
        pass
self.waiting_queue.close()
This removes every element from the queue. Make sure it comes after the main while loop, and the pipe error should not occur, because the consumers will empty the queue before terminating.
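
For context, a sketch of the amended Consumer.run with the drain in place (same Consumer class as above; only the tail after the while loop is new, and queue.Empty is used instead of a bare except, which the full script's import queue already allows):

def run(self):
    """process tasks until queue is empty"""
    proc_name = self.name
    while True:
        current_task = self.waiting_queue.get()
        current_task()
        if current_task.output() is None:
            print('{}: Exiting, poison pill reached'.format(proc_name))
            self.waiting_queue.task_done()
            break
        self.waiting_queue.task_done()
        self.complete_queue.put(current_task)
        print('{}: complete'.format(proc_name))
    # drain any remaining tasks so the feeder pipe has nowhere left to write
    while not self.waiting_queue.empty():
        try:
            self.waiting_queue.get(timeout=0.001)
        except queue.Empty:
            pass
    self.waiting_queue.close()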

Python multiprocessing RemoteManager under a multiprocessing.Process

I'm trying to start a data queue server under a managing process (so that it can later be turned into a service), and while the data queue server function works fine in the main process, it does not work in a process created using multiprocessing.Process.
The dataQueueServer and dataQueueClient code is based on the code from the multiprocessing module documentation here.
When run on its own, dataQueueServer works well. However, when run using a multiprocessing.Process's start() in mpqueue, it doesn't work (when tested with the client). I am using the dataQueueClient without changes to test both cases.
The code does reach serve_forever in both cases, so I think the server is running, but something is blocking it from communicating back to the client in the mpqueue case.
I have placed the loop that runs the serve_forever() part under a thread, so that it can be stopped.
Here is the code:
mpqueue # this is the "manager" process trying to spawn the server in a child process
import time
import multiprocessing
import threading

import dataQueueServer

class Printer():
    def __init__(self):
        self.lock = threading.Lock()

    def tsprint(self, text):
        with self.lock:
            print text

class QueueServer(multiprocessing.Process):
    def __init__(self, name = '', printer = None):
        multiprocessing.Process.__init__(self)
        self.name = name
        self.printer = printer
        self.ml = dataQueueServer.MainLoop(name = 'ml', printer = self.printer)

    def run(self):
        self.printer.tsprint(self.ml)
        self.ml.start()

    def stop(self):
        self.ml.stop()

if __name__ == '__main__':
    printer = Printer()
    qs = QueueServer(name = 'QueueServer', printer = printer)
    printer.tsprint(qs)
    printer.tsprint('starting')
    qs.start()
    printer.tsprint('started.')
    printer.tsprint('Press Ctrl-C to quit')
    try:
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        printer.tsprint('\nTrying to exit cleanly...')
        qs.stop()
        printer.tsprint('stopped')
dataQueueServer
import time
import threading
from multiprocessing.managers import BaseManager
from multiprocessing import Queue

HOST = ''
PORT = 50010
AUTHKEY = 'authkey'

## Define some helper functions for use by the main process loop
class Printer():
    def __init__(self):
        self.lock = threading.Lock()

    def tsprint(self, text):
        with self.lock:
            print text

class QueueManager(BaseManager):
    pass

class MainLoop(threading.Thread):
    """A thread based loop manager, allowing termination signals to be sent
    to the thread"""
    def __init__(self, name = '', printer = None):
        threading.Thread.__init__(self)
        self._stopEvent = threading.Event()
        self.daemon = True
        self.name = name
        if printer is None:
            self.printer = Printer()
        else:
            self.printer = printer
        ## create the queue
        self.queue = Queue()
        ## Add a function to the handler to return the queue to clients
        self.QM = QueueManager
        self.QM.register('get_queue', callable=lambda: self.queue)
        self.queue_manager = self.QM(address=(HOST, PORT), authkey=AUTHKEY)
        self.queue_server = self.queue_manager.get_server()

    def __del__(self):
        self.printer.tsprint('closing...')

    def run(self):
        self.printer.tsprint('{}: started serving'.format(self.name))
        self.queue_server.serve_forever()

    def stop(self):
        self.printer.tsprint('{}: stopping'.format(self.name))
        self._stopEvent.set()

    def stopped(self):
        return self._stopEvent.isSet()

def start():
    printer = Printer()
    ml = MainLoop(name = 'ml', printer = printer)
    ml.start()
    return ml

def stop(ml):
    ml.stop()

if __name__ == '__main__':
    ml = start()
    raw_input("\nhit return to stop")
    stop(ml)
And a client:
dataQueueClient
import datetime
from multiprocessing.managers import BaseManager

n = 0
N = 10**n

HOST = ''
PORT = 50010
AUTHKEY = 'authkey'

def now():
    return datetime.datetime.now()

def gen(n, func, *args, **kwargs):
    k = 0
    while k < n:
        yield func(*args, **kwargs)
        k += 1

class QueueManager(BaseManager):
    pass

QueueManager.register('get_queue')
m = QueueManager(address=(HOST, PORT), authkey=AUTHKEY)
m.connect()
queue = m.get_queue()

def load(msg, q):
    return q.put(msg)

def get(q):
    return q.get()

lgen = gen(N, load, msg = 'hello', q = queue)

t0 = now()
while True:
    try:
        lgen.next()
    except StopIteration:
        break
t1 = now()
print 'loaded %d items in ' % N, t1-t0

t0 = now()
while queue.qsize() > 0:
    queue.get()
t1 = now()
print 'got %d items in ' % N, t1-t0
So it seems the solution is simple enough: don't use serve_forever(); use manager.start() instead.
According to Eli Bendersky, the BaseManager (and its extended version SyncManager) already spawns the server in a new process (and looking at the multiprocessing.managers code confirms this). The problem I have been experiencing stems from the form used in the example, in which the server is started under the main process.
I still don't understand why the current example doesn't work when run under a child process, but that's no longer an issue.
Here's the working (and much simplified from the OP) code to manage multiple queue servers:
Server:
from multiprocessing import Queue
from multiprocessing.managers import SyncManager

HOST = ''
PORT0 = 5011
PORT1 = 5012
PORT2 = 5013
AUTHKEY = 'authkey'
name0 = 'qm0'
name1 = 'qm1'
name2 = 'qm2'
description = 'Queue Server'

def CreateQueueServer(HOST, PORT, AUTHKEY, name = None, description = None):
    q = Queue()

    class QueueManager(SyncManager):
        pass

    QueueManager.register('get_queue', callable = lambda: q)
    QueueManager.register('get_name', callable = lambda: name)
    QueueManager.register('get_description', callable = lambda: description)
    manager = QueueManager(address = (HOST, PORT), authkey = AUTHKEY)
    manager.start()  # This actually starts the server
    return manager

# Start three queue servers
qm0 = CreateQueueServer(HOST, PORT0, AUTHKEY, name0, description)
qm1 = CreateQueueServer(HOST, PORT1, AUTHKEY, name1, description)
qm2 = CreateQueueServer(HOST, PORT2, AUTHKEY, name2, description)

raw_input("return to end")
Client:
from multiprocessing.managers import SyncManager

HOST = ''
PORT0 = 5011
PORT1 = 5012
PORT2 = 5013
AUTHKEY = 'authkey'

def QueueServerClient(HOST, PORT, AUTHKEY):
    class QueueManager(SyncManager):
        pass

    QueueManager.register('get_queue')
    QueueManager.register('get_name')
    QueueManager.register('get_description')
    manager = QueueManager(address = (HOST, PORT), authkey = AUTHKEY)
    manager.connect()  # This starts the connected client
    return manager

# create three connected managers
qc0 = QueueServerClient(HOST, PORT0, AUTHKEY)
qc1 = QueueServerClient(HOST, PORT1, AUTHKEY)
qc2 = QueueServerClient(HOST, PORT2, AUTHKEY)

# Get the queue objects from the clients
q0 = qc0.get_queue()
q1 = qc1.get_queue()
q2 = qc2.get_queue()

# put stuff in the queues
q0.put('some stuff')
q1.put('other stuff')
q2.put({1:123, 2:'abc'})

# check their sizes
print 'q0 size', q0.qsize()
print 'q1 size', q1.qsize()
print 'q2 size', q2.qsize()

# pull some stuff and print it
print q0.get()
print q1.get()
print q2.get()
Adding an additional server to share a dictionary with the information of the running queue servers, so that consumers can easily tell what's available where, is easy enough with that model. One thing to note, though, is that the shared dictionary requires slightly different syntax than a normal dictionary: dictionary[0] = something will not work. You need to use dictionary.update([(key, value), (otherkey, othervalue)]) and dictionary.get(key), which propagates across to all other clients connected to the dictionary.
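
A hypothetical sketch of that directory idea, reusing the pattern and names above (the port 5014, the get_dict typeid, and the DirectoryManager class are assumptions):

# on the server, next to the three queue servers
directory = {}

class DirectoryManager(SyncManager):
    pass

DirectoryManager.register('get_dict', callable = lambda: directory)
dm = DirectoryManager(address = (HOST, 5014), authkey = AUTHKEY)
dm.start()

# record which queue lives where, using update() rather than item assignment
d = dm.get_dict()
d.update([(name0, PORT0), (name1, PORT1), (name2, PORT2)])
print d.get(name0)

A client registers 'get_dict' on its own SyncManager subclass and connects exactly as QueueServerClient does above.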

Calling dbus-python inside a thread

I'm getting segfaults when calling a dbus method inside a thread. This is my scenario: I have a program, Service1, that exposes a method test. A second program, Service2, exposes a method expose. As this method does some serious numerical computation, I pass some params from expose to a running thread, reader. This thread, in turn, calls the method test of Service1 when it ends its work. I'm getting segfaults in the last dbus call.
The code:
# Service1.py
import gobject
from dbus import SessionBus
from dbus.mainloop.glib import DBusGMainLoop
from dbus.service import Object, BusName, method

class Service1(Object):
    def __init__(self, bus):
        name = BusName('com.example.Service1', bus)
        path = '/'
        super(Service1, self).__init__(name, path)

    @method(dbus_interface='com.example.Service1',
            in_signature='s', out_signature='s')
    def test(self, test):
        print 'test being called'
        return test

dbus_loop = DBusGMainLoop()
dsession = SessionBus(mainloop=dbus_loop)
loop = gobject.MainLoop()
gobject.threads_init()
im = Service1(dsession)
loop.run()
# Service2.py
import threading
from Queue import Queue

import gobject
import dbus
from dbus import SessionBus
from dbus.mainloop.glib import DBusGMainLoop
from dbus.service import Object, BusName, method

dbus_loop = DBusGMainLoop()
dsession = SessionBus(mainloop=dbus_loop)

class Service2(Object):
    def __init__(self, bus):
        name = BusName('com.example.Service2', bus)
        super(Service2, self).__init__(name, '/')
        self.queue = Queue()
        self.db = bus.get_object('com.example.Service1', '/')
        self.dbi = dbus.Interface(self.db, dbus_interface='com.example.Service1')

    @method(dbus_interface='com.example.Service2',
            in_signature='', out_signature='')
    def expose(self):
        print 'calling expose'
        self.queue.put(('params',))

    def reader(self):
        while True:
            val = self.queue.get()
            dd = self.dbi.test('test')
            print dd
            self.queue.task_done()

gobject.threads_init()
loop = gobject.MainLoop()
im = Service2(dsession)
reader = threading.Thread(target=im.reader)
reader.start()
loop.run()
To test, run Service1.py, Service2.py and later this snippet:
import dbus
from dbus import SessionBus
from dbus.mainloop.glib import DBusGMainLoop

dbus_loop = DBusGMainLoop()
session = SessionBus(mainloop=dbus_loop)
proxy = session.get_object('com.example.Service2', '/')
test_i = dbus.Interface(proxy, dbus_interface='com.example.Service2')
test_i.expose()
Service2.py should crash after running this code a few times. But why?
gobject.threads_init() is not enough; you need to call dbus.mainloop.glib.threads_init() to make dbus-glib thread safe.
In Service1.py, try calling gobject.threads_init() before assigning dbus_loop to DBusGMainLoop().
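
A minimal sketch of that initialisation order for the services (dbus.mainloop.glib.threads_init() is the dbus-python call; the rest mirrors the question's setup):

import gobject
import dbus.mainloop.glib
from dbus import SessionBus
from dbus.mainloop.glib import DBusGMainLoop

gobject.threads_init()
dbus.mainloop.glib.threads_init()  # makes the dbus-glib bindings thread safe
dbus_loop = DBusGMainLoop()
dsession = SessionBus(mainloop=dbus_loop)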
