sqlalchemy + mysql deadlocks - python

I insert rows with random data into a MySQL database. After inserting some rows, the greenlet hangs on connecting, while the statistics-printing greenlet keeps running.
This happens with any number of workers (including one), with both the mysql-connector and mysqldb drivers.
SQLite works fine.
This patch has no effect (as I understand it, the issue it works around is already fixed in newer gevent):
def patch():
from gevent import monkey
monkey.patch_all()
# fix https://bugs.launchpad.net/myconnpy/+bug/712037
from mysql.connector.connection import MySQLConnection
MySQLConnection.get_characterset_info = MySQLConnection.get_charset
patch()
from sqlalchemy import MetaData, Table, Column, Integer, String, create_engine
from gevent import spawn, sleep
from random import randrange
from time import time
class Stats(object):
def __init__(self):
self.inserts, self.faults = 0, 0
def run(self):
while True:
sleep(1)
print "%d %d %d" % (time(), self.inserts, self.faults)
self.inserts, self.faults = 0, 0
class Victim(object):
metadata = MetaData()
Entry = Table(
'entry', metadata,
Column('id', Integer, primary_key=True),
Column('junk', String(128), unique=True)
)
def __init__(self, cs, stats):
self.e = create_engine(cs)
self.metadata.drop_all(self.e)
self.metadata.create_all(self.e)
self.stats = stats
def add(self, junk, i):
print i, 'connecting'
c = self.e.connect()
print i, 'connected'
t = c.begin()
try:
q = self.Entry.insert().values(junk=junk)
c.execute(q)
t.commit()
self.stats.inserts += 1
except Exception as e:
print i, 'EXCEPTION: ', e
t.rollback()
self.stats.faults += 1
print i, 'done'
def flood(victim, i):
a, z, l = ord('a'), ord('z')+1, 100
while True:
victim.add(''.join(chr(randrange(a, z)) for _ in xrange(l)), i)
sleep(0)
def main(n_threads, cs):
stats = Stats()
victim = Victim(cs, stats)
threads = [ spawn(flood, victim, i) for i in xrange(n_threads) ]
threads.append(spawn(stats.run))
[t.join() for t in threads]
#main(2, 'mysql://root:root@localhost/junk')
main(1, 'mysql+mysqlconnector://root:root@localhost/junk')
What is happening?
Retested: the error persists without gevent, so it is probably something in the server configuration.

It turns out I just forgot to release used connections, so they were never checked back into the pool.
...
def add(self, junk, i):
print i, 'connecting'
c = self.e.connect()
...
try:
...
except Exception as e:
...
finally:
            c.close() # <-- this returns the connection to the pool
print i, 'done'
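As a side note, the explicit close() can be avoided entirely by using SQLAlchemy's context-manager form; a minimal sketch of add() written that way (assuming engine.begin(), which commits or rolls back and always returns the connection to the pool):
    def add(self, junk, i):
        print i, 'connecting'
        try:
            # the with-block checks a connection out of the pool, commits on
            # success, rolls back on error, and always checks it back in
            with self.e.begin() as conn:
                conn.execute(self.Entry.insert().values(junk=junk))
            self.stats.inserts += 1
        except Exception as e:
            print i, 'EXCEPTION: ', e
            self.stats.faults += 1
        print i, 'done'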

Related

Python Postgres psycopg2 ThreadedConnectionPool exhausted

I have looked into several 'too many clients' related topics here but still can't solve my problem, so I have to ask again for my specific case.
Basically, I set up my local Postgres server and need to do tens of thousands of queries, so I used the Python psycopg2 package. Here is my code:
import psycopg2
import pandas as pd
import numpy as np
from flashtext import KeywordProcessor
from psycopg2.pool import ThreadedConnectionPool
from concurrent.futures import ThreadPoolExecutor
df = pd.DataFrame({'S':['California', 'Ohio', 'Texas'], 'T':['Dispatcher', 'Zookeeper', 'Mechanics']})
# df = pd.concat([df]*10000) # repeat df 10000 times
DSN = "postgresql://User:password@localhost/db"
tcp = ThreadedConnectionPool(1, 800, DSN)
def do_one_query(inputS, inputT):
conn = tcp.getconn()
c = conn.cursor()
    q = """SELECT * from eridata where "State" = 'California' and "Title" = 'Dispatcher' limit 1;"""
c.execute(q)
all_results = c.fetchall()
for row in all_results:
return row
tcp.putconn(conn, close=True)
cnt=0
for idx, row in df.iterrows():
cnt+=1
with ThreadPoolExecutor(max_workers=1) as pool:
ret = pool.submit(do_one_query, row["S"], row["T"])
print ret.result()
print cnt
The code runs well with a small df. If I repeat df 10000 times, I get an error message saying connection pool exhausted. I thought the connections I used had been closed by this line:
tcp.putconn(conn, close=True)
But I guess they actually are not closed? How can I get around this issue?
I've struggled to find really detailed information on how the ThreadedConnectionPool works. https://bbengfort.github.io/observations/2017/12/06/psycopg2-transactions.html ain't bad, but it turns out that its claim that getconn blocks until a connection becomes available is incorrect. Checking the code, all ThreadedConnectionPool adds is a lock around the AbstractConnectionPool methods to prevent race conditions. If more than maxconn connections are requested at any point, the 'connection pool exhausted' PoolError will be raised.
If you want something a bit simpler than the accepted answer, further wrapping the methods in a Semaphore to provide the blocking until a connection becomes available should do the trick:
from psycopg2.pool import ThreadedConnectionPool as _ThreadedConnectionPool
from threading import Semaphore
class ThreadedConnectionPool(_ThreadedConnectionPool):
def __init__(self, minconn, maxconn, *args, **kwargs):
self._semaphore = Semaphore(maxconn)
super().__init__(minconn, maxconn, *args, **kwargs)
def getconn(self, *args, **kwargs):
self._semaphore.acquire()
try:
return super().getconn(*args, **kwargs)
except:
self._semaphore.release()
raise
def putconn(self, *args, **kwargs):
try:
super().putconn(*args, **kwargs)
finally:
self._semaphore.release()
# closeall is inherited as is. This means the Semaphore does
# not get reset, but neither do the core structures for
# maintaining the pool in the original ThreadedConnectionPool
# so a closed pool is not intended to be reused once closed.
Note that ConnectionPools, both standard and threaded, only come with the three putconn, getconn and closeall methods, and nothing fancy like context management. So the above should cover all existing functionality.
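For example, usage of the wrapped pool could then look like this (a sketch; DSN is assumed to be a connection string like the one in the question):
from contextlib import contextmanager

pool = ThreadedConnectionPool(1, 20, dsn=DSN)  # the Semaphore-wrapped subclass above

@contextmanager
def pooled_cursor():
    # getconn() now blocks on the semaphore instead of raising PoolError
    conn = pool.getconn()
    try:
        yield conn.cursor()
        conn.commit()
    finally:
        pool.putconn(conn)  # releases the semaphore via the overridden putconn

with pooled_cursor() as cur:
    cur.execute("SELECT 1;")
    print(cur.fetchone())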
You need to use a queue on top of your pool.
Something like the following should work:
import gevent, sys, random, psycopg2, logging
from contextlib import contextmanager
from gevent.queue import Queue
from gevent.socket import wait_read, wait_write
from psycopg2.pool import ThreadedConnectionPool
from psycopg2 import extensions, OperationalError
import sys
logger = logging.getLogger(__name__)
poolsize = 100 #number of max connections
pdsn = '' # put your dsn here
if sys.version_info[0] >= 3:
integer_types = (int,)
else:
import __builtin__
integer_types = (int, __builtin__.long)
class ConnectorError(Exception):
""" This is a base class for all CONNECTOR related exceptions """
pass
#simplified calls etc. db.fetchall(SQL, arg1, arg2...)
def cursor(): return Pcursor()
def fetchone(PSQL, *args): return Pcursor().fetchone(PSQL, *args)
def fetchall(PSQL, *args): return Pcursor().fetchall(PSQL, *args)
def execute(PSQL, *args): return Pcursor().execute(PSQL, *args)
#singleton connection pool, gets reset if a connection is bad or drops
_pgpool = None
def pgpool():
global _pgpool
if not _pgpool:
try:
_pgpool = PostgresConnectionPool(maxsize=poolsize)
except psycopg2.OperationalError as exc:
_pgpool = None
return _pgpool
class Pcursor(object):
def __init__(self, **kwargs):
#in case of a lost connection lets sit and wait till it's online
global _pgpool
if not _pgpool:
while not _pgpool:
try:
pgpool()
except:
logger.debug('Attempting Connection To Postgres...')
gevent.sleep(1)
def fetchone(self, PSQL, *args):
with _pgpool.cursor() as cursor:
try:
cursor.execute(PSQL, args)
except TypeError:
cursor.execute(PSQL, args[0])
except Exception as exc:
print(sys._getframe().f_back.f_code)
print(sys._getframe().f_back.f_code.co_name)
logger.warning(str(exc))
logger.debug(cursor.query)
return cursor.fetchone()
def fetchall(self, PSQL, *args):
with _pgpool.cursor() as cursor:
try:
cursor.execute(PSQL, args)
except TypeError:
cursor.execute(PSQL, args[0])
except Exception as exc:
print(sys._getframe().f_back.f_code)
print(sys._getframe().f_back.f_code.co_name)
logger.warning(str(exc))
logger.debug(cursor.query)
return cursor.fetchall()
def execute(self, PSQL, *args):
with _pgpool.cursor() as cursor:
try:
cursor.execute(PSQL, args)
except TypeError:
cursor.execute(PSQL, args[0])
except Exception as exc:
print(sys._getframe().f_back.f_code)
print(sys._getframe().f_back.f_code.co_name)
logger.warning(str(exc))
finally:
logger.debug(cursor.query)
return cursor.query
def fetchmany(self, PSQL, *args):
with _pgpool.cursor() as cursor:
try:
cursor.execute(PSQL, args)
except TypeError:
cursor.execute(PSQL, args[0])
while 1:
items = cursor.fetchmany()
if not items:
break
for item in items:
yield item
class AbstractDatabaseConnectionPool(object):
def __init__(self, maxsize=poolsize):
if not isinstance(maxsize, integer_types):
raise TypeError('Expected integer, got %r' % (maxsize, ))
self.maxsize = maxsize
self.pool = Queue()
self.size = 0
def create_connection(self):
#overridden by PostgresConnectionPool
raise NotImplementedError()
def get(self):
pool = self.pool
if self.size >= self.maxsize or pool.qsize():
return pool.get()
self.size += 1
try:
new_item = self.create_connection()
except:
self.size -= 1
raise
return new_item
def put(self, item):
self.pool.put(item)
def closeall(self):
while not self.pool.empty():
conn = self.pool.get_nowait()
try:
conn.close()
except Exception:
pass
    @contextmanager
def connection(self, isolation_level=None):
conn = self.get()
try:
if isolation_level is not None:
if conn.isolation_level == isolation_level:
isolation_level = None
else:
conn.set_isolation_level(isolation_level)
yield conn
except:
if conn.closed:
conn = None
self.closeall()
raise
else:
if conn.closed:
raise OperationalError("Cannot commit because connection was closed: %r" % (conn, ))
finally:
if conn is not None and not conn.closed:
if isolation_level is not None:
conn.set_isolation_level(isolation_level)
self.put(conn)
    @contextmanager
def cursor(self, *args, **kwargs):
isolation_level = kwargs.pop('isolation_level', None)
with self.connection(isolation_level) as conn:
try:
yield conn.cursor(*args, **kwargs)
except:
global _pgpool
_pgpool = None
del(self)
class PostgresConnectionPool(AbstractDatabaseConnectionPool):
def __init__(self,**kwargs):
try:
self.pconnect = ThreadedConnectionPool(1, poolsize, dsn=pdsn)
except:
global _pgpool
_pgpool = None
raise ConnectorError('Database Connection Failed')
maxsize = kwargs.pop('maxsize', None)
self.kwargs = kwargs
AbstractDatabaseConnectionPool.__init__(self, maxsize)
def create_connection(self):
self.conn = self.pconnect.getconn()
self.conn.autocommit = True
return self.conn
def gevent_wait_callback(conn, timeout=None):
"""A wait callback useful to allow gevent to work with Psycopg."""
while 1:
state = conn.poll()
if state == extensions.POLL_OK:
break
elif state == extensions.POLL_READ:
wait_read(conn.fileno(), timeout=timeout)
elif state == extensions.POLL_WRITE:
wait_write(conn.fileno(), timeout=timeout)
else:
raise ConnectorError("Bad result from poll: %r" % state)
extensions.set_wait_callback(gevent_wait_callback)
Then you can call your connection via this:
import db
db.Pcursor().execute(PSQL, arg1, arg2, arg3)
Basically I borrowed the gevent example of async postgres and modified it to support threadpooling via psycopg2.
https://github.com/gevent/gevent/blob/master/examples/psycopg2_pool.py
I added what psycogreen does inside the module, so all you need to do is import and call the class. Each call to the class stacks a new query on the queue, but only uses the pool at a certain size. This way you don't run out of connections. This is essentially similar to what PGBouncer does, which I think would also eliminate your problem.
https://pgbouncer.github.io/
Your problem here is, that you actually do not return the connection to the pool, but close it forever with
tcp.putconn(conn, close=True)
See the documentation here http://initd.org/psycopg/docs/pool.html
If close is True, discard the connection from the pool.
So, if you put 800 connections into your pool, after 801 loops you will get the "exhausted error" because your connection pool size is zero.
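To keep reusing connections instead, return them to the pool without the close flag:
tcp.putconn(conn)  # hand the connection back to the pool for reuse
# tcp.putconn(conn, close=True) discards it, shrinking the pool each time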
I think the reason you get PoolError("exhausted connections") may be that you return before releasing the connection when all_results is not None, so the connection pool gets exhausted:
def do_one_query(inputS, inputT):
...
for row in all_results:
return row <---- return row before putconn when all_results is not None,
tcp.putconn(conn, close=True)
for idx, row in df.iterrows():
cnt+=1
with ThreadPoolExecutor(max_workers=1) as pool:
ret = pool.submit(do_one_query, row["S"], row["T"])
print ret.result()
print cnt
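A sketch of do_one_query that always returns the connection, even on the early return (same tcp pool as in the question):
def do_one_query(inputS, inputT):
    conn = tcp.getconn()
    try:
        c = conn.cursor()
        q = """SELECT * from eridata where "State" = 'California' and "Title" = 'Dispatcher' limit 1;"""
        c.execute(q)
        all_results = c.fetchall()
        for row in all_results:
            return row          # the finally block still runs before returning
    finally:
        tcp.putconn(conn)       # return to the pool instead of discarding it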
I made an ugly implementation where, when the pool is exhausted or the connection is lost, it tries to reconnect to get a new connection, like below:
import uuid
from psycopg2 import OperationalError
from psycopg2.pool import PoolError, ThreadedConnectionPool

class PostgresConnectionPool:
def __init__(self, minconn, maxconn, *args, **kwargs):
self.pool = ThreadedConnectionPool(minconn=minconn, maxconn=maxconn, *args, **kwargs)
def get_conn(self):
try:
# check if connection lost or pool exhausted
con = self.pool.getconn()
cur = con.cursor()
cur.execute("select 1;")
except (OperationalError, PoolError) as oe:
print(f"get pg connection with err:{oe}, reconnect")
# reconnect
key = str(uuid.uuid4())
con = self.pool._connect(key)
return con

multiprocessing - processes won't join?

TL;DR - the consumer processes finish but do not join, no errors are raised and the script runs infinitely, stuck in limbo on the join statement.
I am aiming to speed up a data retrieval process; however, I do not know how many 'tasks' (pieces of data to retrieve) there might be. So I made a modified version of the poison pill method so that the task recognizes when it is no longer retrieving information and triggers the poison pill if statement.
I have posted a proof, which is a working example of my poison pill method, and a full script, which as the name implies is the full script. (Both should run as-is.)
proof:
import multiprocessing
class Task:
def __init__(self, number):
self.number = number
def __call__(self):
"""Find officer and company data and combine and save it"""
try:
# 'gather some data!'
self.result = self.number*2
print(self.number)
# 'fake' finding no data
if self.result >= 8:
raise NameError
except NameError:
# become poison pill once latest is done
self.result = None
def output(self):
return self.result
class Consumer(multiprocessing.Process):
"""Handle process and re-queue complete tasks"""
def __init__(self, waiting_queue, complete_queue):
multiprocessing.Process.__init__(self)
self.waiting_queue = waiting_queue
self.complete_queue = complete_queue
def run(self):
"""process tasks until queue is empty"""
proc_name = self.name
while True:
current_task = self.waiting_queue.get()
current_task()
if current_task.output() is None:
print('{}: Exiting, poison pill reached'.format(proc_name))
self.waiting_queue.task_done()
break
self.waiting_queue.task_done()
self.complete_queue.put(current_task)
print('{}: complete'.format(proc_name))
class Shepard:
"""Handle life cycle of Consumers, Queues and Tasks"""
def __init__(self):
pass
def __call__(self, start_point):
# initialize queues
todo = multiprocessing.JoinableQueue()
finished = multiprocessing.JoinableQueue()
# start consumers
num_consumers = multiprocessing.cpu_count() * 2
consumers = [Consumer(todo, finished) for i in range(num_consumers)]
for q in consumers:
q.start()
        # decide on (max) end limit (make it much longer than the suspected amount of data to be gathered)
start = int(start_point)
max_record_range = 100
end = start + max_record_range
# Enqueue jobs
for i in range(start, end):
todo.put(Task(i))
print('Processes joining')
# wait for processes to join
for p in consumers:
p.join()
print('Processes joined')
# process results - UNFINISHED
pass
# return results - UNFINISHED
return 'results!'
if __name__ == '__main__':
# load start points:
start_points = {'cat1': 1, 'cat2': 3, 'cat3': 4}
master = Shepard()
cat1 = master(start_points['cat1'])
print('cat1 done')
cat2 = master(start_points['cat2'])
print('cat2 done')
cat3 = master(start_points['cat3'])
So here is the full script:
import time
import requests
import sys
import json
import pandas as pd
import multiprocessing
import queue
class CompaniesHouseRequest:
    """Retrieve information from Companies House"""
def __init__(self, company, catagory_url=''):
"""Example URL: '/officers'"""
self.company = str(company)
self.catagory_url = str(catagory_url)
def retrieve(self, key='Rn7RLDV9Tw9v4ShDCotjDtJFBgp1Lr4d-9GRYZMo'):
"""retrieve data from Companies House"""
call = 'https://api.companieshouse.gov.uk/company/' + self.company + self.catagory_url
retrieve_complete = False
while retrieve_complete is False:
resp = requests.get(call, auth=requests.auth.HTTPBasicAuth(key, ''))
code = resp.status_code
if code == 404:
print(resp.status_code)
raise NameError('Company not found')
elif code == 200:
try:
self.data = json.loads(resp.content.decode('UTF8'))
retrieve_complete = True
except json.decoder.JSONDecodeError:
print('Decode Error in Officers!')
else:
print("Error:", sys.exc_info()[0])
print('Retrying')
time.sleep(5)
return self.data
class Company:
"""Retrieve and hold company details"""
def __init__(self, company_number):
self.company_number = company_number
def __call__(self):
"""Create request and process data"""
# make request
req = CompaniesHouseRequest(self.company_number)
data = req.retrieve()
# extract data
try:
line = [self.company_number,
data['company_name'],
data['registered_office_address'].get('premises', ''),
data['registered_office_address'].get('address_line_1', ''),
data['registered_office_address'].get('address_line_2', ''),
data['registered_office_address'].get('country', ''),
data['registered_office_address'].get('locality', ''),
data['registered_office_address'].get('postal_code', ''),
data['registered_office_address'].get('region', '')]
except KeyError:
line = ['' for i in range(0, 9)]
# save as pandas dataframe
return pd.DataFrame([line], columns=['company_number', 'company_name', 'company_address_premises',
'company_address_line_1', 'company_address_line_2',
'company_address_country', 'company_address_locality',
'company_address_postcode', 'company_address_region'])
def name_splitter(name):
split = name.split(', ')
if len(split) > 2:
return [split[2], split[1], split[0]]
else:
return ['', split[1], split[0]]
class Officers:
"""Retrieve and hold officers details"""
def __init__(self, company_number):
self.company_number = company_number
def __call__(self):
"""Create request and process data"""
# make request
req = CompaniesHouseRequest(self.company_number, '/officers')
data = req.retrieve()
# extract data
for officer in data['items']:
if officer['officer_role'] == 'director':
name = name_splitter(officer['name'])
line = [name[0],
name[1],
name[2],
officer.get('occupation'),
officer.get('country_of_residence'),
officer.get('nationality'),
officer.get('appointed_on', ''),
officer['address'].get('premises', ''),
officer['address'].get('address_line_1', ''),
officer['address'].get('address_line_2', ''),
officer['address'].get('country', ''),
officer['address'].get('locality', ''),
officer['address'].get('postal_code', ''),
officer['address'].get('region', '')]
break
director_count = sum(map(lambda x: x['officer_role'] == 'director', data['items']))
if director_count > 1:
line += [True]
elif director_count == 1:
line += [False]
else:
line = ['no directors'] * 3 + [''] * 12
return pd.DataFrame([line], columns=['title', 'first_name', 'surname', 'occupation', 'country_of_residence',
'nationality', 'appointed_on',
'address_premises', 'address_line_1', 'address_line_2',
'address_country', 'address_locality', 'address_postcode',
'address_region', 'multi_director'])
class Task:
def __init__(self, prefix, company_number):
self.prefix = prefix
self.company_number = company_number
def __call__(self):
"""Find officer and company data and combine and save it"""
comp_id = self.prefix + str(self.company_number)
print(comp_id)
try:
# initialise company class
comp = Company(comp_id)
# initialise officer class
off = Officers(comp_id)
            # retrieve and concatenate
self.result = pd.concat([comp(), off()], axis=1)
except NameError:
# become poison pill once latest is done
self.result = None
def output(self):
return self.result
class Consumer(multiprocessing.Process):
"""Handle process and re-queue complete tasks"""
def __init__(self, waiting_queue, complete_queue):
multiprocessing.Process.__init__(self)
self.waiting_queue = waiting_queue
self.complete_queue = complete_queue
def run(self):
"""process tasks until queue is empty"""
proc_name = self.name
while True:
current_task = self.waiting_queue.get()
current_task()
if current_task.output() is None:
print('{}: Exiting, poison pill reached'.format(proc_name))
self.waiting_queue.task_done()
break
self.waiting_queue.task_done()
self.complete_queue.put(current_task)
print('{}: complete'.format(proc_name))
class Shepard:
"""Handle life of Consumers, Queues and Tasks"""
def __init__(self):
pass
def __call__(self, prefix, start_point):
# initialize queues
todo = multiprocessing.JoinableQueue()
finished = multiprocessing.JoinableQueue()
# start consumers
num_consumers = multiprocessing.cpu_count() * 2
consumers = [Consumer(todo, finished) for i in range(num_consumers)]
for q in consumers:
q.start()
# decide on (max) end limit
start = int(start_point)
max_record_range = 1000
end = start + max_record_range
# Enqueue jobs
for i in range(start, end):
todo.put(Task(prefix, i))
print('Processes joining')
# wait for processes to join
for p in consumers:
p.join()
print('Processes joined')
# process results - UNFINISHED
pass
# return results - UNFINISHED
return 'results!'
if __name__ == '__main__':
# paths to data
data_directory = r'C:\Users\hdewinton\OneDrive - Advanced Payment Solutions\Python\Corporate DM\data'
base = r'\base'
# load start points:
init = {"England": 10926071, "Scotland": 574309, "Ireland": 647561}
    # gather data for each category
master = Shepard()
ireland = master('NI', init['Ireland'])
scotland = master('SC', init['Scotland'])
england = master('', init['England'])
TL;DR - the consequence (getting stuck in limbo while the consumers fail to join) can be fixed by changing this:
finished = multiprocessing.JoinableQueue()
to this:
manager = multiprocessing.Manager()
finished = manager.Queue()
Details - "When an object is put on a queue, the object is pickled and a background thread later flushes the pickled data to an underlying pipe. This has some consequences which are a little surprising, but should not cause any practical difficulties – if they really bother you then you can instead use a queue created with a manager." from the documentation
The second queue, of finished items, triggers one of the aforementioned surprising consequences once a certain number of tasks have been added to it. Below the limit there are no problems; above the limit the consequence occurs. This does not happen in the proof script because the second queue, while present, is not used. The limit depends on the size and complexity of the Task objects, so I reckon this has something to do with the flushing of pickled data only occurring after a certain volume of data is reached; the volume of data is what triggers this consequence.
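This matches the documented warning about joining processes that use queues: a child that has put items on a multiprocessing.Queue will not terminate until its buffered items are flushed to the pipe, so joining it before the queue is drained can deadlock. A minimal sketch of the manager-based workaround (the sizes are arbitrary, just enough to leave data buffered):
import multiprocessing

def producer(q):
    for i in range(10000):
        q.put('x' * 1000)

if __name__ == '__main__':
    # with multiprocessing.Queue() the child would not exit until its buffered
    # items were flushed, so join() could block unless the parent drains the
    # queue first; a manager-backed queue holds the data in the manager
    # process instead, so the child can exit and be joined cleanly
    manager = multiprocessing.Manager()
    finished = manager.Queue()
    p = multiprocessing.Process(target=producer, args=(finished,))
    p.start()
    p.join()                      # joins promptly
    print(finished.qsize())       # all 10000 items are available to consume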
Addendum - Another error also appears once the fix has been implemented: a pipe error occurs because the consumers of the todo queue are terminated before the queue is empty, leaving the pipe within the queue object with no connection to send data to. This triggers a WinError 232. Not to worry though: the pipe error can be fixed by emptying the queue before exiting the consumers.
Simply add this to the consumer class's run() method:
while not self.waiting_queue.empty():
try:
self.waiting_queue.get(timeout=0.001)
except:
pass
self.waiting_queue.close()
This removes every element from the queue. Make sure it's after the main while loop, and the pipe error should not occur because the consumers will empty the queue before terminating.
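Putting it together, the consumer's run() method then looks roughly like this (same queues and poison-pill check as above):
    def run(self):
        """process tasks until the poison pill is reached, then drain the queue"""
        proc_name = self.name
        while True:
            current_task = self.waiting_queue.get()
            current_task()
            if current_task.output() is None:
                print('{}: Exiting, poison pill reached'.format(proc_name))
                self.waiting_queue.task_done()
                break
            self.waiting_queue.task_done()
            self.complete_queue.put(current_task)
            print('{}: complete'.format(proc_name))
        # drain whatever is left so the queue has nothing buffered to flush
        while not self.waiting_queue.empty():
            try:
                self.waiting_queue.get(timeout=0.001)
            except:
                pass
        self.waiting_queue.close()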

Python 3.6 Object/Class Threading

I'm looking to create a "self contained threaded class" using Python 3.
At a high level, what I would like to do is spawn 50 asynchronous device objects from my "main" class and then just use their methods as needed. This is not difficult when dealing with objects in a synchronous situation, but it gets cloudy quite quickly as we move to asynchronous processing. The primary idea is to keep the threading self-contained in the device class so my base (main.py) code stays streamlined/clean and free of any thread management.
I don't plan on any resource sharing in this case, so I think I'm clear of any thread-lock issues.
Here is some sample code; I hope someone can provide some hints or samples on making it a self-threaded class (meaning I don't want to manage threads at the main.py level):
Sample main.py
from deviceworker import Device
availableworkers = {'USA':'services.groupkt.com', 'IND':'services.groupkt.com'}
Activeworkers = []
for name, ip in availableworkers.items():
Activeworkers.append(Device(name, ip))
for worker in Activeworkers:
worker.checkcountry() # asynchronous call - (we don't want to wait for a response)
# The idea is to keep this code as clean as possible.
Sample Object: deviceworker.py
import urllib.request
import urllib.parse
import json
class Device:
def __init__(self, name, endpoint, preamble = 'state', port = 80 ):
self.name = name
self.connected =False
self.connection = HTTPConnection(endpoint, preamble, port)
self.getStatus()
def getStatus(self, check_for = None):
self.urlresponse = json.loads(self.connection.GET('get/USA/all')) #Use USA just to verify connection
if check_for:
pass
self.connected = True
def checkcountry(self):
print(self.connection.GET('get/%s/all' % self.name))
class HTTPConnection:
def __init__(self, endpoint, preamble = None, port = 80):
        if preamble: # specifying a version after the port and before the method
self.url = 'http://%s:%s/%s/' % (endpoint, port, preamble)
else:
self.url = 'http://%s:%s/' % (endpoint, port)
print('_init_ url=%s' % self.url)
def GET(self, operation):
#try:
#print('%s%s' % (self.url, operation))
with urllib.request.urlopen('%s%s' % (self.url, operation)) as f:
return f.read().decode('utf-8')
#except Exception as e:
#raise Exception("GET Request Failed")
I've stripped most of the exception handling for simplicity. The sample above should work.
--- UPDATE ---
So I think I've sort of figured it out. Still not getting the parallelism I would expect from the documentation.
import threading
import urllib.request
import urllib.parse
import json
import time
class Device(threading.Thread):
def __init__(self, name, endpoint, preamble = 'state', port = 80 ):
threading.Thread.__init__(self)
self.name = name
self.connected = False
self.connection = HTTPConnection(endpoint, preamble, port)
print('%s: __init__' % self.name)
def run(self):
self.getStatus()
print('%s: hit run()' % self.name)
def getStatus(self):
self.urlresponse = json.loads(self.connection.GET('get/USA/all')) #Use USA just to verify connection
self.connected = True
def checkcountry(self):
if (self.name == 'USA'): self.waittime = 10
else: self.waittime = 0
print('%s: Getting Codes - wait time: %s' % (self.name, self.waittime))
start_time=time.time()
time.sleep(self.waittime)
result =self.connection.GET('get/%s/all' % self.name)
elapsed_time=time.time() - start_time
print('%s: Got Codes - second: %s' % (self.name, elapsed_time))
class HTTPConnection:
def __init__(self, endpoint, preamble = None, port = 80):
        if preamble: # specifying a version after the port and before the method
self.url = 'http://%s:%s/%s/' % (endpoint, port, preamble)
else:
self.url = 'http://%s:%s/' % (endpoint, port)
def GET(self, operation):
with urllib.request.urlopen('%s%s' % (self.url, operation)) as f:
return f.read().decode('utf-8')
DeviceList = {'USA':'services.groupkt.com', 'IND':'services.groupkt.com'}
ActiveDevices = []
for name, ip in DeviceList.items():
print('main: creating object for: %s' % name)
newDevice = Device(name, ip)
ActiveDevices.append(newDevice)
newDevice.start()
for device in ActiveDevices:
print('main: calling checkcountry() for: %s' % device.name)
device.checkcountry()
Here are the results:
main: creating object for: USA
USA: __init__
main: creating object for: IND
IND: __init__
main: calling checkcountry() for: USA
USA: Getting Codes - wait time: 10
USA: Got Codes - second: 10.167016744613647
main: calling checkcountry() for: IND
IND: Getting Codes - wait time: 0
IND: Got Codes - second: 0.11001110076904297
By adding the delay to the USA search I would have expected IND to finish first, but it appears the calls were serialized.
I'm running this on:
Python 3.6.0 (v3.6.0:41df79263a11, Dec 23 2016, 07:18:10) [MSC v.1900 32 bit (Intel)] on win32
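For illustration, a minimal sketch of one way to keep the thread management inside the class while letting the calls overlap: give each Device its own single-worker executor and have checkcountry() submit the work instead of running it inline (the HTTP call is replaced with a sleep here, so only the timing behaviour is shown):
from concurrent.futures import ThreadPoolExecutor
import time

class Device:
    def __init__(self, name, waittime=0):
        self.name = name
        self.waittime = waittime
        self._executor = ThreadPoolExecutor(max_workers=1)  # thread lives inside the object

    def checkcountry(self):
        # returns a Future immediately; the lookup runs on the device's own thread
        return self._executor.submit(self._do_check)

    def _do_check(self):
        start = time.time()
        time.sleep(self.waittime)               # stand-in for the HTTP GET
        print('%s: done in %.2fs' % (self.name, time.time() - start))

devices = [Device('USA', waittime=10), Device('IND', waittime=0)]
futures = [d.checkcountry() for d in devices]   # both start right away
for f in futures:
    f.result()                                  # IND prints first, USA ~10s later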
Here is a custom thread sample with locking that worked great for me, better than using an event.
Try it in Colab.
import threading, time
import inspect  # used by center() for caller introspection
i=0
luk=threading.Lock()
global i
global y
global t_num
class special_thread(threading.Thread):
"""This function starts a Thread class"""
def __init__(self, execute,threadID , name, daemon,args=(), repetitive=False,kwargs=None, interval_sec=60 ):
threading.Thread.__init__(self)
self.daemon = daemon
self.stopped = threading.Event()
self.interval_sec = interval_sec
self.execute = execute
self.name = name
if kwargs is None:
kwargs = {}
self.args = args
self.kwargs=kwargs
self.repetitive=repetitive
self.threadID = threadID
print(args)
def stop(self):
self.stopped.set()
self.join()
def run(self):
if self.repetitive:
while not self.stopped.wait(self.interval_sec):
self.execute(*self.args,**self.kwargs)
else:
self.execute(*self.args,**self.kwargs)
def center(t_num):
y=0
luk.acquire()
caller = inspect.getouterframes(inspect.currentframe())[1][3]
    print(' {} is acquiring at {} '.format( caller, str(time.ctime())))
y+=t_num
print( "Inside %s()" % caller)
print('thread number is ',t_num,y)
time.sleep(2*t_num)
luk.release()
    print(' {} is releasing at {} '.format( caller, str(time.ctime())))
def target_uno():
t_num=1
center(t_num)
def target_dos():
t_num=2
center(t_num)
target_uno=special_thread(execute=target_uno, args=(),repetitive=True, interval_sec=1,threadID=10004,
name='target_uno',
daemon=False )
target_dos=special_thread(execute=target_dos, args=(),repetitive=True, interval_sec=1,threadID=10004,
name='target_dos',
daemon=False )
if __name__ == "__main__":
target_uno.start()
target_dos.start()
time.sleep(20)
target_uno.stop()
target_dos.stop()

Python multiprocessing RemoteManager under a multiprocessing.Process

I'm trying to start a data queue server under a managing process (so that it can later be turned into a service), and while the data queue server function works fine in the main process, it does not work in a process created using multiprocessing.Process.
The dataQueueServer and dataQueueClient code is based on the code from the multiprocessing module documentation here.
When run on its own, dataQueueServer works well. However, when run using a multiprocessing.Process's start() in mpqueue, it doesn't work (when tested with the client). I am using the dataQueueClient without changes to test both cases.
The code does reach the serve_forever in both cases, so I think the server is working, but something is blocking it from communicating back to the client in the mpqueue case.
I have placed the loop that runs the serve_forever() part under a thread, so that it can be stoppable.
Here is the code:
mpqueue # this is the "manager" process trying to spawn the server in a child process
import time
import multiprocessing
import threading
import dataQueueServer
class Printer():
def __init__(self):
self.lock = threading.Lock()
def tsprint(self, text):
with self.lock:
print text
class QueueServer(multiprocessing.Process):
def __init__(self, name = '', printer = None):
multiprocessing.Process.__init__(self)
self.name = name
self.printer = printer
self.ml = dataQueueServer.MainLoop(name = 'ml', printer = self.printer)
def run(self):
self.printer.tsprint(self.ml)
self.ml.start()
def stop(self):
self.ml.stop()
if __name__ == '__main__':
printer = Printer()
qs = QueueServer(name = 'QueueServer', printer = printer)
printer.tsprint(qs)
printer.tsprint('starting')
qs.start()
printer.tsprint('started.')
printer.tsprint('Press Ctrl-C to quit')
try:
while True:
time.sleep(60)
except KeyboardInterrupt:
printer.tsprint('\nTrying to exit cleanly...')
qs.stop()
printer.tsprint('stopped')
dataQueueServer
import time
import threading
from multiprocessing.managers import BaseManager
from multiprocessing import Queue
HOST = ''
PORT = 50010
AUTHKEY = 'authkey'
## Define some helper functions for use by the main process loop
class Printer():
def __init__(self):
self.lock = threading.Lock()
def tsprint(self, text):
with self.lock:
print text
class QueueManager(BaseManager):
pass
class MainLoop(threading.Thread):
"""A thread based loop manager, allowing termination signals to be sent
to the thread"""
def __init__(self, name = '', printer = None):
threading.Thread.__init__(self)
self._stopEvent = threading.Event()
self.daemon = True
self.name = name
if printer is None:
self.printer = Printer()
else:
self.printer = printer
## create the queue
self.queue = Queue()
## Add a function to the handler to return the queue to clients
self.QM = QueueManager
self.QM.register('get_queue', callable=lambda:self.queue)
self.queue_manager = self.QM(address=(HOST, PORT), authkey=AUTHKEY)
self.queue_server = self.queue_manager.get_server()
def __del__(self):
self.printer.tsprint( 'closing...')
def run(self):
self.printer.tsprint( '{}: started serving'.format(self.name))
self.queue_server.serve_forever()
def stop(self):
self.printer.tsprint ('{}: stopping'.format(self.name))
self._stopEvent.set()
def stopped(self):
return self._stopEvent.isSet()
def start():
printer = Printer()
ml = MainLoop(name = 'ml', printer = printer)
ml.start()
return ml
def stop(ml):
ml.stop()
if __name__ == '__main__':
ml = start()
raw_input("\nhit return to stop")
stop(ml)
And a client:
dataQueueClient
import datetime
from multiprocessing.managers import BaseManager
n = 0
N = 10**n
HOST = ''
PORT = 50010
AUTHKEY = 'authkey'
def now():
return datetime.datetime.now()
def gen(n, func, *args, **kwargs):
k = 0
while k < n:
yield func(*args, **kwargs)
k += 1
class QueueManager(BaseManager):
pass
QueueManager.register('get_queue')
m = QueueManager(address=(HOST, PORT), authkey=AUTHKEY)
m.connect()
queue = m.get_queue()
def load(msg, q):
return q.put(msg)
def get(q):
return q.get()
lgen = gen(N, load, msg = 'hello', q = queue)
t0 = now()
while True:
try:
lgen.next()
except StopIteration:
break
t1 = now()
print 'loaded %d items in ' % N, t1-t0
t0 = now()
while queue.qsize() > 0:
queue.get()
t1 = now()
print 'got %d items in ' % N, t1-t0
So it seems like the solution is simple enough: Don't use serve_forever(), and use manager.start() instead.
According to Eli Bendersky, the BaseManager (and its extended version SyncManager) already spawns the server in a new process (and looking at the multiprocessing.managers code confirms this). The problem I have been experiencing stems from the form used in the example, in which the server is started under the main process.
I still don't understand why the current example doesn't work when run under a child process, but that's no longer an issue.
Here's the working (and much simplified from OP) code to manage multiple queue servers:
Server:
from multiprocessing import Queue
from multiprocessing.managers import SyncManager
HOST = ''
PORT0 = 5011
PORT1 = 5012
PORT2 = 5013
AUTHKEY = 'authkey'
name0 = 'qm0'
name1 = 'qm1'
name2 = 'qm2'
description = 'Queue Server'
def CreateQueueServer(HOST, PORT, AUTHKEY, name = None, description = None):
name = name
description = description
q = Queue()
class QueueManager(SyncManager):
pass
QueueManager.register('get_queue', callable = lambda: q)
    QueueManager.register('get_name', callable = lambda: name)
    QueueManager.register('get_description', callable = lambda: description)
manager = QueueManager(address = (HOST, PORT), authkey = AUTHKEY)
manager.start() # This actually starts the server
return manager
# Start three queue servers
qm0 = CreateQueueServer(HOST, PORT0, AUTHKEY, name0, description)
qm1 = CreateQueueServer(HOST, PORT1, AUTHKEY, name1, description)
qm2 = CreateQueueServer(HOST, PORT2, AUTHKEY, name2, description)
raw_input("return to end")
Client:
from multiprocessing.managers import SyncManager
HOST = ''
PORT0 = 5011
PORT1 = 5012
PORT2 = 5013
AUTHKEY = 'authkey'
def QueueServerClient(HOST, PORT, AUTHKEY):
class QueueManager(SyncManager):
pass
QueueManager.register('get_queue')
QueueManager.register('get_name')
QueueManager.register('get_description')
manager = QueueManager(address = (HOST, PORT), authkey = AUTHKEY)
manager.connect() # This starts the connected client
return manager
# create three connected managers
qc0 = QueueServerClient(HOST, PORT0, AUTHKEY)
qc1 = QueueServerClient(HOST, PORT1, AUTHKEY)
qc2 = QueueServerClient(HOST, PORT2, AUTHKEY)
# Get the queue objects from the clients
q0 = qc0.get_queue()
q1 = qc1.get_queue()
q2 = qc2.get_queue()
# put stuff in the queues
q0.put('some stuff')
q1.put('other stuff')
q2.put({1:123, 2:'abc'})
# check their sizes
print 'q0 size', q0.qsize()
print 'q1 size', q1.qsize()
print 'q2 size', q2.qsize()
# pull some stuff and print it
print q0.get()
print q1.get()
print q2.get()
Adding an additional server to share a dictionary with the information of the running queue servers, so that consumers can easily tell what's available where, is easy enough using that model. One thing to note, though, is that the shared dictionary requires slightly different syntax than a normal dictionary: dictionary[0] = something will not work. You need to use dictionary.update([(key, value), (otherkey, othervalue)]) and dictionary.get(key) syntax, which propagates to all other clients connected to this dictionary.
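A sketch of that shared-dictionary idea, using the same register() pattern as the queue servers above (the port and key names are illustrative):
from multiprocessing.managers import SyncManager

info = {}  # lives in the manager's server process

class InfoManager(SyncManager):
    pass

InfoManager.register('get_info', callable = lambda: info)
server = InfoManager(address = ('', 5014), authkey = 'authkey')
server.start()

# client side: the proxy exposes the dict's methods, not item assignment
client = InfoManager(address = ('', 5014), authkey = 'authkey')
client.connect()
d = client.get_info()
d.update([('qm0', 'Queue Server on port 5011')])  # works
print d.get('qm0')                                 # works
# d['qm1'] = ... would not work on this proxy, as noted above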

Python threads - crashing when they access postgreSQL

here is a simple threading program which works fine:
import psycopg2
import threading
import time
class testit(threading.Thread):
def __init__(self, currency):
threading.Thread.__init__(self)
self.currency = currency
def run(self):
global SQLConnection
global cursor
SQLString = "Select dval from ddata where dname ='%s' and ddate = '2009-07-17'" \
%self.currency
z = time.time()
while (time.time() - z) < 2:
print SQLString
SQLConnection = psycopg2.connect(database = "db", user = "xxxx", password = "xxxx")
cursor = SQLConnection.cursor()
a = testit('EURCZK')
b = testit('EURPLN')
a.start()
b.start()
However as soon as I try to start accessing the postgresql database in the thread with the following code, I always get a stop-sign crash:
import psycopg2
import threading
import time
class testit(threading.Thread):
def __init__(self, currency):
threading.Thread.__init__(self)
self.currency = currency
def run(self):
global SQLConnection
global cursor
SQLString = "Select dval from ddata where dname ='%s'and ddate = '2009-07-17'" %self.currency
z = time.time()
while (time.time() - z) < 2:
cursor.execute(SQLString)
print cursor.fetchall()
SQLConnection = psycopg2.connect(database = "db", user = "xxxx", password = "xxxx")
cursor = SQLConnection.cursor()
a = testit('EURCZK')
b = testit('EURPLN')
a.start()
b.start()
The only difference between the two is in the while loop. I am fairly new to thread programming. Is the postgres library (psycopg2) not "thread safe"? All this is running on Windows XP. Anything I can do?
Thanks.
global SQLConnection
global cursor
Seems you're accessing globals from multiple threads? You should never do that unless those globals are thread safe, or you provide the proper locking yourself.
You now have 2 threads accessing the same connection and the same cursor. They'll step on each other's toes. The psycopg2 connection might be thread safe, but cursors are not.
Use one cursor (and probably one connection as well) per thread.
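For reference, psycopg2 exposes its DB-API thread-safety level as a module attribute; level 2 means threads may share the module and connections, but not cursors:
import psycopg2
print psycopg2.threadsafety  # 2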
Bingo, it's working. Someone left an answer suggesting giving each thread its own connection, but then seems to have removed it. And yep, that solves it. So this code works:
import psycopg2
import threading
import time
class testit(threading.Thread):
def __init__(self, currency):
threading.Thread.__init__(self)
self.currency = currency
self.SQLConnection = psycopg2.connect(database = "db", user = "xxxx", password = "xxxx")
self.cursor = self.SQLConnection.cursor()
def run(self):
SQLString = "Select dval from ddata where dname ='%s' and ddate = '2009-07-17'" \
%self.currency
z = time.time()
while (time.time() - z) < 2:
self.cursor.execute(SQLString)
print self.cursor.fetchall()
a = testit('EURCZK')
b = testit('EURPLN')
a.start()
b.start()
