I wrote a script with this sort of logic in order to insert many records into a PostgreSQL table as they are generated.
#!/usr/bin/env python3
import asyncio
from concurrent.futures import ProcessPoolExecutor as pool
from functools import partial

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base

metadata = sa.MetaData(schema='stackoverflow')
Base = declarative_base(metadata=metadata)


class Example(Base):
    __tablename__ = 'example'
    pk = sa.Column(sa.Integer, primary_key=True)
    text = sa.Column(sa.Text)


sa.event.listen(Base.metadata, 'before_create',
                sa.DDL('CREATE SCHEMA IF NOT EXISTS stackoverflow'))

engine = sa.create_engine(
    'postgresql+psycopg2://postgres:password@localhost:5432/stackoverflow'
)

Base.metadata.create_all(engine)
session = sa.orm.sessionmaker(bind=engine, autocommit=True)()


def task(value):
    engine.dispose()
    with session.begin():
        session.add(Example(text=value))


async def infinite_task(loop):
    spawn_task = partial(loop.run_in_executor, None, task)
    while True:
        await asyncio.wait([spawn_task(value) for value in range(10000)])


def main():
    loop = asyncio.get_event_loop()

    with pool() as executor:
        loop.set_default_executor(executor)
        asyncio.ensure_future(infinite_task(loop))
        loop.run_forever()

    loop.close()


if __name__ == '__main__':
    main()
This code works just fine, creating a pool of as many processes as I have CPU cores, and happily chugging along forever. I wanted to see how threads would compare to processes, but I could not get a working example. Here are the changes I made:
from concurrent.futures import ThreadPoolExecutor as pool

session_maker = sa.orm.sessionmaker(bind=engine, autocommit=True)
Session = sa.orm.scoped_session(session_maker)


def task(value):
    engine.dispose()

    # create new session per thread
    session = Session()
    with session.begin():
        session.add(Example(text=value))

    # remove session once the work is done
    Session.remove()
This version runs for a while before a flood of "too many clients" exceptions:
sqlalchemy.exc.OperationalError: (psycopg2.OperationalError) FATAL: sorry, too many clients already
What am I missing?
It turns out that the problem is engine.dispose(), which, in the words of Mike Bayer (zzzeek), "is leaving PG connections lying open to be garbage collected."
Source: https://groups.google.com/forum/#!topic/sqlalchemy/zhjCBNebnDY
So the updated task function looks like this:
def task(value):
    # create new session per thread
    session = Session()
    with session.begin():
        session.add(Example(text=value))

    # remove the session from the scoped_session registry once the work is done
    Session.remove()
It looks like you're opening a lot of new connections without closing them; try adding engine.dispose() after the work is done:
from concurrent.futures import ThreadPoolExecutor as pool

session_maker = sa.orm.sessionmaker(bind=engine, autocommit=True)
Session = sa.orm.scoped_session(session_maker)


def task(value):
    engine.dispose()

    # create new session per thread
    session = Session()
    with session.begin():
        session.add(Example(text=value))

    # remove session once the work is done
    Session.remove()
    engine.dispose()
Keep in mind the cost of opening a new connection: ideally you should have one connection per process/thread. I'm not sure exactly how ThreadPoolExecutor handles this, but it's likely that connections are not being closed when a thread finishes executing.
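If it helps to see where the connections are going, here is a minimal sketch of my own (not from the original post) that watches the pool from a background thread while the workers run. It assumes the engine and task() defined above and a QueuePool (the default); status() and checkedout() are standard pool methods.

import threading
import time

def monitor_pool(engine, interval=5):
    # Print the pool's own view of its connections: how many it is holding
    # and how many are currently checked out by worker threads.
    while True:
        print(engine.pool.status(), '| checked out:', engine.pool.checkedout())
        time.sleep(interval)

threading.Thread(target=monitor_pool, args=(engine,), daemon=True).start()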
Related
engine = db.create_engine(self.url, convert_unicode=True, pool_size=5, pool_recycle=1800, max_overflow=10)
connection = self.engine.connect()
Session = scoped_session(sessionmaker(bind=self.engine, autocommit=False, autoflush=True))
I initialize my session like this. And
def first():
    with Session() as session:
        second(session)

def second(session):
    session.add(obj)
    third(session)

def third(session):
    session.execute(query)
I use my session like this.
I think the pool assigns one connection per session, so I expected the code above to work fine with pool_size=1, max_overflow=0. But when I configure it like that, it gets stuck and raises an exception like:
descriptor '__init__' requires a 'super' object but received a 'tuple'
Why is that? Does the pool assign more than one connection per session rather than one each?
And when using the session in a with block, can I skip worrying about commit and rollback when an exception occurs?
I discovered that SQLAlchemy does not release the database connections (in my case), and they pile up to the point that they might crash the server. The connections are made from different threads.
Here is the simplified code
"""
Test to see DB connection allocation size while making call from multiple threads
"""
from time import sleep
from threading import Thread, current_thread
import uuid
from sqlalchemy import func, or_, desc
from sqlalchemy import event
from sqlalchemy import ForeignKey, Column, Integer, String, DateTime, UniqueConstraint
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import scoped_session
from sqlalchemy.orm import relationship
from sqlalchemy.orm import scoped_session, Session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import Integer, DateTime, String, Boolean, Text, Float
from sqlalchemy.engine import Engine
from sqlalchemy.pool import NullPool
# MySQL
SQLALCHEMY_DATABASE = 'mysql'
SQLALCHEMY_DATABASE_URI = 'mysql+pymysql://amalgam:amalgam@localhost/amalgam?charset=utf8mb4'  # https://stackoverflow.com/questions/47419943/pymysql-warning-1366-incorrect-string-value-xf0-x9f-x98-x8d-t
SQLALCHEMY_ECHO = False
SQLALCHEMY_ENGINE_OPTIONS = {'pool_size': 40, 'max_overflow': 0}
SQLALCHEMY_ISOLATION_LEVEL = "AUTOCOMMIT"
# DB Engine
# engine = create_engine(SQLALCHEMY_DATABASE_URI, echo=SQLALCHEMY_ECHO, pool_recycle=3600,
#                        isolation_level=SQLALCHEMY_ISOLATION_LEVEL,
#                        **SQLALCHEMY_ENGINE_OPTIONS
#                        )  # Connect to server
engine = create_engine(SQLALCHEMY_DATABASE_URI,
                       echo=SQLALCHEMY_ECHO,
                       # poolclass=NullPool,
                       pool_recycle=3600,
                       isolation_level=SQLALCHEMY_ISOLATION_LEVEL,
                       **SQLALCHEMY_ENGINE_OPTIONS
                       )  # Connect to server
session_factory = sessionmaker(bind=engine)
Base = declarative_base()
# ORM Entity
class User(Base):
    LEVEL_NORMAL = 'normal'
    LEVEL_ADMIN = 'admin'

    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    name = Column(String(100), nullable=True)
    email = Column(String(100), nullable=True, unique=True)
    password = Column(String(100), nullable=True)
    level = Column(String(100), default=LEVEL_NORMAL)
# Workers
NO = 10
workers = []
_scoped_session_factory = scoped_session(session_factory)
def job(job_id):
    session = _scoped_session_factory()

    print("Job is {}".format(job_id))
    user = User(name='User {} {}'.format(job_id, uuid.uuid4()), email='who cares {} {}'.format(job_id, uuid.uuid4()))
    session.add(user)
    session.commit()
    session.close()
    print("Job {} done".format(job_id))
    sleep(10)

# Create worker threads
for i in range(NO):
    workers.append(Thread(target=job, kwargs={'job_id': i}))

# Start them
for worker in workers:
    worker.start()

# Join them
for worker in workers:
    worker.join()
# Allow some time to see MySQL's "show processlist;" command
sleep(10)
The moment the program reaches the final
sleep(10)
and I run
show processlist;
the result shows that all connections to the DB are still alive.
How can I force those connections to close?
Note: I could make use of
poolclass=NullPool
but that solution feels too restrictive: I would still like to have a connection pool, but be able to close its connections on demand.
The following is from the documentation for the QueuePool constructor:
pool_size – The size of the pool to be maintained, defaults to 5. This
is the largest number of connections that will be kept persistently in
the pool. Note that the pool begins with no connections; once this
number of connections is requested, that number of connections will
remain. pool_size can be set to 0 to indicate no size limit; to
disable pooling, use a NullPool instead.
max_overflow – The maximum overflow size of the pool. When the number
of checked-out connections reaches the size set in pool_size,
additional connections will be returned up to this limit. When those
additional connections are returned to the pool, they are disconnected
and discarded. It follows then that the total number of simultaneous
connections the pool will allow is pool_size + max_overflow, and the
total number of “sleeping” connections the pool will allow is
pool_size. max_overflow can be set to -1 to indicate no overflow
limit; no limit will be placed on the total number of concurrent
connections. Defaults to 10.
SQLALCHEMY_ENGINE_OPTIONS = {'pool_size': 40, 'max_overflow': 0}
Given the above, this configuration asks SQLAlchemy to keep up to 40 connections open.
If you don't like that but still want some connections kept available, you might try a configuration like this:
SQLALCHEMY_ENGINE_OPTIONS = {'pool_size': 10, 'max_overflow': 30}
This will keep 10 persistent connections in the pool and burst up to 40 connections if they are requested concurrently. Any connections in excess of the configured pool size are closed immediately when they are checked back into the pool.
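As a sketch (not part of the original answer), this is how that configuration plugs into the engine from the question, together with engine.dispose(), which is the documented way to close the pool's idle connections on demand:

# Sketch only, reusing the names from the question: a smaller persistent pool
# with a larger overflow. Overflow connections are closed as soon as they are
# checked back in; at most 10 idle connections are kept around.
engine = create_engine(SQLALCHEMY_DATABASE_URI,
                       echo=SQLALCHEMY_ECHO,
                       pool_recycle=3600,
                       isolation_level=SQLALCHEMY_ISOLATION_LEVEL,
                       pool_size=10,
                       max_overflow=30)

# ... start and join the worker threads as before ...

# If you want even the idle pooled connections gone (e.g. before the final
# sleep), dispose of the pool; a fresh pool is created on the next checkout.
engine.dispose()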
I am trying to convert my single-threaded application, which uses a database through SQLAlchemy, into a multi-threaded application. I found that the SQLAlchemy session is not thread-safe, so we need to use the scoped_session factory for thread-safe DB access.
Below is my input dataset
input_list = [data1, data2, data3, data4, data5]
Single-threaded application:
from sqlalchemy.orm import sessionmaker, scoped_session

Session = sessionmaker(bind=engine_url)

def myfunction(data):
    db_session = Session()
    print(db_session)
    # use db_session to query/store the data

for data in input_list:
    myfunction(data)
When I convert it to a multi-threaded application:
from multiprocessing.pool import ThreadPool  # assumed import; the original snippet omits it
from sqlalchemy.orm import sessionmaker, scoped_session

Session = scoped_session(sessionmaker(bind=engine_url))

def myfunction(data):
    db_session = Session()
    print(db_session)
    # use db_session to query/store the data

def myfunction_parallel():
    with ThreadPool(4) as pool:
        output = pool.map(myfunction, input_list)
In the multi-threaded variant, I get the same db_session object each time, but my expectation was that a new session object would be created for each thread, so the sessions should be different. Why aren't they?
The scoped session registry registers a session for each thread that requests one. This enables code to call db_session = Session() and get the expected session for that thread.
However, it is the application's responsibility to inform the session registry when a session is no longer required. The application does this by calling Session.remove(), as documented here:
The scoped_session.remove() method first calls Session.close() on the current Session, which has the effect of releasing any connection/transactional resources owned by the Session first, then discarding the Session itself. “Releasing” here means that connections are returned to their connection pool and any transactional state is rolled back, ultimately using the rollback() method of the underlying DBAPI connection.
At this point, the scoped_session object is “empty”, and will create a new Session when called again.
This code should work as expected:
def myfunction(data):
    db_session = Session()
    print(db_session)
    # use db_session to query/store the data
    Session.remove()
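A small variation (my sketch, not part of the original answer): putting the removal in a finally block guarantees the thread-local session is discarded even if the query raises.

def myfunction(data):
    db_session = Session()
    try:
        print(db_session)
        # use db_session to query/store the data
    finally:
        # always discard this thread's session, even on error
        Session.remove()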
I'm making a Python Flask app with an SQLite database.
Is there a way to queue write requests so the app can run smoothly, since SQLite doesn't support multiple concurrent writes or commits?
This is my connection string:
engine = create_engine('sqlite:///IT_DataBase.db',
                       connect_args={'check_same_thread': False})
Base.metadata.bind = engine
DBSession = sessionmaker(bind=engine)
session = DBSession()
And this is the commit code, as an example:
@app.route('/NewRequest', methods=['GET', 'POST'])
@login_required
def NewRequest():
    connUser = session.query(User).filter(User.id == Session.get('user_id')).one()
    if request.method == 'GET':
        Types = session.query(Req_Type.id, Req_Type.Type_name)
        Pr = session.query(Req_Priorities.id, Req_Priorities.Priority_name)
        return render_template('NewRequest.html', conn=connUser, name=current_user.name, items=Types, priorities=Pr)
    else:
        name = request.form['Name']
        Description = request.form['Description']
        Type = request.form.get('Type')
        Priority = request.form.get('Priority')
        newRequest = Requests(name=name, Record_Created=datetime.now().strftime("%Y-%m-%d %H:%M"), Description=Description, Assigned_To=None, Type_Name=str(Type), Priority_Name=str(Priority), Status_Name='Opened', User_ID=Session.get('user_id'))
        session.add(newRequest)
        flash('New Request With Name %s Successfully Created' % newRequest.name)
        session.commit()
        UserRequests = session.query(Requests).filter_by(User_ID=Session.get('user_id')).filter(Requests.Status_Name != 'Solved').all()
        return render_template('ReqData.html', conn=connUser, title='User Requests', rows=UserRequests)
I think that, without changing the database engine, the solution is either to queue the commits (but I don't know how) or to make Flask wait a random amount of time before committing, which I expect would hurt performance.
What should I do?
You need to serialize the commits. Create a lock like below.
from threading import RLock
sql_lock = RLock()
Wrap session.add and session.commit as follows. sql_lock.acquire() blocks while another thread holds the lock and has not yet released it. This ensures that at most one thread is running between acquire() and release() at any time.
try:
    sql_lock.acquire()
    session.add(newRequest)
    session.commit()
finally:
    sql_lock.release()
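Equivalently (a sketch of the same idea), the lock can be used as a context manager, which releases it automatically even if commit() raises:

with sql_lock:
    session.add(newRequest)
    session.commit()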
I'm using Flask and SQLAlchemy. The database I've created for SQLAlchemy seems to misbehave when I run my website, popping up an error stating that there's a thread problem. I'm wondering if it's because I haven't dropped the table from my previous schema. I'm on a Linux server, running python3 against the file that sets up my database.
I've tried physically deleting the table from my local drive and re-running it, but I still get this error.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import scoped_session
from database_setup import Base, Category, Item
engine = create_engine('sqlite:///database_tables.db')
Base.metadata.bind = engine
Session = sessionmaker()
Session.bind = engine
session = Session()
brushes = Category(id = 1, category_name = 'Brushes')
session.add(brushes)
session.commit()
pencils = Category(id = 2, category_name = 'Pencils')
session.add(pencils)
session.commit()
When I am in debug mode using Flask, I click the links I've made using these rows, but after three clicks I get the error
"(sqlite3.ProgrammingError) SQLite objects created in a thread can only be used in that same thread.The object was created in thread id 140244909291264 and this is thread id 140244900898560 [SQL: SELECT category.id AS category_id, category.category_name AS category_category_name FROM category] [parameters: [{}]] (Background on this error at: http://sqlalche.me/e/f405)"
You can use one session per thread by indexing the sessions with the thread id from _thread.get_ident():
import _thread

engine = create_engine('sqlite:///history.db', connect_args={'check_same_thread': False})
...
Base.metadata.create_all(engine)

sessions = {}

def get_session():
    thread_id = _thread.get_ident()  # get thread id
    if thread_id in sessions:
        return sessions[thread_id]
    session_factory = sessionmaker(bind=engine)
    Session = scoped_session(session_factory)
    sessions[thread_id] = Session()
    return sessions[thread_id]
Then use get_session() wherever it is needed; in your case:
get_session().add(brushes)
get_session().commit()
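As an aside (my sketch, not from the original answer): SQLAlchemy's scoped_session already keeps its own thread-local registry, so a single module-level registry can replace the hand-rolled dict; calling Session() then returns the session belonging to the calling thread.

from sqlalchemy.orm import sessionmaker, scoped_session

Session = scoped_session(sessionmaker(bind=engine))

# each thread that calls Session() gets (and reuses) its own session
Session().add(brushes)
Session().commit()
Session.remove()  # discard the current thread's session when the work is done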