How to map a Class to multiple databases in sqlalchemy orm - python

I am using a sqlite database as my application file through sqlalchemy. I have a separate configuration file.
There are some classes whose information I persist on my application file that I would like to replicate on my configuration file. The thing is that I would load it alternatively from one or the other source depending on availability.
I saw this mention in the documentation, but I think it does not directly apply, as the secondary mapping would not persist the information. Also, the notion of which one would be the primary is blurry; both databases would carry the same information, though maybe not the same version of it.
http://sqlalchemy.readthedocs.org/en/rel_1_0/orm/nonstandard_mappings.html#multiple-mappers-for-one-class
I will try to make it clearer with an example:
I have a class A which represents a multi-field user input. I save this on my application file.
A class B, also on my application file, is composed of an instance of class A.
The same instance from Class A may compose several suitable instances of Class B. These are all stored on my application file.
My problem is that on another session, with a brand new configuration file I might want to reuse that Class A instance. I can not have it only on the application file, because if it gets updated, it will be relevant across all application files that use it.
On the other hand, it cannot live only in the configuration file, as a user might share his application file with someone else, and the latter might not have a suitable configuration and would have to recreate it manually.
I need to have it in both places, be able to choose which database will be the source at runtime and have all changes persist on both databases at once.
Can it be done in sqlalchemy+sqlite? Is it a good idea? Are there classic solutions for this?
EDIT:
I think I am describing something that looks like a cache, which sqlalchemy does not do. Does any other approach come to mind?
Does sqlalchemy allow me to map an instance to a database upon instance creation? This would allow two instances of the same class to be mapped against different databases. Then I could listen for an update event from sqlalchemy and issue the same SQL to the other database. I do not know how to do this either (a rough sketch of this idea appears after the options below).
Another option: map my class against a union query. SQLAlchemy might allow this, as it does for arbitrary selects, BUT then there is the persistence issue.
Another option: add a layer to the engine so that it connects to two databases simultaneously, issuing the same commands to both for reading and writing. I could deal with the duplicated returns.
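Something like this is what I have in mind for the first option; it is only a rough sketch, not working code (the class A, the twin attribute, and the database file names are just illustrative, and I do not know yet whether this is the right way):
from sqlalchemy import create_engine, event
from sqlalchemy.orm import sessionmaker

# one session per database; an instance is "mapped" to a database simply by
# which session it is added to
app_session = sessionmaker(bind=create_engine('sqlite:///application.db'))()
cfg_session = sessionmaker(bind=create_engine('sqlite:///configuration.db'))()

a_app = A()          # A is my mapped class (illustrative)
a_cfg = A()          # its copy destined for the other database
a_app.twin = a_cfg   # hypothetical back-reference, not a mapped column
app_session.add(a_app)
cfg_session.add(a_cfg)

@event.listens_for(A, "after_update")
def mirror_update(mapper, connection, target):
    # copy every non-primary-key column onto the twin; the other session
    # would still have to be committed separately
    twin = getattr(target, "twin", None)
    if twin is not None:
        pk = {c.key for c in mapper.primary_key}
        for key in mapper.columns.keys():
            if key not in pk:
                setattr(twin, key, getattr(target, key))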

I came up with the mixin below. It does not handle expunge or rollback, as I do not use those in my application, nor do I know how to go about them.
It looks like it is working. I will proceed to expand it to handle collections.
import os

from sqlalchemy import Column, Float, String, Enum, Integer, event
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import orm
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


class ReplicateMixin:

    @classmethod
    def get_or_create(cls, prime_session, sessoes=None, **kwargs):
        if sessoes is None:
            sessoes = []
        if not isinstance(sessoes, list):
            sessoes = [sessoes]
        sessoes = [prime_session] + sessoes  # They are passed separately just to make explicit that the first might receive different treatment

        replicas = []
        for sessao in sessoes:  # Gets a result or creates a new instance from each database
            instance = sessao.query(cls).filter_by(**kwargs).first()
            if instance is None:
                instance = cls(**kwargs)
                setattr(instance, "__new", True)
                sessao.add(instance)
            instance.sessao = sessao
            replicas.append(instance)

        fittest = cls.__select_fittest(replicas)  # Selects the instance whose data will prevail
        prime = replicas.pop(0)  # Instance from the session we will be issuing commits to. The others must simply follow.
        cls.__copy_data(fittest, prime, ReplicateMixin.__get_primary_keys(prime))
        setattr(prime, "__replicas", replicas)  # The object will carry references to its copies
        return prime

    @staticmethod
    def __select_fittest(instances):
        """This method should contain logic for choosing the instance that has
        the most relevant information. It may be overridden by child classes."""
        if getattr(instances[0], "__new", False):
            return instances[1]
        else:
            return instances[0]

    @staticmethod
    def __copy_data(source, dest, primary_keys=None):
        primary_keys = [] if primary_keys is None else primary_keys
        for prop in orm.class_mapper(type(source)).iterate_properties:
            if (isinstance(prop, orm.ColumnProperty)
                    and prop.key not in primary_keys):
                setattr(dest, prop.key,
                        getattr(source, prop.key))

    @staticmethod
    def __replicate(mapper, connection, original_obj):
        replicants = getattr(original_obj, "__replicas", [])  # if it IS a replicant it will not have a __replicas attribute
        primary_keys = ReplicateMixin.__get_primary_keys(original_obj)
        for objeto in replicants:
            ReplicateMixin.__copy_data(original_obj, objeto, primary_keys)
            objeto.sessao.commit()

    @staticmethod
    def __replicate_del(mapper, connection, original_obj):
        replicants = getattr(original_obj, "__replicas", [])  # if it IS a replicant it will not have a __replicas attribute
        for objeto in replicants:
            if objeto in objeto.sessao.new:
                objeto.sessao.expunge(objeto)
            else:
                objeto.sessao.delete(objeto)
            objeto.sessao.commit()

    @staticmethod
    def __get_primary_keys(mapped_object):
        return [key.name for key in orm.class_mapper(type(mapped_object)).primary_key]

    @classmethod
    def __declare_last__(cls):
        """Binds the replication handlers to the mapper events."""
        event.listen(cls, "before_insert", cls.__replicate)
        event.listen(cls, "before_update", cls.__replicate)
        event.listen(cls, "before_delete", cls.__replicate_del)
        # FIXME: might not play well with rollback
Example:
DeclarativeBase = declarative_base()


class Datum(ReplicateMixin, DeclarativeBase):
    __tablename__ = "xUnitTestData"

    Key = Column(Integer, primary_key=True)
    Value = Column(Float)
    nome = Column(String(10))

    def __repr__(self):
        return "{}; {}; {}".format(self.Key, self.Value, self.nome)


end_local = os.path.join(os.path.expanduser("~"), "Desktop", "local.bd")
end_remoto = os.path.join(os.path.expanduser("~"), "Desktop", "remoto.bd")

src_engine = create_engine('sqlite:///' + end_local, echo=False)
dst_engine = create_engine('sqlite:///' + end_remoto, echo=False)

DeclarativeBase.metadata.create_all(src_engine)
DeclarativeBase.metadata.create_all(dst_engine)

SessionSRC = sessionmaker(bind=src_engine)
SessionDST = sessionmaker(bind=dst_engine)

session1 = SessionSRC()
session2 = SessionDST()

item = Datum.get_or_create(session1, session2, Value=0.5, nome="terceiro")
item.Value = item.Value / 2
print(item)

session1.delete(item)
session1.commit()
session1.close()

Related

SQLAlchemy with multiple session, how to avoid "Object is already attached to session X"?

I have a special configuration where I've created two SQLAlchemy sessions from sessionmaker:
db.read = sessionmaker(autoflush=False)
db.write = sessionmaker()
When calling db.write.add(obj), I sometimes have the following error (but not always):
sqlalchemy.exc.InvalidRequestError: Object '<User at 0x7f3fa9ebee50>' is already attached to session '14' (this is '15')
Is there a way to do the following:
"If session is db.write, then call db.write.add(obj)
Otherwise, add obj to the db.write session in order to save it, and (important), update both the object in db.write session AND db.read session"
(I've managed to save it via write, but then obj.id is empty in db.read.)
Thank you for your help
We can approximate the desired behaviour by extending Session.add to detect and handle the case when an object is already in another session, and by adding an event listener to reinstate the object into the read session once it has been committed. It isn't possible for an object to exist in two sessions simultaneously: if it's preferable that the object remains in the write session the listener and the _prev_session attribute are not required.
import weakref

import sqlalchemy as sa
from sqlalchemy import orm

...

class CarefulSession(orm.Session):

    def add(self, object_):
        # Get the session that contains the object, if there is one.
        object_session = orm.object_session(object_)
        if object_session and object_session is not self:
            # Remove the object from the other session, but keep
            # a reference so we can reinstate it.
            object_session.expunge(object_)
            object_._prev_session = weakref.ref(object_session)
        return super().add(object_)


# The write session must use the Session subclass; the read
# session need not.
WriteSession = orm.sessionmaker(engine, class_=CarefulSession)


@sa.event.listens_for(WriteSession, 'after_commit')
def receive_after_commit(session):
    """Reinstate objects into their previous sessions."""
    objects = filter(lambda o: hasattr(o, '_prev_session'),
                     session.identity_map.values())
    for object_ in objects:
        prev_session = object_._prev_session()
        if prev_session:
            session.expunge(object_)
            prev_session.add(object_)
        delattr(object_, '_prev_session')
A complete implementation might add an after_rollback listener, and perhaps initialise _prev_session in the base model's __init__ to avoid the delattr/hasattr calls.
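A sketch of such an after_rollback listener, mirroring the after_commit one above, might look like the following. Note that the session's state is not final inside after_rollback, so depending on your usage after_soft_rollback may be the safer hook:
@sa.event.listens_for(WriteSession, 'after_rollback')
def receive_after_rollback(session):
    """Hand objects back to their previous sessions after a failed write."""
    objects = [o for o in session.identity_map.values()
               if hasattr(o, '_prev_session')]
    for object_ in objects:
        prev_session = object_._prev_session()
        if prev_session:
            session.expunge(object_)
            prev_session.add(object_)
        delattr(object_, '_prev_session')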

Using mongoengine with multiprocessing - how do you close mongoengine connections?

No matter what I try, I keep hitting the "MongoClient opened before fork" warning about not forking active mongo connections when using multiprocessing with a mongoengine db. The standard mongo advice seems to be to connect to the db only from within the child processes. I think what I'm doing should be functionally equivalent, because I'm closing the database prior to using multiprocessing; however, I still hit the problem.
Related questions either without a minimal example or with inapplicable solutions are here, here, and specifically for the case of flask/celery and here
Minimal example to reproduce the problem:
from mongoengine import connect, Document, StringField, ListField, ReferenceField
from pathos.multiprocessing import ProcessingPool


class Base(Document):
    key = StringField(primary_key=True)
    name = StringField()
    parent = ReferenceField('Parent', required=True)


class Parent(Document):
    key = StringField(primary_key=True)
    name = StringField()
    bases = ListField(ReferenceField('Base'))


def remove_base(key):
    db = connect('mydb')
    mongo_b = Base.objects().get(key=key)
    mongo_b.parent.update(pull__bases=mongo_b)
    mongo_b.delete()


### setup
db = connect('mydb', connect=False)
Base(key='b1', name='test', parent='p1').save()
Base(key='b2', name='test', parent='p1').save()
Base(key='b3', name='test2', parent='p1').save()
p = Parent(key='p1', name='parent').save()
p.update(add_to_set__bases='b1')
p.update(add_to_set__bases='b2')
p.update(add_to_set__bases='b3')

### find objects we want to delete
my_base_objects = Base.objects(name='test')
keys = [b.key for b in my_base_objects]
del my_base_objects

# close db to avoid problems?!
db.close()
del db

# parallel map removing base objects and references from the db
# warning generated here
pp = ProcessingPool(2)
pp.map(remove_base, keys)
Ok so I figured it out. Mongoengine caches connections to the database all over the place. If you manually remove them then the issue is resolved. Adding the following import
from mongoengine import connection
then adding in:
connection._connections = {}
connection._connection_settings = {}
connection._dbs = {}
Base._collection = None
Parent._collection = None
to the '#close db' section appears to solve the issue.
Complete code:
from mongoengine import connect, Document, StringField, ListField, ReferenceField, connection
from pathos.multiprocessing import ProcessingPool


class Base(Document):
    key = StringField(primary_key=True)
    name = StringField()
    parent = ReferenceField('Parent', required=True)


class Parent(Document):
    key = StringField(primary_key=True)
    name = StringField()
    bases = ListField(ReferenceField('Base'))


def remove_base(key):
    db = connect('mydb', connect=False)
    mongo_b = Base.objects().get(key=key)
    mongo_b.parent.update(pull__bases=mongo_b)
    mongo_b.delete()


def setup():
    Base(key='b1', name='test', parent='p1').save()
    Base(key='b2', name='test', parent='p1').save()
    Base(key='b3', name='test2', parent='p1').save()
    p = Parent(key='p1', name='parent').save()
    p.update(add_to_set__bases='b1')
    p.update(add_to_set__bases='b2')
    p.update(add_to_set__bases='b3')


db = connect('mydb', connect=False)
setup()

### find objects we want to delete
my_base_objects = Base.objects(name='test')
keys = [b.key for b in my_base_objects]
del my_base_objects

### close db to avoid problems?!
db.close()
db = None
connection._connections = {}
connection._connection_settings = {}
connection._dbs = {}
Base._collection = None
Parent._collection = None

### parallel map removing base objects from the db
pp = ProcessingPool(2)
pp.map(remove_base, keys)
This was recently improved: as of MongoEngine >= 0.18.0, the methods disconnect() and disconnect_all() should be used to close one or all existing connections, respectively (changelog 0.18.0).
See the official documentation.
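A minimal sketch of what the cleanup above could look like on MongoEngine >= 0.18.0 (database name and flow taken from the question's example):
from mongoengine import connect, disconnect

db = connect('mydb')
# ... set up documents in the parent process ...

# explicitly drop the parent-process connection instead of clearing
# mongoengine.connection's private caches by hand
disconnect()  # closes the 'default' alias; disconnect_all() closes every alias

# each child process then calls connect('mydb') again for itself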

connection not getting created to the new dbname in mongoengine

Using Python MongoEngine, I am trying to create databases and add documents to different databases. Here's how I am trying to do it:
from mongoengine import *
class MongoDataAccessObject():
    # method to connect to the database and initialize the tables etc.
    def __init__(self, my_env, helperObj):
        print "initializing db for the environment ", my_env
        self.con = None
        self.dbName = my_env
        self.helper_obj = helperObj
        try:
            self.con = connect(db=self.dbName)
        except Exception as e:
            print e

    def addRecord(self, document_object):
        document_object.save()
Now, I pass the names of the different databases that I want created while creating objects of the above class, and add the documents like this:
for my_env in list_of_envs:
    dao = MongoDataAccessObject(my_env, helper_object)
    dao.addRecord(myDocument)
Now there are 2 questions here:
For some reason all my documents keep getting added to the same DB (the first one passed at MongoDataAccessObject creation). I would assume that since I am creating a new object every time, while passing a different db name each time, a new connection should be created to the db passed and the documents should get added to the db currently connected to.
To verify whether I am actually connected to the DB in question, I could not find a method like get_database_name() on the connection object. Is there a way to verify that I am getting connected to the DB name being passed?
Ok did some more research and found this:
https://github.com/MongoEngine/mongoengine/issues/605
Tried it out like this in IPython:
from mongoengine import *
import datetime


class Page(Document):
    title = StringField(max_length=200, required=True)
    date_modified = DateTimeField(default=datetime.datetime.now)


def switch(model, db):
    model._meta['db_alias'] = db
    # must set _collection to none so it is re-evaluated
    model._collection = None
    return model


register_connection('default', name='testing')
register_connection('mycon', name='db1')

page = Page(title="Test Page")
page = switch(page, 'mycon')
page.save()
This works and creates a db named db1 and stores the document there.
Now I do this again:
register_connection('mycon2', name='db2')
page = Page(title="Test Page")
page = switch(page, 'mycon2')
page.save()
Contrary to my expectation, this time db2 was not created (checked from both the mongo client and Robomongo); however, the document was saved successfully. Where exactly did the document get saved, then?
So to figure that out, I repeated the above exercise with a small change, as below:
register_connection('mycon2', name='db2')
page = Page(title="Test Page")
page = switch(page, 'mycon2')
x = page.save()
# did a dir(x) and found that there is _get_db, so tried it out as below
x._get_db()
and the output was:
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True, read_preference=Primary()), u'db2')
which I guess means that the document got saved in a database named db2. But where on earth is this db2? Why can't I see it through either the mongo client or Robomongo?
Finally the only way I could find to achieve the above was through the context manager with statement provided by MongoEngine, which also is well documented here : http://docs.mongoengine.org/guide/connecting.html#switch-database
The way I did it in my code above is something like this:
In the first call to the db, a default db needs to be created, which should have the alias 'default'. Only then can another alias be created; otherwise MongoEngine throws an error saying no default db was found.
So, when the very first db object is created, a False flag is sent to the __init__ of MongoDataAccessObject.
To do this, the MongoDataAccessObject was changed to something like this:
class MongoDataAccessObject():
    # method to connect to the database and initialize the tables etc.
    def __init__(self, my_env, helperObj, is_default_db_set):
        print "initializing db for the environment ", my_env
        self.con = None
        self.dbName = my_env
        self.helper_obj = helperObj
        self.db_alias = my_env
        self.is_default_db_set = is_default_db_set
        try:
            # all of this part with is_default_db_set and register_connection() is needed because MongoEngine does not
            # provide a simple way of changing dbs or switching db connections. The only way to do it is through the switch_db()
            # context manager they provide (which has been used in the addRecord() below)
            if not self.is_default_db_set:
                self.con = connect(db=self.dbName, alias='default')
            else:
                # register_connection(alias_name, db_name)
                register_connection(self.db_alias, self.dbName)
        except Exception as e:
            print e
And the addRecord() was also modified as:
def addRecord(self, document_object):
    with switch_db(model_object, self.db_alias) as model_object:
        document_object = model_object()
        document_object.save()
And this part above:
for my_env in list_of_envs:
    dao = MongoDataAccessObject(my_env, helper_object)
    dao.addRecord(myDocument)
was also modified as:
for my_env in list_of_envs:
    dao = MongoDataAccessObject(my_env, helper_object, mongo_default_db_flag)
    dao.addRecord(myDocument)
And this seemed to do the job for me.
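As for verifying which database an alias is actually bound to (the second question above), mongoengine.connection.get_db() returns the underlying pymongo Database object, whose name can be printed. A small sketch, assuming the aliases registered earlier:
from mongoengine.connection import get_db

print(get_db().name)          # database behind the 'default' alias
print(get_db('mycon2').name)  # database behind a registered alias, e.g. 'db2'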

Pyramid / SQLAlchemy trouble with joined models

I'm really new to Python and just as new to Pyramid (this is the first thing I've written in Python), and I'm having trouble with a database query...
I have the following models (relevant to my question anyway):
MetadataRef (contains info about a given metadata type)
Metadata (contains actual metadata) -- this is a child of MetadataRef
User (contains users) -- this is linked to metadata. MetadataRef.model = 'User' and metadata.model_id = user.id
I need access to name from MetadataRef and value from Metadata.
Here's my code:
class User(Base):
    ...
    _meta = None

    def meta(self):
        if self._meta == None:
            self._meta = {}
            try:
                for item in DBSession.query(MetadataRef.key, Metadata.value).\
                        outerjoin(MetadataRef.meta).\
                        filter(
                            Metadata.model_id == self.id,
                            MetadataRef.model == 'User'
                        ):
                    self._meta[item.key] = item.value
            except DBAPIError:
                ##TODO: actually do something with this
                self._meta = {}
        return self._meta
The query SQLAlchemy is generating does return what I need (close enough anyway -- it needs to query model_id as part of the ON clause rather than the WHERE, but that's minor and I'm pretty sure I can figure that out myself):
SELECT metadata_refs.`key` AS metadata_refs_key, metadata.value AS metadata_value
FROM metadata_refs LEFT OUTER JOIN metadata ON metadata_refs.id = metadata.metadata_ref_id
WHERE metadata.model_id = %s AND metadata_refs.model = %s
However, when I access the objects I get this error:
AttributeError: 'KeyedTuple' object has no attribute 'metadata_value'
This leads me to think there's some other way I need to access it, but I can't figure out how. I've tried both .value and .metadata_value. .key does work as expected.
Any ideas?
You're querying separate attributes ("ORM-enabled descriptors" in SA docs):
DBSession.query(MetadataRef.key, Metadata.value)
in this case the query returns not full ORM-mapped objects, but a KeyedTuple, which is a cross between a tuple and an object with attributes corresponding to the "labels" of the fields.
So, one way to access the data is by its index:
ref_key = item[0]
metadata_value = item[1]
Alternatively, to make SA to use a specific name for column, you may use Column.label() method:
for item in DBSession.query(MetadataRef.key.label('ref_key'),
                            Metadata.value.label('meta_value'))...
    self._meta[item.ref_key] = item.meta_value
For debugging, you can inspect Query.column_descriptions, which will tell you the names of the columns returned by the query.
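For example, a quick sketch using the labelled query from above (column_descriptions is an attribute, not a method):
query = DBSession.query(MetadataRef.key.label('ref_key'),
                        Metadata.value.label('meta_value'))
# one dict per SELECT column; 'name' holds the label the result tuple will use
print([col['name'] for col in query.column_descriptions])
# ['ref_key', 'meta_value']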

How can I use multiple databases in the same request in Cherrypy and SQLAlchemy?

My app connects to multiple databases using a technique similar to this. It works so long as I don't try to access different databases in the same request. Having looked back to the above script I see they have written a comment to this end:
SQLAlchemy integration for CherryPy,
such that you can access multiple databases,
but only one of these databases per request or thread.
My app now requires me to fetch data from Database A and Database B. Is it possible to do this in a single request?
Please see below for sources and examples:
Working Example 1:
from model import meta

my_object_instance = meta.main_session().query(MyObject).filter(
    MyObject.id == 1
).one()

Working Example 2:

from model import meta

my_user = meta.user_session().query(User).filter(
    User.id == 1
).one()

Error Example:

from model import meta

my_object_instance = meta.main_session().query(MyObject).filter(
    MyObject.id == 1
).one()
my_user = meta.user_session().query(User).filter(
    User.id == 1
).one()
This errors with:
(sqlalchemy.exc.ProgrammingError) (1146, "Table 'main_db.user' doesn't exist")
Sources:
# meta.py
import cherrypy
import sqlalchemy
from sqlalchemy import MetaData
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base


# Return an Engine
def create_engine(defaultschema = True, schema = "", **kwargs):
    # A blank DB is the same as no DB so to specify a non-schema-specific connection just override with defaultschema = False
    connectionString = 'mysql://%s:%s@%s/%s?charset=utf8' % (
        store['application'].config['main']['database-server-config-username'],
        store['application'].config['main']['database-server-config-password'],
        store['application'].config['main']['database-server-config-host'],
        store['application'].config['main']['database-server-config-defaultschema'] if defaultschema else schema
    )
    # Create engine object. We pass **kwargs through so this call can be extended
    return sqlalchemy.create_engine(connectionString, echo=True, pool_recycle=10, echo_pool=True, encoding='utf-8', **kwargs)


# Engines
main_engine = create_engine()
user_engine = None

# Sessions
_main_session = None
_user_session = None

# Metadata
main_metadata = MetaData()
main_metadata.bind = main_engine
user_metadata = MetaData()

# No idea what bases are/do but nothing works without them
main_base = declarative_base(metadata = main_metadata)
user_base = declarative_base(metadata = user_metadata)

# An easy collection of user database connections
engines = {}

# Each thread gets a session based on this object
GlobalSession = scoped_session(sessionmaker(autoflush=True, autocommit=False, expire_on_commit=False))


def main_session():
    _main_session = cherrypy.request.main_dbsession
    _main_session.configure(bind=main_engine)
    return _main_session


def user_session():
    _user_session = cherrypy.request.user_dbsession
    _user_session.configure(bind = get_user_engine())
    return _user_session


def get_user_engine():
    # Get dburi from the users instance
    dburi = cherrypy.session['auth']['user'].instance.database
    # Store this engine for future use
    if dburi in engines:
        engine = engines.get(dburi)
    else:
        engine = engines[dburi] = create_engine(defaultschema = False, schema = dburi)
    # Return Engine
    return engine


def get_user_metadata():
    user_metadata.bind = get_user_engine()
    return user_metadata


# open a new session for the life of the request
def open_dbsession():
    cherrypy.request.user_dbsession = cherrypy.thread_data.scoped_session_class
    cherrypy.request.main_dbsession = cherrypy.thread_data.scoped_session_class
    return


# close the session for this request
def close_dbsession():
    if hasattr(cherrypy.request, "user_dbsession"):
        try:
            cherrypy.request.user_dbsession.flush()
            cherrypy.request.user_dbsession.remove()
            del cherrypy.request.user_dbsession
        except:
            pass
    if hasattr(cherrypy.request, "main_dbsession"):
        try:
            cherrypy.request.main_dbsession.flush()
            cherrypy.request.main_dbsession.remove()
            del cherrypy.request.main_dbsession
        except:
            pass
    return


# initialize the session factory class for the selected thread
def connect(thread_index):
    cherrypy.thread_data.scoped_session_class = scoped_session(sessionmaker(autoflush=True, autocommit=False))
    return


# add the hooks to cherrypy
cherrypy.tools.dbsession_open = cherrypy.Tool('on_start_resource', open_dbsession)
cherrypy.tools.dbsession_close = cherrypy.Tool('on_end_resource', close_dbsession)
cherrypy.engine.subscribe('start_thread', connect)
You could also choose an ORM that is designed from the ground up for multiple databases, like Dejavu.
Take a look at this:
http://pythonhosted.org/Flask-SQLAlchemy/binds.html
Basically, it suggests that you use a bind parameter for each connection. That said, this seems to be a bit of a hack.
This question has a lot more detail in the answer:
With sqlalchemy how to dynamically bind to database engine on a per-request basis
That said, both this question and the one referenced aren't the newest and sqlalchemy will probably have moved on since then.
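Plain SQLAlchemy can also route different mapped classes to different engines within a single session via the binds argument, which may be enough to read from both databases in one request. A rough sketch, assuming MyObject lives in the main database and User in the per-user database (the engine URLs are placeholders):
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

main_engine = create_engine('mysql://user:password@host/main_db?charset=utf8')
user_engine = create_engine('mysql://user:password@host/user_db?charset=utf8')

# queries and flushes for each class are routed to its own engine
Session = sessionmaker(binds={MyObject: main_engine, User: user_engine})

session = Session()
my_object_instance = session.query(MyObject).filter(MyObject.id == 1).one()
my_user = session.query(User).filter(User.id == 1).one()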
