I have a model with a self-referential relationship, and I want to find the root node/record of the reference chain. In the following example, a Package may depend on another package.
# myapp.py
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.orm import relationship
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:////tmp/test.db'
db = SQLAlchemy(app)
class Package(db.Model):
    """A package row that may depend on one other package row."""

    __tablename__ = "packages"

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(80), unique=True)
    # Self-referential foreign key: id of the package this one depends on.
    dep_on_id = db.Column(db.Integer, db.ForeignKey('packages.id'))
    # remote_side=[id] marks ``id`` as the remote column of the
    # self-referential join, making ``dep_on`` the many-to-one direction.
    dep_on = relationship('Package', remote_side=[id])

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return '<Package (%r)>' % self.name
If package D depends on C, C depends on B, and B depends on A, I want to find the root dependency of D, which is A. So the expected result of the following test code should be <Package (u'a')>. Is there an easy approach with SQLAlchemy that can replace the find_root_dep function?
# test.py
from myapp import db, Package
db.drop_all()
db.create_all()
a = Package('a')
b = Package('b')
c = Package('c')
d = Package('d')
b.dep_on = a
c.dep_on = b
d.dep_on = c
for p in [a, b, c, d]:
db.session.add(p)
db.session.commit()
def find_root_dep(package):
    """Return the root of ``package``'s dependency chain.

    Follows ``dep_on`` links, starting from ``package.dep_on``, until it
    reaches a package that depends on nothing, and returns that package.
    Returns ``None`` when ``package`` itself has no dependency.

    Raises:
        ValueError: if the ``dep_on`` links form a cycle, which would
            otherwise make the walk loop forever.
    """
    root = package.dep_on
    # Identity of every node already stepped over, used to detect cycles.
    visited = set()
    while root and root.dep_on:
        if id(root) in visited:
            raise ValueError('dependency cycle detected')
        visited.add(id(root))
        root = root.dep_on
    return root
print find_root_dep(d)
Walking trees and graphs can be done in SQL using a recursive CTE, or Query.cte() in SQLAlchemy.
def find_root_dep(package):
    """Return the root dependency of ``package`` using a recursive CTE.

    Returns ``None`` when ``package`` depends on nothing, because the
    anchor query then selects no rows.
    """
    # Anchor member: the first dependency of the package passed in.
    anchor = db.session.query(Package).filter_by(id=package.dep_on_id)
    chain = anchor.cte(recursive=True)

    # Recursive member: the packages that already-found packages depend on.
    # Iteration stops once a step yields an empty set, since no Package has
    # a NULL id.
    next_step = db.session.query(Package).filter_by(id=chain.c.dep_on_id)
    chain = chain.union_all(next_step)

    # Alias the CTE so the result comes back as a Package entity.
    root = db.aliased(Package, chain)

    # The root is the one row in the chain that depends on nothing.
    return db.session.query(root).filter_by(dep_on_id=None).one_or_none()
Your original implementation would return None, if passed a root package, so the SQL implementation starts directly by looking up the 1st dependency, which will result in an empty set for root packages.
Related
EDITED
I've been trying to create a class that will allow me to iterate through a list, or dictionary, and generate tables and rows.
The code starts as follows:
from flask_sqlalchemy import SQLAlchemy as sa
from flask import Flask as fl
import pymysql
pymysql.install_as_MySQLdb()
app = fl(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:the_other_stuff'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] =False
DEBUG =True
db = sa(app)
a=['some_stuff','gaaahhhh','lalala','rawr','pizza']
class test(db.Model):
    """Row of the ``stuff`` table: an integer id plus a short unicode value."""

    __tablename__ = 'stuff'

    id = db.Column('id', db.Integer, primary_key=True)
    data = db.Column('data', db.Unicode(50))

    def __init__(self, id, data):
        self.id, self.data = id, data
def stuff():
    """Create the tables and insert one ``test`` row per entry of ``a``.

    Rows get the ids 1..len(a), in list order.  Returns whatever
    ``db.session.commit()`` returns.
    """
    # Create the tables first so they exist when commit() flushes the rows.
    db.create_all()
    # enumerate(start=1) yields the 1-based ids directly, without
    # reassigning the loop variable or shadowing the builtin ``id``.
    for row_id, value in enumerate(a, start=1):
        db.session.add(test(row_id, value))
    return db.session.commit()
stuff()
I'm still going to try and structure it so that it can take a dictionary, or list, and then add the key as the table name if it is a dict. If someone has that code, I won't argue with you sharing, but if not I'll post when that is done.
Use db.session.add / db.session.commit. The db object is initialized from flask_sqlalchemy; the attribute is db.session, not db.Session.
The code below automatically inserts data into a one-to-many relationship table set. The recursive function acts like a while loop (%timeit showed the same results for both); I just prefer the way it looks over a while loop. The function indexes into a list within a list. I was originally going three levels deep but modified the loops to simplify. It was originally designed to push a list like list[a][i][0], where each zero value inside every [i] was the same value type; I set it to only do [a][i] to keep it a little simpler, and so that it could be used as a base if someone liked it. [a][i][0] was very fast, but [a][i] might be better off as a list of pd.DataFrame instead of np.array. If the [i] values are not all going to the same db.Column(), you'll have to declare one for each set of [i] and figure out a way of indexing through it.
A table generator involves a class generator with a base class. I don't have a working example, but can upload when I do
import numpy as np
from flask_sqlalchemy import SQLAlchemy as sa
from flask import Flask as fl
import pymysql
pymysql.install_as_MySQLdb()
app = fl(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'mysql://root:the_other_stuff'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] =False
DEBUG =True
db = sa(app)
list=[np.array(np.ones(100)),np.array(np.ones(100)),np.array(np.ones(100)),
np.array(np.ones(100)),np.array(np.ones(100))]
class Parent(db.Model):
    """Parent side of the one-to-many Parent -> Child relationship."""

    id = db.Column('id', db.Integer, primary_key=True)
    data = db.Column('data', db.VARCHAR(45))
    # The relationship target must be the mapped class name ('Child').
    # It is paired with Child.parent through back_populates instead of a
    # backref, because Child declares its own ``parent`` attribute and a
    # backref of the same name would collide with it.
    _child = db.relationship('Child', back_populates='parent', lazy=True)

    def __init__(self, data):
        self.data = data

    def __repr__(self):
        return '<Parent %r>' % self.id


class Child(db.Model):
    """Child side of the relationship; each row points at one Parent."""

    id = db.Column('id', db.Integer, primary_key=True)
    data = db.Column('data', db.VARCHAR(45))
    # Flask-SQLAlchemy derives the table name from the class name in
    # snake_case, so Parent's table is 'parent', not 'Parent'.
    parent_id = db.Column(db.Integer, db.ForeignKey('parent.id'))
    parent = db.relationship('Parent', back_populates='_child')

    def __init__(self, data):
        self.data = data

    def __repr__(self):
        return '<Child %r>' % self.id
def child_loop(i=0):
    """Insert a Parent/Child pair for element ``i`` of every array in ``list``.

    Recurses with ``i + 1`` until ``i`` reaches the length of the first
    array, then prints a confirmation once.  Returns ``None``.

    :param i: index of the element to take from each array.
    """
    # Local import: ``exc`` is referenced below but the snippet never
    # imported it.
    from sqlalchemy import exc

    for a, values in enumerate(list):
        with db.session.no_autoflush:
            try:
                # NOTE(review): the original passed an undefined ``data[a]``
                # as ``symbol_col``, but Parent only accepts ``data``.  Using
                # the array index as the parent's data is an assumption;
                # confirm the intended value.
                p = Parent(data=str(a))
                c = Child(data=values[i])
                p._child.append(c)
                db.session.add(p)
                db.session.add(c)
                # Commit inside the try so a constraint violation actually
                # surfaces here; add() alone never touches the database.
                db.session.commit()
            except exc.IntegrityError:
                db.session.rollback()
    i = i + 1
    # Stop once every element index has been processed; the original
    # compared the loop index ``a`` instead of ``i`` and never terminated.
    if i < len(list[0]):
        return child_loop(i=i)
    return print('inserted open_close')
child_loop(i=0)
I often see, that the model instance with one-to-many relationship is explicitly extended following its initialization, such as:
one = One()
# some code goes here
one.many = [Many(), Many(), Many()]
one.many.append(Many())
But in my case it seems reasonable to initialize a many-to-one object with its relationship already supplied to __init__:
one = One()
many = Many(one = one)
Is it somehow considered a bad practice?
For a reproducible example, please consider the following code:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, ForeignKey, create_engine
from sqlalchemy.orm import relationship, sessionmaker
Base = declarative_base()
class One(Base):
    """The 'one' side of the pair; ``many`` holds the related Many objects."""

    __tablename__ = 'one'

    id = Column(Integer, primary_key=True)
    # Kept in sync with Many.one through back_populates.
    many = relationship("Many", back_populates="one")
class Many(Base):
    """The 'many' side of the pair; each row references one One row."""

    __tablename__ = 'many'

    id = Column(Integer, primary_key=True)
    one_id = Column(Integer, ForeignKey('one.id'))
    # Kept in sync with One.many through back_populates.
    one = relationship("One", back_populates="many")

    def __init__(self, one=None):
        # Leave the relationship untouched unless a parent was supplied.
        if one is None:
            return
        self.one = one
# Setup the DB and connection
engine = create_engine('sqlite:///:memory:', echo=True)
conn = engine.connect()
session = sessionmaker(bind=engine)()
Base.metadata.create_all(engine)
# Is it proper to initialize a model instance, with its relationship as an argument?
one1 = One()
many1 = Many(one1)
print(many1.one is one1) # True
print(one1.many[0] is many1) # True
Although it looks clean to me so far, this approach may result in an ambiguous code:
# What happens here?
# Instance of many first initialized with one2a,
# then reassigned to one2b?
one2a = One()
many2 = Many(one=one2a)
print(many2.one is one2a) # True
one2b = One(many=[many2]) # same when One(many=[Many(one=one2a)])
print(many2 is one2b.many[0]) # True
print(many2.one is one2a) # False
print(many2.one is one2b) # True
Finally, please consider the aforementioned __init__ method. Since self.many is by default expected to be an empty list, what would be the desired way to initialize it as such?
def __init__(self, many=None):
if many is not None:
self.many = many
Assume the following setup:
from sqlalchemy import Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class MyClass(Base):
    """Minimal mapped class with an integer primary key and a name."""

    # Declarative needs a table name (or a __table__) on every mapped class.
    __tablename__ = 'MyClass'

    id = Column(Integer, primary_key=True)
    name = Column(String)
The normal paradigm to query the DB with SQLAlchemy is to do the following:
Session = sessionmaker()
engine = 'some_db_location_string'
session = Session(bind=engine)
session.query(MyClass).filter(MyClass.id == 1).first()
Suppose, I want to simplify the query to the following:
MyClass(s).filter(MyClass.id == 1).first()
OR
MyClass(s).filter(id == 1).first()
How would I do that? My first attempt, which used a model mixin class, failed. This is what I tried:
class ModelMixins(object):
    """Mixin that stores a session so queries can start from the model."""

    def __init__(self, session):
        self.session = session

    def filter(self, *args):
        # NOTE(review): this passes the instance (not its class) to query()
        # and discards the resulting query instead of returning it.
        self.session.query(self).filter(*args)
# Redefine MyClass to use the above class
class MyClass(ModelMixins, Base):
    """Mapped class that also carries the ModelMixins query helpers."""

    # Declarative needs a table name (or a __table__) on every mapped class.
    __tablename__ = 'MyClass'

    id = Column(Integer, primary_key=True)
    name = Column(String)
The main failure seems to be that I can't quite transfer the expression 'MyClass.id == 1' to the actual filter function that is part of the session object.
Folks may ask why would I want to do:
MyClass(s).filter(id == 1).first()
I have seen something similar used before and thought the syntax would become much cleaner if I could achieve this. I wanted to replicate it but have not been able to. Being able to do something like this:
def get_stuff(some_id):
    """Return the ``name`` of the MyClass row with id ``some_id``, or None.

    ``None`` is returned when no matching row exists.
    """
    with session_scope() as s:
        rec = MyClass(s).filter(MyClass.id == some_id).first()
        # Read the attribute while the session scope is still open.
        return rec.name if rec else None
...seems to be the cleanest way of doing things. For one, session management is kept separate. Secondly, the query itself is simplified. Having a Mixin class like this would allow me to add the filter functionality to any number of classes...So can someone help in this regard?
session.query takes a class; you're giving it self, which is an instance. Replace your filter method with:
def filter(self, *args):
    """Return a query on this object's class, filtered by ``args``.

    Uses the session stored on the instance rather than a module-level
    ``session`` name, so it works with whichever session was supplied.
    """
    return self.session.query(self.__class__).filter(*args)
and at least this much works:
In [45]: MyClass(session).filter(MyClass.id==1)
Out[45]: <sqlalchemy.orm.query.Query at 0x10e0bbe80>
The generated SQL looks right, too (newlines added for clarity):
In [57]: str(MyClass(session).filter(MyClass.id==1))
Out[57]: 'SELECT "MyClass".id AS "MyClass_id", "MyClass".name AS "MyClass_name"
FROM "MyClass"
WHERE "MyClass".id = ?'
No guarantees there won't be oddities; I've never tried anything like this before.
I've been using this mixin with good success. It is most likely not the most efficient thing in the world, and I am no expert. I define a date_created column for every table.
class QueryBuilder:
    """
    Mixin that builds a filtered query for the class it is mixed into.
    """

    # When true, the finished query is printed before it is returned.
    q_debug = False

    def query_from_dict(self, db_session: "Session", **q_params: object):
        """
        Creates a query on ``type(self)`` filtered by the given parameters.

        ``start_date`` / ``end_date`` bound the ``date_created`` attribute,
        ``<name>_like`` adds a case-insensitive substring match on
        ``<name>`` (ignored when the class has no such attribute), and any
        other keyword adds an equality filter on the attribute of that name.

        :param db_session: The database session
        :param q_params: The query parameters
        :returns: The query with every filter applied
        """
        model = type(self)
        query = db_session.query(model)
        for param, value in q_params.items():
            # getattr also finds attributes inherited from a base class,
            # which a lookup in the class's own __dict__ does not.
            if param == 'start_date':
                query = query.filter(getattr(model, 'date_created') >= value)
            elif param == 'end_date':
                query = query.filter(getattr(model, 'date_created') <= value)
            elif param.endswith('_like'):
                # Only a trailing "_like" selects the ILIKE branch, so a
                # column merely containing "like" (e.g. "likes") falls
                # through to the equality filter below.
                column = getattr(model, param[:-len('_like')], None)
                if column is not None:
                    query = query.filter(column.ilike(f'%{value}%'))
            else:
                query = query.filter(getattr(model, param, None) == value)
        if self.q_debug:
            print(query)
        return query
Any tips on how I should be accessing other manager methods from within Manager.py?
No matter what I do, I can't seem to access my other manager's method. Python complains that it isn't defined...
Is it going to cause problems if I import models inside of managers.py? Circular includes or whatever?
managers.py:
# Returns the whole family who are active
def get_active_dependents_including_guardian(self, consumer, connectedOnly=False):
    """Return the active dependents whose guardian is ``consumer``.

    The result is ordered by ``dob``.  With ``connectedOnly`` set, it is
    narrowed to a list of the dependents whose patient-id record has
    ``patient_id_integer == 0``.
    """
    logger.debug('get_active_dependents_including_guardian')
    results = self.model.objects.filter(
        guardian=consumer, is_active=True
    ).order_by('dob')
    if connectedOnly:
        from myir import models
        # OPTIMIZE: this could fetch the patient ids for all dependents in
        # one trip instead of one lookup per dependent.
        results = [
            d for d in results
            if models.DependentPatientID.objects.get_patient_ids(d)[0].patient_id_integer == 0  # HERE IS PROBLEM
        ]
    return results
# some stuff omitted...
# this is the manager of models.DependentPatientId
class DependentPatientIDManager(models.Manager):
    """Manager exposing a lookup of the patient-id record of a dependent."""

    def get_patient_ids(self, dependent):
        """Return the single record whose ``dependent`` is ``dependent.id``."""
        return self.model.objects.get(dependent=dependent.id)
You need to change:
from myir import models
to
from myir.models import DependentPatientID
The reason being, you might have already done from django.db import models and the names are conflicting.
Now,
class DependentPatientIDManager(models.Manager):
    """Manager exposing a lookup of the patient-id record of a dependent."""

    def get_patient_ids(self, dependent):
        """Return the single record whose ``dependent`` is ``dependent.id``."""
        return self.model.objects.get(dependent=dependent.id)
returns an object, and not a queryset. So, DependentPatientID.objects.get_patient_ids(d)[0] would fail.
So try this
if connectedOnly:
from myir.models import DependentPatientID
patient_id_integer = 0
dep_patient_id = DependentPatientID.objects.get_patient_ids(d)
if dep_patient_id:
patient_id_integer = dep_patient_id.patient_id_integer
results = [d for d in results if patient_id_integer == 0]
#Or just
if not patient_id_integer:
results = []
I made this statement using flask-sqlalchemy and I've chosen to keep it in its original form. Post.query is equivalent to session.query(Post)
I attempted to make a subquery that would filter out all posts in a database which are in the draft state and not made or modified by the current user. I made this query,
# Posts that are not drafts AND were created or modified by the current user.
Post.query.filter(
    sqlalchemy.and_(
        Post.post_status != Consts.PostStatuses["Draft"],
        sqlalchemy.or_(
            Post.modified_by_id == current_user.get_id(),
            Post.created_by_id == current_user.get_id(),
        ),
    )
)
which created:
Where true AND ("Post".modified_by_id = :modified_by_id_1 OR
"Post".created_by_id = :created_by_id_1)
Expected outcome:
Where "Post".post_status != "Draft" AND (
"Post".modified_by_id = :modified_by_id_1 OR
"Post".created_by_id = :created_by_id_1)
I'm wondering, why this is happening? How can I increase the error level in SQLAlchemy? I think my project is silently failing and I would like to confirm my guess.
Update:
I used the wrong constants dictionary. One dictionary contains ints, the other contains strings (one for database queries, one for printing).
_post_status = db.Column(
db.SmallInteger,
default=Consts.post_status["Draft"])
post_status contains integers, Consts.PostStatuses contains strings. In hindsight, a really bad idea. I'm going to make a single dictionary that returns a tuple instead of two dictionaries.
@property
def post_status(self):
    """Look up this object's ``_post_status`` value in ``Consts.post_status``."""
    return Consts.post_status.get(getattr(self, "_post_status", None))
The problem is that your post_status property isn't acceptable for use in an ORM-level query, as it is a Python descriptor which, at the class level, by default returns itself:
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class A(Base):
    """Mapped class exposing ``_post_status`` through a plain property."""

    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    _post_status = Column(String)

    # A plain property: accessed on the class it yields the property object
    # itself, not a SQL expression.
    @property
    def post_status(self):
        return self._post_status
print (A.post_status)
print (A.post_status != 5678)
output:
$ python test.py
<property object at 0x10165bd08>
True
the type of usage you're looking for seems like that of a hybrid attribute, which is a SQLAlchemy-included extension to a "regular" python descriptor which produces class-level behavior that's compatible with core SQL expressions:
from sqlalchemy.ext.hybrid import hybrid_property
class A(Base):
    """Mapped class exposing ``_post_status`` through a hybrid property."""

    __tablename__ = 'a'

    id = Column(Integer, primary_key=True)
    _post_status = Column(String)

    # hybrid_property evaluates the same body against the class, so
    # A.post_status produces a SQL expression usable in queries.
    @hybrid_property
    def post_status(self):
        return self._post_status
print (A.post_status)
print (A.post_status != 5678)
output:
$ python test.py
A._post_status
a._post_status != :_post_status_1
be sure to read the hybrid doc carefully including how to establish the correct SQL expression behavior, descriptors that work both at the instance and class level is a somewhat advanced Python technique.