I've run into an issue after following the SqlAlchemy guide here.
Given the following simplified module:
class _Base:
    """Common base for all models: an auto-incrementing integer primary key."""

    id_ = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )


# Declarative base class built on top of _Base.
Base = declarative_base(cls=_Base)
class BlgMixin:
    """Mixin that supplies the shared ``__table_args__`` for mapped tables."""

    # The decorator is required: without it ``BlgMixin.__table_args__`` is a
    # plain function and ``BlgMixin.__table_args__.get('schema')`` (used by
    # MatchResult) cannot work.
    @declared_attr
    def __table_args__(cls):
        """Return the table options: schema name and ``extend_existing``."""
        return {'schema': "belgarath_backup", "extend_existing": True}
class DataAccessLayer:
    """Holds the engine and, after ``create_session()``, an open session."""

    def __init__(self):
        """Create the engine for the local MySQL server."""
        # Credentials are separated from the host by "@" in a database URL.
        conn_string = "mysql+mysqlconnector://root:root@localhost/"
        self.engine = create_engine(conn_string)

    def create_session(self):
        """Create the tables known to ``Base`` and open ``self.session``."""
        Base.metadata.create_all(self.engine)
        Session = sessionmaker()
        Session.configure(bind=self.engine)
        self.session = Session()
class Player(Base, BlgMixin):
    """Row of the ``player`` table, with a relationship to MatchResult."""

    __tablename__ = "player"

    name_ = Column(String(100))
    # No join condition is given here; this is the relationship named in the
    # error message quoted in the question.
    match = relationship("MatchResult")
class MatchResult(Base, BlgMixin):
    """Row of the ``match_result`` table: two foreign keys into ``player``."""

    __tablename__ = "match_result"

    # Both columns reference the same target column, player.id_.
    p1_id = Column(Integer, ForeignKey(f"{BlgMixin.__table_args__.get('schema')}.player.id_"))
    p2_id = Column(Integer, ForeignKey(f"{BlgMixin.__table_args__.get('schema')}.player.id_"))

    p1 = relationship("Player", foreign_keys=f"{BlgMixin.__table_args__.get('schema')}.player.id_")
    p2 = relationship("Player", foreign_keys=f"{BlgMixin.__table_args__.get('schema')}.player.id_")
I am attempting to build a query against it using:
dal = DataAccessLayer()
dal.create_session()

# Two independent aliases of Player, one per side of the match.
player_1, player_2 = aliased(Player), aliased(Player)

matches = (
    dal.session
    .query(MatchResult.p1_id, player_1.name_, MatchResult.p2_id, player_2.name_)
    .join(player_1)
    .join(player_2)
)
Why am I getting the following error?
Could not determine join condition between parent/child tables on relationship Player.match - there are multiple foreign key paths linking the tables. Specify the 'foreign_keys' argument, providing a list of those columns which should be counted as containing a foreign key reference to the parent table.
I was pretty sure I'd specified the two foreign key relationships?
Update:
I've tried the following combination as I think has been suggested in the comments but got the same error:
p1 = relationship("Player", foreign_keys=[p1_id])
p2 = relationship("Player", foreign_keys=[p2_id])
Update 2:
Added some details on what the output should look like:
player table:
+-----+-------+
| id_ | name_ |
+-----+-------+
| 1 | foo |
| 2 | bar |
| 3 | baz |
| 4 | zoo |
+-----+-------+
match_result table:
+-----+-------+-------+
| id_ | p1_id | p2_id |
+-----+-------+-------+
| 1 | 1 | 2 |
| 2 | 2 | 1 |
| 3 | 3 | 1 |
| 4 | 1 | 4 |
+-----+-------+-------+
Query output:
+-------+---------+-------+---------+
| p1_id | p1_name | p2_id | p2_name |
+-------+---------+-------+---------+
| 1 | foo | 2 | bar |
| 2 | bar | 1 | foo |
| 3 | baz | 1 | foo |
| 1 | foo | 4 | zoo |
+-------+---------+-------+---------+
The two-way relationship and the multiple join paths prevent SQLAlchemy from automatically determining the joins, and the fact that the relationships in both tables emit very similar error messages makes it difficult to understand where the problems lie (and whether a given change makes any progress in solving them). I found the simplest approach was to comment out the relationship in Player until MatchResult was working properly.
The changes to MatchResult are the same as those specified in the multiple join paths docs referenced in the question. To get the relationship in Player to work I specified the primary join condition so that SQLAlchemy could determine how to join to MatchResult.
class Player(Base):
    """A player; ``matches`` covers results where the player is p1 or p2."""

    __tablename__ = 'player'

    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String(100))

    # Explicit join condition: match on either foreign-key column.
    matches = orm.relationship(
        'MatchResult',
        primaryjoin="or_(Player.id == MatchResult.p1_id, Player.id == MatchResult.p2_id)",
    )
class MatchResult(Base):
    """A match between two players, each referenced by its own foreign key."""

    __tablename__ = 'match_result'

    id = sa.Column(sa.Integer, primary_key=True)
    p1_id = sa.Column(sa.Integer, sa.ForeignKey('player.id'))
    p2_id = sa.Column(sa.Integer, sa.ForeignKey('player.id'))

    # Each relationship names the single column it should join through.
    p1 = orm.relationship("Player", foreign_keys=[p1_id])
    p2 = orm.relationship("Player", foreign_keys=[p2_id])
Once these changes have been made, basic querying can be done without any explicit aliasing or joins.
# All results, printing both player ids and names.
ms = session.query(MatchResult)
for result in ms:
    print(result.p1_id, result.p1.name, result.p2_id, result.p2.name)

# Every match involving the player named 'bar'.
p1 = session.query(Player).filter(Player.name == 'bar').one()
for played in p1.matches:
    print(played.p1.name, played.p2.name)
The above code, for clarity and usefulness to other readers, does not include the inheritance, mixin and session management code that is specific to the OP's application. This version includes all of these.
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy import orm
class _Base:
    """Common base for all models: an auto-incrementing integer primary key."""

    id_ = sa.Column(
        sa.Integer,
        primary_key=True,
        autoincrement=True,
    )


# Declarative base class built on top of _Base.
Base = declarative_base(cls=_Base)
class BlgMixin:
    """Mixin that supplies the shared ``__table_args__`` for mapped tables."""

    # The decorator is required: without it ``BlgMixin.__table_args__`` is a
    # plain function and ``BlgMixin.__table_args__.get("schema")`` (used by
    # MatchResult) cannot work.
    @declared_attr
    def __table_args__(cls):
        """Return the table options: schema name and ``extend_existing``."""
        return {'schema': "belgarath_backup", "extend_existing": True}
class DataAccessLayer:
    """Holds the engine and, after ``create_session()``, an open session."""

    def __init__(self):
        """Create the engine for the local MySQL server."""
        # Credentials are separated from the host by "@" in a database URL.
        conn_string = "mysql+mysqlconnector://root:root@localhost/"
        self.engine = sa.create_engine(conn_string)

    def create_session(self):
        """Create the tables known to ``Base`` and open ``self.session``."""
        Base.metadata.create_all(self.engine)
        Session = orm.sessionmaker()
        Session.configure(bind=self.engine)
        self.session = Session()
class Player(Base, BlgMixin):
    """A player; ``match`` covers results where the player is p1 or p2."""

    __tablename__ = 'player'

    name = sa.Column(sa.String(100))

    # Explicit join condition: match on either foreign-key column.
    match = orm.relationship(
        'MatchResult',
        primaryjoin="or_(Player.id_ == MatchResult.p1_id, Player.id_ == MatchResult.p2_id)",
    )
class MatchResult(Base, BlgMixin):
    """A match between two players, each referenced by its own foreign key."""

    __tablename__ = 'match_result'

    # Both columns point at <schema>.player.id_, with the schema read from the mixin.
    p1_id = sa.Column(sa.Integer, sa.ForeignKey(f'{BlgMixin.__table_args__.get("schema")}.player.id_'))
    p2_id = sa.Column(sa.Integer, sa.ForeignKey(f'{BlgMixin.__table_args__.get("schema")}.player.id_'))

    # Each relationship names the single column it should join through.
    p1 = orm.relationship("Player", foreign_keys=[p1_id])
    p2 = orm.relationship("Player", foreign_keys=[p2_id])
dal = DataAccessLayer()

# Start from empty tables.
Base.metadata.drop_all(bind=dal.engine)
Base.metadata.create_all(bind=dal.engine)

names = ['foo', 'bar', 'baz', 'zoo']
dal.create_session()

ps = [Player(name=n) for n in names]
dal.session.add_all(ps)
# Flush so the players receive their primary keys before they are referenced.
dal.session.flush()

p1, p2, p3, p4 = ps
pairings = [(p1, p2), (p2, p1), (p3, p1), (p1, p4)]
dal.session.add_all(
    [MatchResult(p1_id=first.id_, p2_id=second.id_) for first, second in pairings]
)
dal.session.commit()

ms = dal.session.query(MatchResult)
for result in ms:
    print(result.p1_id, result.p1.name, result.p2_id, result.p2.name)
print()

p1 = dal.session.query(Player).filter(Player.name == 'bar').one()
for played in p1.match:
    print(played.p1.name, played.p2.name)

dal.session.close()
The issue is with the definition of this relationship match = relationship("MatchResult") for the Player class. If you completely remove this line, and use the below definitions for the relationships, all the queries you mentioned should work as expected:
class Player(Base, BlgMixin):
    """Row of the ``player`` table (no relationship declared on this side)."""

    __tablename__ = "player"

    name_ = Column(String(100))
class MatchResult(Base, BlgMixin):
    """A match between two players, each referenced by its own foreign key."""

    __tablename__ = "match_result"

    p1_id = Column(ForeignKey(Player.id_))
    p2_id = Column(ForeignKey(Player.id_))

    # Each relationship names the column it joins through.
    p1 = relationship(Player, foreign_keys=p1_id)
    p2 = relationship(Player, foreign_keys=p2_id)
In fact, the desired select query can also be constructed, but you need to specify the relationships explicitly on JOINs:
# Two aliases of Player so the same table can be joined once per side.
player_1 = aliased(Player)
player_2 = aliased(Player)
# Select both ids and both names, joining each alias through its own relationship.
q = (
    dal.session
    .query(
        MatchResult.p1_id,
        player_1.name_,
        MatchResult.p2_id,
        player_2.name_,
    )
    .join(player_1, MatchResult.p1) # explicitly specify which relationship/FK to join on
    .join(player_2, MatchResult.p2) # explicitly specify which relationship/FK to join on
)
I would, however, make a few more changes to the model to make it even more user-friendly:
add backref to the relationship so that it can be navigated back from the Player
add a property to show all the matches of one player for both sides
Model definitions:
class Player(Base, BlgMixin):
    """A player; match lists come from the backrefs declared on MatchResult."""

    __tablename__ = "player"

    name_ = Column(String(100))

    # Must be a property: callers iterate ``player.all_matches`` without calling it.
    @property
    def all_matches(self):
        """Return the player's matches from both sides (home first, then away)."""
        return self.matches_home + self.matches_away
class MatchResult(Base, BlgMixin):
    """A match between two players; backrefs expose it on Player."""

    __tablename__ = "match_result"

    p1_id = Column(ForeignKey(Player.id_))
    p2_id = Column(ForeignKey(Player.id_))

    # The backrefs add ``matches_home`` / ``matches_away`` to Player.
    p1 = relationship(Player, foreign_keys=p1_id, backref="matches_home")
    p2 = relationship(Player, foreign_keys=p2_id, backref="matches_away")
This will allow navigating the relationships as per below example:
# Load the player with primary key 1 and list every match they took part in.
p1 = session.query(Player).get(1)
print(p1)
for played in p1.all_matches:
    print(" ", played)
Related
I work with rolls of plastic film in different lengths and widths. I'm creating a database to store all the orders and, in order to avoid repetition, I created separate tables for length (class Comprimento) and width (class Largura). I used UUIDs to create distinct IDs.
Now, I want to cross both tables in a Model class. Which is:
class Largura(Base):
    """A width value; ``modelos_l`` lists the Modelo rows that use it."""

    __tablename__ = 'largura'

    id = Column(GUID(), primary_key=True, default=lambda: str(uuid.uuid4()))
    largura = Column(String)
    modelos_l = relationship(
        'Modelo',
        back_populates='larguras',
        cascade='all, delete',
    )

    def __repr__(self):
        return f"<Largura {self.largura}>"
class Comprimento(Base):
    """A length value; ``modelos_c`` lists the Modelo rows that use it."""

    __tablename__ = 'comprimento'

    id = Column(GUID(), primary_key=True, default=lambda: str(uuid.uuid4()))
    comprimento = Column(String)
    modelos_c = relationship(
        'Modelo',
        back_populates='comprimentos',
        cascade='all, delete',
    )

    def __repr__(self):
        return f"<Comprimento {self.comprimento}>"
class Modelo(Base):
    """A model: a description plus one width (Largura) and one length (Comprimento)."""

    __tablename__ = 'modelo'

    id = Column(GUID(), primary_key=True, default=lambda: str(uuid.uuid4()))
    descricao = Column(String(50))

    # Foreign keys must not default to a freshly generated UUID: that would
    # store a reference to a row that does not exist whenever the related
    # object is not supplied. They are filled from the relationships below.
    largura_id = Column(GUID(), ForeignKey("largura.id"))
    comprimento_id = Column(GUID(), ForeignKey("comprimento.id"))

    larguras = relationship('Largura', back_populates='modelos_l')
    comprimentos = relationship('Comprimento', back_populates='modelos_c')

    def __repr__(self):
        return f"<Modelo {self.id}>"
Then, i created a file dedicated to my data insert on this table:
from DBModelPy3 import Comprimento,Largura,Modelo,session
from sqlalchemy import create_engine
import pandas as pd
# Load the orders CSV into a dataframe.
df = pd.read_csv("dataorged.csv", sep=',')
pd.set_option('display.float_format','{:.0f}'.format) # display floats with no decimal places
# Open a connection to the SQLite database (echo=True logs the emitted SQL).
cnx = create_engine('sqlite:///data_hub2.db', echo=True).connect()
df_modelo = df[['larg_ajustada', 'comp']] # Only the two order columns needed for this insertion.
#print(df_modelo)
# Load the existing 'largura' and 'comprimento' tables from the database.
df_largura = pd.read_sql_table('largura', cnx)
df_comprimento = pd.read_sql_table('comprimento', cnx)
With everything loaded, I decided to combine all the lengths and widths I already had in my two tables (df_largura and df_comprimento), and then filter them using the original file which contains the orders.
# Every width/length pairing from the two tables, plus its "<width>x<length>" label.
model_num = [
    [db_larg, db_comp, str(db_larg) + "x" + str(db_comp)]
    for db_larg in map(str, df_largura['largura'])
    for db_comp in df_comprimento['comprimento']
]
df_modelos_ex = pd.DataFrame(model_num)
df_modelos_ex.columns = ['larg', 'comp', 'combined']
With these, i had all possible combinations on my dataframe.
And created the combined variable to match later
# Width/length pairs that actually occur in the orders, with the same
# "<width>x<length>" label used for matching.
modelos_existentes = [
    [mod_larg, mod_comp, str(mod_larg) + "x" + str(mod_comp)]
    for mod_larg, mod_comp in zip(df_modelo['larg_ajustada'], df_modelo['comp'])
]
df_mod_existentes = pd.DataFrame(modelos_existentes)
df_mod_existentes.columns = ['ex_larg', 'ex_comp', 'ex_comb']

# Keep one row per distinct combination and renumber the rows from 0.
df_limpo = df_mod_existentes.drop_duplicates(subset=['ex_comb'])
df_limpo.reset_index(drop=True, inplace=True)
With all my elements, then the madness began.
I started a loop to run through all my Dataframes:
# For each distinct order combination, find the matching width and length rows
# and build a Modelo linked to both.
for larg, comp, comb in zip(df_limpo['ex_larg'], df_limpo['ex_comp'], df_limpo['ex_comb']):
    for db_larg_id, db_larg_largura in zip(df_largura['id'], df_largura['largura']):
        if db_larg_largura != larg:
            continue
        # Only query the width once it is known to match this order row.
        lar = session.query(Largura).filter(Largura.id == db_larg_id).first()
        for db_comp_id, db_comp_comprimento in zip(df_comprimento['id'], df_comprimento['comprimento']):
            if db_comp_comprimento != comp:
                continue
            compr = session.query(Comprimento).filter(Comprimento.id == db_comp_id).first()
            # Use this row's own combination; the previous index name
            # (n_linha) was not defined by this loop.
            new_model = Modelo(descricao=comb, larguras=lar, comprimentos=compr)
from here, i would only add the session.add(new_model) and session.commit() to finish my code.
But it's not adding.
What I would like is for my Modelo table be like:
MODELO Table
ID(PK) | DESCRIPTION (Combined values String) | Largura_id (width_id, FK) | Comprimento_id (length_id, FK)
Sorry about the long explanation. Tried my best!
If anyone have the same trouble:
##########################
# ADDING TO THE DATABANK #
##########################
# Step 1: attach the width id to every order row whose width exists in the table.
lista_a = [
    [larg_a, comp_a, db_larg_id]
    for larg_a, comp_a in zip(df_limpo['ex_larg'], df_limpo['ex_comp'])
    for db_larg_id, db_larg_largura in zip(df_largura['id'], df_largura['largura'])
    if larg_a == db_larg_largura
]
df_lista_a = pd.DataFrame(lista_a)
df_lista_a.columns = ['larg', 'comp', 'id_larg']

# Step 2: attach the id of the first length row that matches.
lista_b = []
for larg_b, comp_b, larg_b_id in zip(df_lista_a['larg'], df_lista_a['comp'], df_lista_a['id_larg']):
    for db_comp_id, db_comp_comprimento in zip(df_comprimento['id'], df_comprimento['comprimento']):
        if comp_b == db_comp_comprimento:
            lista_b.append([larg_b, comp_b, larg_b_id, db_comp_id])
            break
df_lista_b = pd.DataFrame(lista_b)
df_lista_b.columns = ['larg', 'comp', 'id_larg', 'id_comp']
# HERE's the ACTUAL INSERTION
# One Modelo per matched (width, length, width id, length id) row.
for mod_largura, mod_comprimento, mod_largura_id, mod_comprimento_id in df_lista_b.itertuples(index=False, name=None):
    # Look up the rows matched for THIS model (previously row [1] of each
    # table was always used, linking every model to the same width/length).
    lar = session.query(Largura).filter(Largura.id == mod_largura_id).first()
    compr = session.query(Comprimento).filter(Comprimento.id == mod_comprimento_id).first()
    # Rebuild the "<width>x<length>" label from this row instead of indexing
    # df_limpo by position, which misaligns if any order row had no match.
    # NOTE(review): assumes the values format the same way as when
    # df_limpo['ex_comb'] was built — confirm.
    descricao = f"{mod_largura}x{mod_comprimento}"
    new_model = Modelo(descricao=descricao, larguras=lar, comprimentos=compr)
    print(f"Modelo: {descricao} com Id's {mod_largura_id} e {mod_comprimento_id} adicionados!")
    session.add(new_model)
# Commit once so the whole batch is stored together.
session.commit()
Then it's done.
I have the same question/problem as this post -> peewee - modify db model meta (e.g. schema) dynamically. I want to change the schema field in my Meta class dynamically. This is my code:
class GPSPosition(Model):
    """Peewee model for the ``test_gpspositions`` table, keyed on (orgid, id)."""

    def __init__(self, esquema, vehiculo, fechaFrom):
        # Stores the schema name with an '_org' suffix plus the two filters.
        self.esquema = esquema + '_org'
        self.vehiculo = vehiculo
        self.fechaFrom = fechaFrom

    # Identifiers
    orgid = BigIntegerField()
    id = BigIntegerField()
    vehicleid = BigIntegerField()
    driverid = BigIntegerField()
    originaldriverid = BigIntegerField(null=False)
    blockseq = IntegerField(null=False)

    # Fix time and position
    time = DateTimeField(null=False)
    latitude = FloatField(null=False)
    longitude = FloatField(null=False)
    altitude = SmallIntegerField(null=False)
    heading = SmallIntegerField(null=False)
    satellites = SmallIntegerField(null=False)
    hdop = FloatField(null=False)  # float
    ageofreading = IntegerField(null=False)
    distancesincereading = IntegerField(null=False)
    velocity = FloatField(null=False)
    isavl = BooleanField(null=False)
    coordvalid = BooleanField(null=False)
    speedkilometresperhour = DecimalField(null=False)
    speedlimit = DecimalField(null=False)
    vdop = SmallIntegerField(null=False)
    pdop = SmallIntegerField(null=False)
    odometerkilometres = DecimalField(null=False)
    formattedaddress = CharField(null=False)
    source = CharField(null=False)

    class Meta:
        database = db
        # NOTE: `esquema` is only a parameter of __init__; it is not a name
        # available in this class body.
        schema = esquema
        db_table = 'test_gpspositions'
        primary_key = CompositeKey("orgid", "id")
Can someone please show me the light about this? Thanks!
Well, I'll answer my own question since I found the answer some time ago and it's very simple: just add these one or two lines at the point where you want to change the schema name:
# Point the model's metadata at a different schema at runtime.
schemaname = 'your_schema_name'
YourPeeweeModel._meta.schema = schemaname
Works fine.
I am learning SQLAlchemy in Python.
Below is an example I am using.
First I generate a datafile contains puppy information like below:
class Puppy(Base):
    """Row of the ``puppy`` table; optionally linked to a shelter."""

    __tablename__ = 'puppy'

    id = Column(Integer, primary_key=True)
    name = Column(String(250), nullable=False)
    gender = Column(String(6), nullable=False)
    dateOfBirth = Column(Date)
    shelter_id = Column(Integer, ForeignKey('shelter.id'))
    weight = Column(Numeric(10))
male_names = ["Bailey", "Max", ...just some names..., "Luke", "Henry"]
female_names = ['Bella', 'Lucy', ...just some names..., 'Honey', 'Dakota']
def CreateRandomAge():
    """Return a datetime between 0 and 540 days (inclusive) before now."""
    return datetime.today() - timedelta(days=randint(0, 540))
def CreateRandomWeight():
    """Return a random float weight drawn uniformly from 1.0 to 40.0."""
    lightest, heaviest = 1.0, 40.0
    return random.uniform(lightest, heaviest)
# Insert one puppy per name: all the males first, then all the females.
for puppy_name in male_names:
    new_puppy = Puppy(
        name=puppy_name,
        gender="male",
        dateOfBirth=CreateRandomAge(),
        weight=CreateRandomWeight(),
    )
    session.add(new_puppy)
    session.commit()

for puppy_name in female_names:
    new_puppy = Puppy(
        name=puppy_name,
        gender="female",
        dateOfBirth=CreateRandomAge(),
        weight=CreateRandomWeight(),
    )
    session.add(new_puppy)
    session.commit()
Now I want to filter some kinds of puppies as below:
# Keyword-style equality filter on the `name` column.
testpuppy = session.query(Puppy).filter_by(name='Lucy')
print(testpuppy)
# Cut-off date: 180 days before now.
birthdate = datetime.today() - timedelta(days=180)
# NOTE: `dateOfBirth` is a bare Python name here, not a keyword argument, so
# Python looks it up as a variable before filter_by is ever called.
smallpuppy = session.query(Puppy).filter_by(dateOfBirth < birthdate)
print(smallpuppy)
The strange part is that the testpuppy query works and I can get Lucy, but the dateOfBirth query does not: every time I try to get these small puppies, I just get an error:
NameError: name 'dateOfBirth' is not defined
I really can not understand, why my filter can only be operated on some attribute, where is wrong?
The problem is that you need to use filter instead of filter_by like this:
smallpuppy = session.query(Puppy).filter(Puppy.dateOfBirth < birthdate)
For filter, the criterion must use ClassName.propertyName to access the column, and you can use comparison operators such as < or >.
For filter_by, the criterion uses propertyName directly as a keyword argument to access the column, but you cannot use < or > (only equality).
Please refer to this answer, it will give you more details about the difference between filter and filter_by.
I'm using SQLAlchemy to define my tables. These tables describe seismic events, which are arranged in Event, Origin, Magnitude, Real_Quantity and Time_Quantity. They are meant to follow the QuakeML standard. The Event table is related to Origin through .preferredOriginID and .publicID, and Origin is related to Real_Quantity through .latitude_id and .id.
I want to find all longitudes and latitudes that are within a specified radius, but the problem is that both latitude and longitude are in the same Real_Quantity column and the Origin table is where specify which are different.
This is the code that I want to implement, but it is in MySQL
-- Distance from the point (lat 37, lng -122) to every marker, using the
-- spherical law of cosines; keeps the 20 nearest markers within 25 units.
SELECT
    id,
    (
        acos(
            (
                cos(radians(37))
                * cos(radians(lat))
                * cos(radians(lng) - radians(-122))
            )
            + (
                sin(radians(37))
                * sin(radians(lat))
            )
        ) * 3959  -- presumably the Earth's radius in miles; confirm the unit
    ) AS distance
FROM markers
HAVING distance < 25
ORDER BY distance
LIMIT 0, 20;
This is what I did, but it only uses the latitudes, and I want to use the latitudes together with the longitudes:
# Events joined to their Origin and to the RealQuantity row holding the
# latitude; only the latitude value takes part in the HAVING condition.
z = self.session.query(Event) \
    .join(Origin) \
    .join(RealQuantity, Origin.latitude) \
    .filter(
        Event.preferredOriginID == Origin.publicID,
        RealQuantity.id == Origin.latitude_id
    ) \
    .group_by(Event, Origin.latitude, RealQuantity.value) \
    .having(func.cos(RealQuantity.value) < 50)
Event:
id| publicID | preferredOriginID | preferredMagnitudeID | type |....
Origin:
id| publicID | time_id |latitude_id | longitude_id | depth_id |...
Real_Quantity:
id| value | ....
The Origin is just pointers, the values of this are in Real_Quantity
My models are:
class Event(Base):
    """Row of the ``event`` table, with an optional CreationInfo."""

    __tablename__ = 'event'

    id = Column(Integer, primary_key=True)
    publicID = Column(String)
    preferredOriginID = Column(String)
    preferredMagnitudeID = Column(String)
    type = Column(String)
    typeCertainty = Column(String)

    creationInfo_id = Column(Integer, ForeignKey('creation_info.id'))
    creationInfo = relationship(
        CreationInfo,
        backref=backref('event', uselist=False),
    )
class Origin(Base):
    """Row of the ``origin`` table.

    Latitude, longitude and depth are each a separate foreign key into
    ``real_quantity``, so every relationship names its own column.
    """

    __tablename__ = 'origin'

    id = Column(Integer, primary_key=True)
    publicID = Column(String)

    time_id = Column(Integer, ForeignKey('time_quantity.id'))
    time = relationship(TimeQuantity, backref=backref('origin', uselist=False))

    latitude_id = Column(Integer, ForeignKey('real_quantity.id'))
    latitude = relationship(
        RealQuantity,
        foreign_keys=[latitude_id],
        backref=backref('origin_lat', uselist=False),
    )

    longitude_id = Column(Integer, ForeignKey('real_quantity.id'))
    longitude = relationship(
        RealQuantity,
        foreign_keys=[longitude_id],
        backref=backref('origin_lon', uselist=False),
    )

    depth_id = Column(Integer, ForeignKey('real_quantity.id'))
    depth = relationship(
        RealQuantity,
        foreign_keys=[depth_id],
        backref=backref('origin_depth', uselist=False),
    )

    creationInfo_id = Column(Integer, ForeignKey('creation_info.id'))
    creationInfo = relationship(CreationInfo, backref=backref('origin', uselist=False))

    event_id = Column(Integer, ForeignKey('event.id'))
    event = relationship('Event', backref=backref('origin', uselist=True))
class RealQuantity(Base):
    """Row of the ``real_quantity`` table: a float value with its uncertainties."""

    __tablename__ = 'real_quantity'

    id = Column(Integer, primary_key=True)

    value = Column(Float)
    uncertainty = Column(Float)
    lowerUncertainty = Column(Float)
    upperUncertainty = Column(Float)
    confidenceLevel = Column(Float)
Not a solution (yet), just some comments:
For every query, you are doing a complex calculation on every entry in the Origin table. As the number of entries increases, this will become very slow (computationally expensive).
Think of a circle (x=lon, y=lat, r=distance) projected on the globe. You can calculate min and max latitude easily; min and max longitude can also be done, although the math is quite a bit trickier.
If you have properly indexed the Origin table by latitude and longitude, you can do a very fast (computationally cheap) initial box-select on min_lat <= lat <= max_lat and min_lon <= lon <= max_lon which should trivially discard 99% of the entries (depending on radius and clustery-ness of the Origin points); remaining entries should have roughly an 80% chance of belonging to your desired data-set, and you only need to run the expensive calculation on the remaining entries.
I would strongly recommend writing this as a stored procedure.
I am using SQLAlchemy with the ORM paradigm. I can't find a way to write a CASE WHEN expression, and I can't find any information about this on the web.
Is it possible ?
See sqlalchemy.sql.expression.case function and more examples on the documentation page. But it would look like this (verbatim from the documentation linked to):
case([(orderline.c.qty > 100, item.c.specialprice),
(orderline.c.qty > 10, item.c.bulkprice)
], else_=item.c.regularprice)
case(value=emp.c.type, whens={
'engineer': emp.c.salary * 1.1,
'manager': emp.c.salary * 3,
})
edit-1: (answering the comment) Sure you can, see example below:
class User(Base):
    """Row of the ``users`` table: an id plus first and last name."""

    __tablename__ = 'users'

    id = Column(Integer, primary_key=True, autoincrement=True)
    first_name = Column(String)
    last_name = Column(String)
# "first last" when a first name exists, otherwise just the last name.
xpr = case(
    [(User.first_name != None, User.first_name + " " + User.last_name)],  # noqa: E711 - builds SQL "IS NOT NULL"
    else_=User.last_name,
).label("full_name")

qry = session.query(User.id, xpr)
for _usr in qry:
    # The expression is labelled "full_name", so that is the attribute name on
    # each result row (there is no "fullname" attribute).
    print(_usr.full_name)
Also see Using a hybrid for an example of case used in the hybrid properties.
I got this to work with an aggregate function, in this case func.sum
My Example Code
from sqlalchemy import func, case
# Per-row score: 1 for 'easy'/'medium' hits, 3 for 'hard' hits.
my_case_stmt = case(
    [
        (MyTable.hit_type.in_(['easy', 'medium']), 1),
        (MyTable.hit_type == 'hard', 3)
    ]
)
# Sum the per-row scores over the rows where success == 1.
score = db.session.query(
    func.sum(my_case_stmt)
).filter(
    MyTable.success == 1
)
# This snippet is the tail of an enclosing function: return the single summed value.
return score.scalar()
My Use Case
MyTable looks like this:
| hit_type | success |
-----------------------------
| easy | 1 |
| medium | 1 |
| easy | 0 |
| hard | 1 |
| easy | 0 |
| easy | 1 |
| medium | 1 |
| hard | 1 |
score is computed as such:
score = num_easy_hits + num_medium_hits + (3 * num_hard_hits)
4 successful easy/medium hits and 2 successful hard hits gives you (4 + (2*3)) = 10
Here is the link in the doc:
http://docs.sqlalchemy.org/en/latest/core/sqlelement.html?highlight=case#sqlalchemy.sql.expression.Case
but those examples confused me, and there is no runnable code.
I tried many times and ran into many kinds of problems.
Finally, I found two ways to implement "CASE WHEN" with SQLAlchemy.
The first way:
By the way, my occasion is I need to mask the phone field depending on if the user has logged in.
@staticmethod
def requirement_list_common_query(user=None):
    """Build the shared Requirement listing query.

    The ``temp_phone`` column is returned as stored when ``user`` is truthy;
    otherwise only its first 3 and last 4 characters are kept, joined by
    ``****``.

    :param user: the logged-in user, or ``None`` for an anonymous request.
    :return: the query joining Requirement with Company and User.
    """
    masked_phone = func.concat(
        func.left(Requirement.temp_phone, 3),
        '****',
        func.right(Requirement.temp_phone, 4),
    )
    # The CASE condition is a constant true/false chosen in Python from `user`.
    phone_mask = case(
        [
            (db.true() if user else db.false(), Requirement.temp_phone),
        ],
        else_=masked_phone
    ).label('temp_phone')

    query = db.session.query(
        Requirement.company_id,
        Company.uuid.label('company_uuid'),
        Company.name.label('company_name'),
        Requirement.uuid,
        Requirement.title,
        Requirement.content,
        Requirement.level,
        Requirement.created_at,
        Requirement.published_at,
        Requirement.end_at,
        Requirement.status,
        phone_mask,  # replaces the raw Requirement.temp_phone column
        User.name.label('user_name'),
        User.uuid.label('user_uuid'),
    )
    query = query.join(Company, Company.id == Requirement.company_id) \
        .join(User, User.id == Requirement.user_id)
    return query
Requirement is my one of my models.
the user argument in the method 'requirement_list_common_query' is the logged-in user if the user has logged in.
the second way:
the occasion here is I want to classify the employees depend on their income.
the models are:
class Dept(Base):
    """Row of the ``dept`` table."""

    __tablename__ = 'dept'

    deptno = Column(Integer, primary_key=True)
    dname = Column(String(14))
    loc = Column(String(13))

    def __repr__(self):
        # Rendered as the string form of a dict of the three columns.
        fields = {
            'deptno': self.deptno,
            'dname': self.dname,
            'loc': self.loc
        }
        return str(fields)
class Emp(Base):
    """Row of the ``emp`` table; ``deptno`` references ``dept.deptno``."""

    __tablename__ = 'emp'

    empno = Column(Integer, primary_key=True)
    ename = Column(String(10))
    job = Column(String(9))
    mgr = Column(Integer)
    hiredate = Column(Date)
    sal = Column(DECIMAL(7, 2))
    comm = Column(DECIMAL(7, 2))
    deptno = Column(Integer, ForeignKey('dept.deptno'))

    def __repr__(self):
        # Rendered as the string form of a dict of selected columns.
        fields = {
            'empno': self.empno,
            'ename': self.ename,
            'job': self.job,
            'deptno': self.deptno,
            'comm': self.comm
        }
        return str(fields)
Here is the code:
from sqlalchemy import text

# Classify each employee by total income (salary plus commission, NULL
# commission counted as 0).
income_level = case(
    [
        (text('(emp.sal + ifnull(emp.comm,0))<1500'), 'LOW_INCOME'),
        # SQL has no chained comparison: "1500<=x<3500" is evaluated as
        # "(1500<=x)<3500", i.e. 0-or-1 < 3500, which is true for every
        # non-NULL x and made HIGH_INCOME unreachable. Spell out both bounds.
        (text('((emp.sal + ifnull(emp.comm,0))>=1500 AND (emp.sal + ifnull(emp.comm,0))<3500)'), 'MIDDLE_INCOME'),
        (text('(emp.sal + ifnull(emp.comm,0))>=3500'), 'HIGH_INCOME'),
    ], else_='UNKNOWN'
).label('income_level')

emps = sess.query(
    Emp.ename,
    label('income', Emp.sal + func.ifnull(Emp.comm, 0)),
    income_level,
).all()
for item in emps:
    print(item.ename, item.income, item.income_level)
Why did I use "text"? Because I could not get code like the following to work in SQLAlchemy 1.2.8. I tried for a long time and could not find a way to write it as @van has said:
case([(orderline.c.qty > 100, item.c.specialprice),
(orderline.c.qty > 10, item.c.bulkprice)
], else_=item.c.regularprice)
case(value=emp.c.type, whens={
'engineer': emp.c.salary * 1.1,
'manager': emp.c.salary * 3,
})
Hope this helps!