peewee check automatically created id not in result of subquery - python

I have the following data structure:
from enum import IntEnum, unique
from pathlib import Path
from datetime import datetime
from peewee import *
@unique
class Status(IntEnum):
    """Integer status codes used for a task's ``status`` field."""

    CREATED = 0
    FAIL = -1
    SUCCESS = 1
# The SQLite file lives in the same directory as this module.
db_path = Path(__file__).with_name("test.sqlite")
database = SqliteDatabase(db_path)
class BaseModel(Model):
    """Common base class that binds its subclasses to the shared ``database``."""

    class Meta:
        database = database
class Unit(BaseModel):
    """A unit identified by a unique name.

    No primary key is declared here, so the ``id`` column is the one peewee
    creates automatically.
    """

    name = TextField(unique=True)
    some_field = TextField(null=True)
    # ``datetime.now`` is passed as a callable, so it is evaluated per row.
    created_at = DateTimeField(default=datetime.now)
class Campaign(BaseModel):
    """A campaign; tasks refer to it through ``Task.campaign``."""

    id_ = AutoField()
    # ``datetime.now`` is passed as a callable, so it is evaluated per row.
    created_at = DateTimeField(default=datetime.now)
class Task(BaseModel):
    """Links one ``Unit`` to one ``Campaign`` and stores a ``Status`` value."""

    id_ = AutoField()
    status = IntegerField(default=Status.CREATED)
    # Both foreign keys expose a ``tasks`` back-reference on the target model.
    unit = ForeignKeyField(Unit, backref="tasks")
    campaign = ForeignKeyField(Campaign, backref="tasks")
The following code creates units, a campaign and tasks:
def fill_units(count):
    """Create ``count`` units named ``unit0``, ``unit1``, ... and return them.

    All inserts run inside one ``database.atomic()`` block.

    :param count: number of units to create
    :return: list of the created ``Unit`` instances
    """
    with database.atomic():
        units = [Unit.create(name=f"unit{i}") for i in range(count)]
    return units
def init_campaign(count):
    """Create a campaign with one task for each of up to ``count`` units.

    The campaign and its tasks are created inside one ``database.atomic()``
    block.

    :param count: maximum number of units to attach to the new campaign
    :return: the created ``Campaign`` instance
    """
    units = Unit.select().limit(count)
    with database.atomic():
        campaign = Campaign.create()
        for unit in units:
            Task.create(unit=unit, campaign=campaign)
    return campaign
The problem appears when I try to add more units to an existing campaign. I need to select units which haven't been used in this campaign yet. In SQL I can do this with the following query:
SELECT * FROM unit WHERE id NOT IN (SELECT unit_id FROM task WHERE campaign_id = 1) LIMIT 10
But how can I do this using peewee?
The only way I've found so far is:
def get_new_units_for_campaign(campaign, count):
    """Select up to ``count`` units that have no task in ``campaign``.

    The names of the already-used units are collected in Python by iterating
    over ``campaign.tasks`` and are then passed to a ``NOT IN`` filter.

    :param campaign: campaign whose existing tasks are excluded
    :param count: maximum number of units to select
    :return: a ``Unit`` select query
    """
    unit_names = [task.unit.name for task in campaign.tasks]
    return Unit.select().where(Unit.name.not_in(unit_names)).limit(count)
It somehow works, but I'm 100% sure that it's the dumbest way to implement this. Could you show me the proper way to implement this?

Finally I found this:
# Units whose id does not appear among the unit ids of this campaign's tasks.
(
    Unit.select()
    .where(Unit.id.not_in(campaign.tasks.select(Task.unit)))
    .limit(10)
)
Which produces
SELECT "t1"."id", "t1"."name", "t1"."some_field", "t1"."created_at" FROM "unit" AS "t1" WHERE ("t1"."id" NOT IN (SELECT "t2"."unit_id" FROM "task" AS "t2" WHERE ("t2"."campaign_id" = 1))) LIMIT 10
Which matches the SQL query I've provided in my question.
P.S. I've done some research and it seems to be a proper implementation, but I'd appreciate it if somebody corrected me and showed a better way (if one exists).

Related

How to query additional databases using cursor in Django Pytests

I am developing a Django app (Django v3.2.10, pytest v7.0.1, pytest-django v4.5.2) which uses a cursor to perform raw queries against my secondary DB, my_db2, but when running tests, all the queries return empty results, as if they were running in separate, parallel transactions.
My test file:
@pytest.mark.django_db(transaction=True, databases=['default', 'my_db2'])
class TestItems:
    """Tests that create a person via the factory and count rows via a raw cursor."""

    def test_people(self):
        person1 = PeopleFactory()  # Adds 1 person to my_db2
        assert fetch_all_persons() == 1  # Fails Returns 0
My Factory:
class PeopleFactory(factory.django.DjangoModelFactory):
    """Factory for ``People`` rows with sequential ids and fake first names."""

    id = factory.Sequence(lambda x: x + 1)
    name = factory.Faker('first_name')

    class Meta:
        model = People
My function:
from django.db import connections
def fetch_all_persons():
    """Return the number of rows in the ``Persons`` table of ``my_db2``.

    The query goes through a raw cursor on the ``my_db2`` connection rather
    than through the ORM.
    """
    with connections['my_db2'].cursor() as cursor:
        cursor.execute("SELECT * FROM Persons")
        return len(cursor.fetchall())
According to the documentation, transaction=True should prevent this issue, but it doesn't. Does somebody know how to fix it?
Note.- Using the ORM is not an option, this is just a simplified example to represent the issue. The real queries used are way more complex.
@hoefling and @Arkadiusz Łukasiewicz were right, I just needed to add the corresponding DB within the factories:
class PeopleFactory(factory.django.DjangoModelFactory):
    """Factory for ``People`` rows, bound to the ``my_db2`` database."""

    id = factory.Sequence(lambda x: x + 1)
    name = factory.Faker('first_name')

    class Meta:
        model = People
        # Create the objects in the secondary database instead of 'default'.
        database = 'my_db2'
Thank you both.

Sorting time column derived from values of other columns using hybrid_property

I'm trying to build an event-booking system as a side project to learn python and web development. Below are two of the models implemented in my project. An EventSlot represents a timeslot scheduled for a particular Event.
Models
from app import db
from sqlalchemy import ForeignKey
from dateutil.parser import parse
from datetime import timedelta
from sqlalchemy.ext.hybrid import hybrid_property
class Event(db.Model):
    """An event with a title, a duration and a price, plus its scheduled slots."""

    event_id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String, index=True, nullable=False)
    # The hybrid properties on EventSlot multiply this by 60 to get minutes.
    duration = db.Column(db.Float, nullable=False)
    price = db.Column(db.Float, nullable=False)
    slots = db.relationship('EventSlot', cascade='all, delete', back_populates='event')
class EventSlot(db.Model):
    """A timeslot scheduled for a particular ``Event``."""

    slot_id = db.Column(db.Integer, primary_key=True)
    event_date = db.Column(db.DateTime, nullable=False)
    event_id = db.Column(db.Integer, ForeignKey('event.event_id'))
    event = db.relationship('Event', back_populates='slots')
I've provided an admin page (Flask-Admin) for admin-users to view database records. On the EventSlot page, I included 'Start Time' and 'End Time' column which I want to make sortable. I've appended to the EventSlot model the following:
class EventSlot(db.Model):
    #...

    ## working as intended ##
    @hybrid_property
    def start_time(self):
        """Return the slot's start time formatted as ``'%I:%M %p'``."""
        dt = parse(str(self.event_date))
        return dt.time().strftime('%I:%M %p')

    @start_time.expression
    def start_time(cls):
        """SQL expression used when ``start_time`` appears in a query."""
        return db.func.time(cls.event_date)

    ## DOES NOT WORK: can display derived time, but sorting is incorrect ##
    @hybrid_property
    def end_time(self):
        """Return the start time plus the event duration, formatted as ``'%I:%M %p'``."""
        rec = Event.query.filter(Event.event_id == self.event_id).first()
        duration = rec.duration * 60
        derived_time = self.event_date + timedelta(minutes=duration)
        dt = parse(str(derived_time))
        return dt.time().strftime('%I:%M %p')

    @end_time.expression
    def end_time(cls):
        # NOTE: here ``cls.event_id`` is a column, not a value, so this query
        # runs once and ``duration`` becomes one fixed string for every row.
        rec = Event.query.filter(Event.event_id == cls.event_id).first()
        duration = '+' + str(int(rec.duration * 60)) + ' minutes'
        return db.func.time(cls.event_date, duration)
As can be seen from the image below, the sort order is wrong when I sort by 'end time'. It appears to be still sorting by start time. What might be the problem here?
(Admittedly, I still don't understand hybrid_properties. I thought I had got it when got start_time working, but now it seems I still don't understand a thing...)
In the expression for end_time the cls.event_id represents a column, not a value, so the query ends up performing an implicit join between Event and EventSlot and picks the first result of that join. This of course is not what you want, but instead for an EventSlot you want to find out the duration of the related Event in SQL. This seems like a good place to use a correlated scalar subquery:
@end_time.expression
def end_time(cls):
    """SQL expression for the slot's end time, used when sorting or filtering."""
    # Get the duration of the related Event
    ev_duration = Event.query.\
        with_entities(Event.duration * 60).\
        filter(Event.event_id == cls.event_id).\
        as_scalar()
    # This will form a string concatenation SQL expression, binding the strings as
    # parameters to the query.
    duration = '+' + ev_duration.cast(db.String) + ' minutes'
    return db.func.time(cls.event_date, duration)
Note that the query is not run when the attribute is accessed in query context, but becomes a part of the parent query.

Subquery in django ORM

I have a table with next columns:
key
time
value
And I need to have a query like that:
SELECT
"time",
SUM("value")
FROM (
SELECT
"key",
django_trunc_datetime("time"),
AVG("value")
FROM my_table
GROUP BY "key", django_trunc_datetime("time")
)
GROUP BY "time"
Is it possible in Django ORM? Maybe with some fake model based on the subquery?
Thanks
UPDATED:
Looks like I have to create five database views (because there are Hour/Day/Week/Month/Year arguments for the django_trunc_datetime) but it can have a bad performance because in this case, I can't do the previous filtering. :(
I also thought about SQLAlchemy but it doesn't have universal datetime truncate function
SOLUTION
The solution with the Django ORM (not a complete solution, but it illustrates the idea):
class TheApp(models.Model):
    """Source model for the inner query: a datetime ``a`` and an integer ``b``."""

    a = models.DateTimeField()
    b = models.IntegerField()
class B(models.Model):
    """Unmanaged model describing the columns (``c``, ``d``) of the inner query."""

    class Meta:
        # Unmanaged: Django does not create or migrate a table for this model.
        managed = False

    c = models.DateTimeField()
    d = models.IntegerField()
# Insert three sample rows, each stamped with the current time.
TheApp.objects.create(a=datetime.now(), b=4)
TheApp.objects.create(a=datetime.now(), b=5)
TheApp.objects.create(a=datetime.now(), b=7)
# Inner query: expose b as c and Max(a) as d, then override the grouping to c.
q1 = TheApp.objects.annotate(c=F('b'), d=Max('a')).values('c', 'd', 'id').query
q1.group_by = ('c',)
# Outer query, written against the unmanaged model B so that c and d can be
# referenced; its grouping is overridden to a.
q2 = B.objects.annotate(a=F('c') * 2, b=Max('d')).values('a', 'b', 'id').query
q2.group_by = ('a',)
# Render q2 as SQL text, rename 'theapp_b' to 'sub', and splice the rendered q1
# in as the derived table "sub".
# NOTE(review): 'theapp_b' is presumably B's table name - confirm.
q3 = str(q2).replace('theapp_b', 'sub').replace('FROM "sub" ', f'FROM ({q1}) AS "sub" ')
print(q3)
# Run the hand-assembled SQL as a raw query and print the resulting rows.
print(list(B.objects.raw(q3)))
The solution I have chosen:
Use SQLAlchemy via aldjemy

Use cassandra inbuild `now()` function to generate TimeUUID with Model in Python driver

I have code as
import time
from uuid import uuid4
import cassandra
from cassandra.cqlengine.models import Model
from cassandra.cqlengine.query import BatchQuery
from cassandra.cqlengine import columns, connection
from cassandra.cqlengine.management import sync_table
class StudentModel(Model):
    """cqlengine model for the ``student`` table."""

    __table_name__ = 'student'
    id = columns.UUID(primary_key=True, default=uuid4)
    # NOTE: this default is a value computed once, when the class body runs,
    # not a callable invoked for each new row.
    created_timestamp = columns.TimeUUID(
        primary_key=True,
        clustering_order='DESC',
        default=cassandra.util.uuid_from_time(time.time()))
    name = columns.Text(required=True, default='')
class ClassRoomModel(Model):
    """cqlengine model for the ``class_room`` table."""

    __table_name__ = 'class_room'
    id = columns.UUID(primary_key=True, default=uuid4)
    # NOTE: this default is a value computed once, when the class body runs,
    # not a callable invoked for each new row.
    created_timestamp = columns.TimeUUID(
        primary_key=True,
        clustering_order='DESC',
        default=cassandra.util.uuid_from_time(time.time()))
    name = columns.Text(required=True, default='')
class StudentToClass(Model):
    """cqlengine model for the ``student_to_class_mapping`` table.

    The primary key is ``class_room_id`` plus ``created_timestamp``.
    """

    __table_name__ = 'student_to_class_mapping'
    class_room_id = columns.UUID(primary_key=True)
    # NOTE: this default is a value computed once, when the class body runs,
    # not a callable invoked for each new row.
    created_timestamp = \
        columns.TimeUUID(primary_key=True,
                         clustering_order='DESC',
                         default=cassandra.util.uuid_from_time(time.time()))
    student_id = columns.UUID()
class ClassToStudent(Model):
    """cqlengine model for the ``class_to_student_mapping`` table.

    The primary key is ``student_id`` plus ``created_timestamp``.
    """

    __table_name__ = 'class_to_student_mapping'
    student_id = columns.UUID(primary_key=True)
    # NOTE: this default is a value computed once, when the class body runs,
    # not a callable invoked for each new row.
    created_timestamp = \
        columns.TimeUUID(primary_key=True,
                         clustering_order='DESC',
                         default=cassandra.util.uuid_from_time(time.time()))
    class_room_id = columns.UUID()
if __name__ == '__main__':
    # Connect to the local cluster and make sure all four tables exist.
    connection.setup(hosts=['localhost'],
                     default_keyspace='test')
    sync_table(StudentModel)
    sync_table(ClassRoomModel)
    sync_table(StudentToClass)
    sync_table(ClassToStudent)

    # Create 100 students and a single class room.
    students = []
    for i in range(100):
        students.append(StudentModel.create(name='student' + str(i)))
    class_room = ClassRoomModel.create(name='class1')

    # Write both mapping rows for each student in one batch per student.
    for student in students:
        # Single-argument print() gives the same output on Python 2 and 3.
        print("Creating batch for:  %s" % student.name)
        with BatchQuery() as batch_query:
            ClassToStudent.batch(batch_query).create(
                student_id=student.id, class_room_id=class_room.id)
            StudentToClass.batch(batch_query).create(
                student_id=student.id, class_room_id=class_room.id)
This code works fine and creates the records too. When I check the record counts, they match for 3 tables, but for test.student_to_class_mapping the count should be 100 and it is only 1.
cqlsh> select count(*) from test.student;
count
-------
100
(1 rows)
cqlsh> select count(*) from test.class_room ;
count
-------
1
(1 rows)
cqlsh> select count(*) from test.class_to_student_mapping;
count
-------
100
(1 rows)
cqlsh> select count(*) from test.student_to_class_mapping ;
count
-------
1
(1 rows)
I found the issue: logic-wise it's correct; the only issue is the clustering key in test.student_to_class_mapping.
# Clustering-key column of the mapping model.
# NOTE: ``default`` receives the value returned by uuid_from_time(...) at the
# moment this line runs, not a callable that is invoked for each row.
created_timestamp = \
columns.TimeUUID(primary_key=True,
clustering_order='DESC',
default=cassandra.util.uuid_from_time(time.time()))
cassandra.util.uuid_from_time(time.time()) is not able to generate a unique UUID for each record. I could use uuid1, but I have already faced issues with uuid1.
I know we can use now(), so I changed my code to
from cassandra.query import BatchStatement, SimpleStatement
from cassandra.cqlengine import connection
...
...
# One INSERT per mapping table; created_timestamp is generated server-side by
# CQL now().
insert_cql = ('INSERT INTO {0} '
              '("student_id", "created_timestamp", "class_room_id") '
              'VALUES ({1}, now(), {2})')
batch_query = BatchStatement()
for mapping_model in (StudentToClass, ClassToStudent):
    statement_text = insert_cql.format(
        mapping_model.column_family_name(), student.id, class_room.id)
    batch_query.add(SimpleStatement(statement_text))
connection.session.execute(batch_query)
...
...
Now it's working fine and creating all records as per the logic.
I want to know: is there any way to use now() with the Model's create method?
What happened:
default = None
the default value, can be a value or a callable (no args)
(from https://datastax.github.io/python-driver/api/cassandra/cqlengine/columns.html)
Your line with
default=cassandra.util.uuid_from_time(time.time())
got evaluated at startup time and contained a single value as uuid. Try something like this:
from uuid import uuid1,uuid4
class Comment(Model):
    """Example model whose TimeUUID default is a callable (``uuid1``)."""

    photo_id = UUID(primary_key=True)
    comment_id = TimeUUID(primary_key=True, default=uuid1)  # second primary key component is a clustering key
    comment = Text()
Found here. https://datastax.github.io/python-driver/api/cassandra/cqlengine/query.html
Another (purely personal) remark - generate the UUID explicitly, as one often needs it afterwards ;)

Querying joined tables in SQLAlchemy and displaying in an ObjectListView

I have an ObjectListView that displays information retrieved from an SQLite DB with SQLAlchemy.
def setupOLV(self):
    """Set the empty-list message and the column definitions of the loan list."""
    self.loanResultsOlv.SetEmptyListMsg("No Loan Records Found")
    # The fourth argument of each ColumnDefn names the attribute read from
    # the row objects given to the list.
    self.loanResultsOlv.SetColumns([
        ColumnDefn("Date Issued", "left", 100, "date_issued",
                   stringConverter="%d-%m-%y"),
        ColumnDefn("Card Number", "left", 100, "card_id"),
        ColumnDefn("Student Number", "left", 100, "person_id"),
        ColumnDefn("Forename", "left", 150, "person_fname"),
        ColumnDefn("Surname", "left", 150, "person_sname"),
        ColumnDefn("Reason", "left", 150, "issue_reason"),
        ColumnDefn("Date Due", "left", 100, "date_due",
                   stringConverter="%d-%m-%y"),
        ColumnDefn("Date Returned", "left", 100, "date_returned",
                   stringConverter="%d-%m-%y")
    ])
I also have three models, Loan:
class Loan(DeclarativeBase):
    """
    Loan model
    """
    __tablename__ = "loans"

    id = Column(Integer, primary_key=True)
    card_id = Column(Unicode, ForeignKey("cards.id"))
    person_id = Column(Unicode, ForeignKey("people.id"))
    date_issued = Column(Date)
    date_due = Column(Date)
    date_returned = Column(Date)
    issue_reason = Column(Unicode(50))

    # Related rows; each relation also adds a ``loans`` backref on its target.
    person = relation("Person", backref="loans", cascade_backrefs=False)
    card = relation("Card", backref="loans", cascade_backrefs=False)
Person:
class Person(DeclarativeBase):
    """
    Person model
    """
    __tablename__ = "people"

    id = Column(Unicode(50), primary_key=True)
    fname = Column(Unicode(50))
    sname = Column(Unicode(50))
and Card:
class Card(DeclarativeBase):
    """
    Card model
    """
    __tablename__ = "cards"

    id = Column(Unicode(50), primary_key=True)
    active = Column(Boolean)
I am trying to join the tables (loans and people) in order to retrieve and display the information in my ObjectListView. Here is my query method:
def getQueriedRecords(session, filterChoice, keyword):
    """
    Searches the database based on the filter chosen and the keyword
    given by the user

    :param session: SQLAlchemy session used to run the query
    :param filterChoice: ``"person"`` to match ``Loan.person_id`` or
        ``"card"`` to match ``Loan.card_id``
    :param keyword: value the chosen column must equal
    :return: list of matching ``Loan`` objects
    :raises ValueError: if ``filterChoice`` is neither ``"person"`` nor ``"card"``
    """
    # Reject unknown filters up front; previously ``result`` was left unbound.
    if filterChoice not in ("person", "card"):
        raise ValueError("Unknown filterChoice: %r" % (filterChoice,))
    qry = session.query(Loan)
    if filterChoice == "person":
        result = qry.join(Person).filter(Loan.person_id == '%s' % keyword).all()
    else:
        result = qry.join(Person).filter(Loan.card_id == '%s' % keyword).all()
    return result
I can retrieve and display every field stored in the loans table but forename and surname (should be drawn from people table and joined on person.id) are blank in my ObjectListView. I have SQL output on so I can see the query and it is not selecting at all from the people table.
How can I modify my query/ObjectListView to retrieve and display this information. ?
UPDATE: I have created an example script that is runnable here.
You're only querying for a Loan (qry = session.query(Loan)). Why do you expect something else to be in the results besides what's in the SELECT statement?
I admit that I am pretty new to SQLAlchemy myself, but I thought I would share what I use to display results from my queries. I have a program that uses a SQLite DB with 4+ tables and I pull data from 2-3 of them in a single query and display this information in an ObjectListView. I owe Mike Driscoll for his in depth tutorials, particularly wxPython and SqlAlchemy: An Intro to MVC and CRUD.
Here is what I would possibly add/change in your code.
In your model section add a "display" class such as:
class OlvDisplay(object):
    """Flat row object holding loan fields plus the person's names.

    The attribute names match the value-getter strings used by the
    ObjectListView column definitions (``person_fname``, ``person_sname``, ...).
    """

    def __init__(self, date_issued, card_id, person_id, fname, sname,
                 issue_reason, date_due, date_returned):
        self.date_issued = date_issued
        self.card_id = card_id
        self.person_id = person_id
        # Stored under the prefixed names that the list columns look up.
        self.person_fname = fname
        self.person_sname = sname
        self.issue_reason = issue_reason
        self.date_due = date_due
        self.date_returned = date_returned
This display class is used in the convertResults definition below and assists with making sure the data is formatted properly for the ObjectListView.
The adjustment to your existing query function:
def getQueriedRecords(session, filterChoice, keyword):
    """
    Searches the database based on the filter chosen and the keyword
    given by the user

    :param session: SQLAlchemy session used to run the query
    :param filterChoice: ``"person"`` to match ``Loan.person_id`` or
        ``"card"`` to match ``Loan.card_id``
    :param keyword: value the chosen column must equal
    :return: the matching loans after conversion by ``convertResults``
    :raises ValueError: if ``filterChoice`` is neither ``"person"`` nor ``"card"``
    """
    # Reject unknown filters up front; previously ``result`` was left unbound.
    if filterChoice not in ("person", "card"):
        raise ValueError("Unknown filterChoice: %r" % (filterChoice,))
    qry = session.query(Loan)
    if filterChoice == "person":
        result = qry.join(Person).filter(Loan.person_id == '%s' % keyword).all()
    else:
        result = qry.join(Person).filter(Loan.card_id == '%s' % keyword).all()
    convertedResults = convertResults(result)
    return convertedResults
What we're doing here is creating a local variable that is essentially running the conversion definition and storing the results for the next line, which returns those results.
And the "Convertor" function:
def convertResults(results):
    """Convert loan records into ``OlvDisplay`` rows.

    :param results: iterable of records exposing the loan fields and a
        ``person`` relationship
    :return: list with one ``OlvDisplay`` per record, in the same order
    """
    return [
        OlvDisplay(
            record.date_issued,
            record.card_id,
            record.person_id,
            # The names come from the related person, not from the loan row.
            record.person.fname,
            record.person.sname,
            record.issue_reason,
            record.date_due,
            record.date_returned
        )
        for record in results
    ]
The important part here are the 2 lines:
record.person.fname
record.person.sname
Since we are wanting to pull information from another table using the established relationship it is important to refer to that relationship to actually see the data.
And to populate the ObjectListView Widget:
theOutput = getQueriedRecords(session, filterChoice, keyword)
# SetObjects belongs to the list widget; setupOLV is only the method that
# configures its columns.
self.loanResultsOlv.SetObjects(theOutput)
Hope this helps you out.
-MikeS

Categories