I have the following data structure:
from enum import IntEnum, unique
from pathlib import Path
from datetime import datetime
from peewee import *
#unique
class Status(IntEnum):
    """Integer status codes: created, failed, or succeeded."""

    CREATED = 0
    FAIL = -1
    SUCCESS = 1
# The SQLite file lives in the same directory as this module.
db_path = Path(__file__).parent / "test.sqlite"
# Database handle that the models are bound to.
database = SqliteDatabase(db_path)
class BaseModel(Model):
    """Base model that binds its subclasses to the module-level database."""

    class Meta:
        database = database
class Unit(BaseModel):
    """A unit identified by a unique name."""

    name = TextField(unique=True)
    some_field = TextField(null=True)  # optional
    # The callable (not its result) is passed as the default.
    created_at = DateTimeField(default=datetime.now)
class Campaign(BaseModel):
    """A campaign with an explicit auto-incrementing key named ``id_``."""

    id_ = AutoField()
    # The callable (not its result) is passed as the default.
    created_at = DateTimeField(default=datetime.now)
class Task(BaseModel):
    """Links one unit to one campaign and carries an integer status code."""

    id_ = AutoField()
    status = IntegerField(default=Status.CREATED)
    # Both foreign keys expose the reverse relation as ``.tasks``.
    unit = ForeignKeyField(Unit, backref="tasks")
    campaign = ForeignKeyField(Campaign, backref="tasks")
The following code creates units, a campaign and tasks:
def fill_units(count):
    """Create ``count`` units named ``unit0``, ``unit1``, ... and return them.

    All rows are created inside a single ``database.atomic()`` block.

    Returns:
        The list of created ``Unit`` instances, in creation order.
    """
    with database.atomic():
        return [Unit.create(name=f"unit{i}") for i in range(count)]
def init_campaign(count):
    """Create a new campaign with one task for each of up to ``count`` units.

    The campaign and its tasks are created inside a single
    ``database.atomic()`` block.

    Returns:
        The newly created ``Campaign``.
    """
    units = Unit.select().limit(count)
    with database.atomic():
        campaign = Campaign.create()
        for unit in units:
            Task.create(unit=unit, campaign=campaign)
    return campaign
The problem appears when I try to add more units to an existing campaign. I need to select units which haven't been used in this campaign yet. In SQL I can do this with the following query:
SELECT * FROM unit WHERE id NOT IN (SELECT unit_id FROM task WHERE campaign_id = 1) LIMIT 10
But how to do this using peewee?
The only way I've found yet is:
def get_new_units_for_campaign(campaign, count):
    """Return a query for up to ``count`` units that have no task in ``campaign``.

    The names of the units already used by the campaign are collected in
    Python first and then excluded with ``NOT IN``.
    """
    used_names = [task.unit.name for task in campaign.tasks]
    return Unit.select().where(Unit.name.not_in(used_names)).limit(count)
It somehow works, but I'm 100% sure that it's the dumbest way to implement this. Could you show me the proper way to implement it?
Finally I found this:
Unit.select().where(Unit.id.not_in(campaign.tasks.select(Task.unit))).limit(10)
Which produces
SELECT "t1"."id", "t1"."name", "t1"."some_field", "t1"."created_at" FROM "unit" AS "t1" WHERE ("t1"."id" NOT IN (SELECT "t2"."unit_id" FROM "task" AS "t2" WHERE ("t2"."campaign_id" = 1))) LIMIT 10
Which matches the SQL query I provided in my question.
P.S. I've done some research and it seems to be a proper implementation, but I'd appreciate it if somebody corrected me and showed a better way (if one exists).
I'm a newbie in Django and I have some questions about making queries with the QuerySet API.
For instance, I have a User, the user's Orders, and the Orders' Statuses:
class User(models.Model):
    """A user with a first name, a last name and an active flag."""

    first_name = models.CharField(max_length=100)
    last_name = models.CharField(max_length=100)
    is_active = models.BooleanField()
class OrderStatus(models.Model):
    """A named status that an order can reference."""

    name = models.CharField(max_length=100)
class Order(models.Model):
    """An order that belongs to a user and references an order status."""

    number = models.CharField(max_length=10)
    amount = models.DecimalField(max_digits=19, decimal_places=2)
    # Reverse accessor on User is ``orders``; PROTECT blocks deleting a
    # user or status that is still referenced by an order.
    user = models.ForeignKey(User, on_delete=models.PROTECT, related_name="orders")
    order_status = models.ForeignKey(OrderStatus, on_delete=models.PROTECT)
    creation_datetime = models.DateTimeField(auto_now_add=True)
    # Some filtering field
    filtering_field = models.IntegerField()
I combined all of my questions to this one query:
Get active users with some additional data for each user:
'Amount' of the Orders filtered by 'filtering_field' and aggregated by Min and Max
'Number' and 'Amount' of the first Order filtered by 'filtering_field'
Count of the Orders filtered by 'filtering_field', aggregated by Count and grouped by 'Order Status'. This grouping means that data from query #1 and #2 can be duplicated and it's ok.
I could make this query in T-SQL by 3 separated subquery with own grouping, filtering, ordering:
SELECT
    u.id,
    u.first_name,
    u.last_name,
    ts.min_amount,
    ts.max_amount,
    first_order.number AS first_order_number,
    first_order.amount AS first_order_amount,
    cnt.order_status_id,
    cnt.cnt
FROM [User] AS u
-- 1. 'Amount' of the Orders filtered by 'filtering_field' and aggregated by Min and Max
LEFT OUTER JOIN (
    SELECT
        [user_id],
        MIN(amount) AS min_amount,
        MAX(amount) AS max_amount
    FROM [Order]
    WHERE filtering_field = 1
    GROUP BY [user_id]
) AS ts
    ON u.id = ts.[user_id]
-- 2. 'Number' and 'Amount' of the first Order filtered by 'filtering_field'
OUTER APPLY (
    SELECT TOP 1
        o.number,
        o.amount
    FROM [Order] AS o
    WHERE u.id = o.[user_id]
      AND o.filtering_field = 2
    ORDER BY o.creation_datetime
) AS first_order
-- 3. Count of the Orders filtered by 'filtering_field', aggregated by Count and grouped by 'Order Status'.
LEFT OUTER JOIN (
    SELECT
        [user_id],
        order_status_id,
        COUNT(*) AS cnt
    FROM [Order]
    WHERE filtering_field = 3
    GROUP BY
        [user_id],
        order_status_id
) AS cnt
    ON u.id = cnt.[user_id]
WHERE u.is_active = 1
How can I do the same with the QuerySet API?
For query #1 I can use Min and Max in annotate():
# Active users, each annotated with the smallest and largest amount among
# their orders whose filtering_field equals 1.
data = (
    User.objects
    .filter(is_active=True)
    .values('id', 'first_name', 'last_name')
    .annotate(
        min_amount=Min('orders__amount', filter=Q(orders__filtering_field=1)),
        max_amount=Max('orders__amount', filter=Q(orders__filtering_field=1)),
    )
)
But what about query #2 & #3?
I've considered Subquery(), but it supports only one output value.
I mean, if you want to get 5 fields from one queryset, SQL Server runs 5 queries. I think that's not good for performance.
How can I join the first order once and use several of its fields, and how can I use Count() grouped over the filtered rows of a child model?
I'd like to use .prefetch_related() as a substitution of Subquery in T-SQL for each query like this:
Prefetch(
'orders',
queryset=Order.objects.filter(filtering_field=1)..., #staff with .values(), annotate(Min(), Max()) and etc.
to_attr='pf_query_1'
)
And then use 'pf_query_1' like 'orders__pf_query_1__amount' in User.objects...values()...annotate().
But I can't use .values() in a Prefetch, nor can I use 'pf_query_1' as a model field.
So what is the best practice for making this one query with the QuerySet API?
I'd like to see the whole QuerySet API query, just like the T-SQL query.
Have you considered the Django Subquery as described in the docs?
Regarding your 3rd question, the only approach that comes to my mind is dynamically creating the annotations.
Here is a (tested) code sample using your models:
def test_query(self):
    """Annotate every user with order statistics via correlated subqueries.

    Each user gets: the min and max order amount, the number and amount of
    the earliest order, and one ``<status name>_count`` annotation per
    ``OrderStatus``. The annotations are then checked against plain
    per-user queries, and the generated SQL is printed.
    """
    # Orders of the outer user that pass the filter. order_by() clears any
    # ordering so it cannot end up in the GROUP BY of the aggregates.
    user_orders = Order.objects.filter(
        user=OuterRef('pk'), filtering_field=1
    ).order_by()

    # 1st question
    min_order = user_orders.values('user').annotate(min=Min('amount')).values('min')
    max_order = user_orders.values('user').annotate(max=Max('amount')).values('max')

    # 2nd question: a scalar subquery must yield at most one row, so order
    # explicitly (pk breaks ties) and keep only the first row.
    first_order = user_orders.order_by('creation_datetime', 'pk')
    first_number = first_order.values('number')[:1]
    first_amount = first_order.values('amount')[:1]

    kwargs = {
        'min': Subquery(min_order, output_field=DecimalField()),
        'max': Subquery(max_order, output_field=DecimalField()),
        'first_n': Subquery(first_number, output_field=CharField()),
        'first_a': Subquery(first_amount, output_field=DecimalField()),
    }

    # 3rd question: one count annotation per order status.
    for o in OrderStatus.objects.all():
        kwargs['%s_count' % o.name] = Subquery(
            user_orders.filter(order_status=o)
            .values('user').annotate(c=Count('pk')).values('c'),
            output_field=IntegerField(),
        )

    # Putting it all together
    qs2 = User.objects.annotate(**kwargs)

    # Testing the results
    for user in qs2:
        matching = Order.objects.filter(user=user, filtering_field=1)

        v = matching.aggregate(Min('amount'), Max('amount'))
        self.assertEqual(v['amount__min'], user.min)
        self.assertEqual(v['amount__max'], user.max)

        # A user without matching orders has no first order; the
        # annotations are expected to be None in that case.
        first = matching.order_by('creation_datetime', 'pk').first()
        self.assertEqual(first.number if first else None, user.first_n)
        self.assertEqual(first.amount if first else None, user.first_a)

        for o in OrderStatus.objects.all():
            # The count subquery yields no row (None) rather than 0.
            expected = matching.filter(order_status=o).count() or None
            self.assertEqual(expected, getattr(user, '%s_count' % o.name))

    # The sql
    print(qs2.query)
Please note:
The code is part of a TestCase where I put it to check if it worked
as expected
I know some parts of the query can be generated without
Subquery using the filter attribute of the aggregation functions. As
this filter attribute was only introduced in Django 2.0 and not
supported in the LTS version 1.11 I did not use it.
EDIT: Here is another approach I came up with starting with a "base queryset" and annotating that one:
def test_query2(self):
    """Annotate distinct (user, order_status) pairs with order statistics.

    Starts from the filtered orders reduced to distinct user/status pairs
    and annotates each pair with the user's min/max amount, the number and
    amount of the user's earliest order, and the order count for that
    user/status pair. The annotations are then checked against plain
    queries, and the generated SQL is printed.
    """
    qs = Order.objects.filter(filtering_field=1).values('user', 'order_status').distinct()

    # Orders of the outer row's user that pass the filter. order_by() clears
    # any ordering so it cannot end up in the GROUP BY of the aggregates.
    user_orders = Order.objects.filter(
        user=OuterRef('user'), filtering_field=1
    ).order_by()

    # 1st question
    min_order = user_orders.values('user').annotate(min=Min('amount')).values('min')
    max_order = user_orders.values('user').annotate(max=Max('amount')).values('max')

    # 2nd question: a scalar subquery must yield at most one row, so order
    # explicitly (pk breaks ties) and keep only the first row.
    first_order = user_orders.order_by('creation_datetime', 'pk')
    first_number = first_order.values('number')[:1]
    first_amount = first_order.values('amount')[:1]

    # 3rd question
    total_count = (
        user_orders.filter(order_status=OuterRef('order_status'))
        .values('user').annotate(c=Count('pk')).values('c')
    )

    qs2 = qs.annotate(
        min=Subquery(min_order, output_field=DecimalField()),
        max=Subquery(max_order, output_field=DecimalField()),
        first_n=Subquery(first_number, output_field=CharField()),
        # amount is a decimal column, so the output field must be decimal too
        first_a=Subquery(first_amount, output_field=DecimalField()),
        c=Subquery(total_count, output_field=IntegerField()),
    )

    # Testing the results
    for d in qs2:
        matching = Order.objects.filter(user=d['user'], filtering_field=1)

        v = matching.aggregate(Min('amount'), Max('amount'))
        self.assertEqual(v['amount__min'], d['min'])
        self.assertEqual(v['amount__max'], d['max'])

        v = matching.order_by('creation_datetime', 'pk').first()
        self.assertEqual(v.number, d['first_n'])
        self.assertEqual(v.amount, d['first_a'])

        v = matching.filter(order_status=d['order_status']).count()
        self.assertEqual(v, d['c'])

    print(qs2.query)
I've got file objects of different types, which inherit from a BaseFile, and add custom attributes, methods and maybe fields. The BaseFile also stores the File Type ID, so that the corresponding subclass model can be retrieved from any BaseFile object:
class BaseFile(models.Model):
    """Fields shared by every file type."""

    name = models.CharField(max_length=80, db_index=True)
    size = models.PositiveIntegerField()
    time_created = models.DateTimeField(default=datetime.now)
    # Content type recorded for the file; PROTECT blocks deleting a
    # content type that is still referenced.
    file_type = models.ForeignKey(ContentType, on_delete=models.PROTECT)
class FileType1(BaseFile):
    """File type that adds a storage path and a custom method to BaseFile."""

    storage_path = '/path/for/filetype1/'

    def custom_method(self):
        # <some custom behaviour>
        ...
class FileType2(BaseFile):
    """File type that adds a storage path and one extra column to BaseFile."""

    storage_path = '/path/for/filetype2/'
    extra_field = models.CharField(max_length=12)
I also have different types of events which are associated with files:
class FileEvent(models.Model):
    """An event recorded against a file at a point in time."""

    # Points at BaseFile, so an event can reference a file of any type.
    file = models.ForeignKey(BaseFile, on_delete=models.PROTECT)
    time = models.DateTimeField(default=datetime.now)
I want to be able to efficiently get all files of a particular type which have not been involved in a particular event, such as:
unprocessed_files_type1 = FileType1.objects.filter(fileevent__isnull=True)
However, looking at the SQL executed for this query:
SELECT "app_basefile"."id", "app_basefile"."name", "app_basefile"."size", "app_basefile"."time_created", "app_basefile"."file_type_id", "app_filetype1"."basefile_ptr_id"
FROM "app_filetype1"
INNER JOIN "app_basefile"
ON("app_filetype1"."basefile_ptr_id" = "app_basefile"."id")
LEFT OUTER JOIN "app_fileevent" ON ("app_basefile"."id" = "app_fileevent"."file_id")
WHERE "app_fileevent"."id" IS NULL
It looks like this might not be very efficient because it joins on BaseFile.id instead of FileType1.basefile_ptr_id, so it will check ALL BaseFile ids to see whether they are present in FileEvent.file_id, when I only need to check the BaseFile ids corresponding to FileType1, or FileType1.basefile_ptr_ids.
This could result in a significant performance difference if there are a very large number of BaseFiles, but FileType1 is only a small subset of that, because it will be doing a large amount of unnecessary lookups.
Is there a way to force Django to join on "app_filetype1"."basefile_ptr_id" or otherwise achieve this functionality more efficiently?
Thanks for the help
UPDATE:
Using annotations and Exists subquery seems to do what I'm after, however the resulting SQL still appears strange:
unprocessed_files_type1 = FileType1.objects.annotate(file_event=Exists(FileEvent.objects.filter(file=OuterRef('pk')))).filter(file_event=False)
SELECT "app_basefile"."id", "app_basefile"."name", "app_basefile"."size", "app_basefile"."time_created", "app_basefile"."file_type_id", "app_filetype1"."basefile_ptr_id",
EXISTS(
SELECT U0."id", U0."file_id", U0."time"
FROM "app_fileevent" U0
WHERE U0."file_id" = ("app_filetype1"."basefile_ptr_id"))
AS "file_event"
FROM "app_filetype1"
INNER JOIN "app_basefile" ON ("app_filetype1"."basefile_ptr_id" = "app_basefile"."id")
WHERE EXISTS(
SELECT U0."id", U0."file_id", U0."time"
FROM "app_fileevent" U0
WHERE U0."file_id" = ("app_filetype1"."basefile_ptr_id")) = 0
It appears to be doing the WHERE EXISTS subquery twice instead of just using the annotated 'file_event' label... Maybe this is just a Django/SQLite driver bug?
I'm using Django 1.11, Postgresql 9.2, python 3.4
I want to select rows based on the table's event_type column: if the event type is 'single', the start date must be exactly today's date; otherwise (the event type is 'recurring'), select all events whose start date is today or earlier.
Can't we manage this with a single query, like we do with CASE/WHEN/THEN in aggregation? I tried using Q objects, but had no luck.
I want to check: when the value is 'single', add one condition; otherwise add another condition.
I could not find any good solution; currently I've achieved it using this:
today = datetime.date.today().strftime('%Y-%m-%d')
# Single events whose repeat_start equals `today`.
single_events = crm_models.EventsMeta.objects.filter(
    event_type="single",
    repeat_start=today,
)
# Recurring events whose repeat_start is `today` or earlier.
recurring_events = crm_models.EventsMeta.objects.filter(
    event_type="recurring",
    repeat_start__lte=today,
)
# Combine the two querysets with OR.
all_events = single_events | recurring_events
For more information my model is:
class EventsMeta(models.Model):
    """An event that is either 'single' or 'recurring', with a start and an end."""

    event_type = models.CharField(
        max_length=50,
        choices=(("single", "Single"), ("recurring", "Recurring")),
        null=False,
        blank=False,
        default='single',
        verbose_name="Event Type",
    )
    repeat_start = models.DateTimeField()
    repeat_end = models.DateTimeField()
You can combine many Q objects using parentheses. In your case I suppose this will work:
# One query: a row matches when it is a "single" event whose repeat_start
# equals `today`, OR a "recurring" event whose repeat_start is `today` or
# earlier. The parentheses group each AND pair before the OR.
single_events = crm_models.EventsMeta.objects.filter(
(Q(event_type="single") & Q(repeat_start=today)) |
(Q(event_type="recurring") & Q(repeat_start__lte=today))
)
use this:-
today = datetime.date.today().strftime('%Y-%m-%d')
# Events of either type whose repeat_start equals `today`.
single_events = crm_models.EventsMeta.objects.filter(
    event_type__in=["single", "recurring"],
    repeat_start=today,
)
I'm using peewee as ORM and have two classes like this:
class A(Model):
    """Model whose label column is called ``name``."""

    name = CharField()
    body = TextField()
class B(Model):
    """Model whose label column is called ``title``."""

    title = CharField()
    body = TextField()
I would like to get all entries from A and B whose title/name start with some characters like 'abc'. According to the documentation the | operator should help, but I'm not even able to execute the resulting Expression. Obviously I would like to have a UNION and AS expression behind the scenes. How do I get this via peewee?
You should be able to get the result you want with something like
# Combine the two selects with `|`. Both sides alias their name/title column
# to 'name_title' so that they expose the same column names.
result = (
A().select(A.name.alias('name_title'), A.body).where(A.name == 'abc') |
B().select(B.title.alias('name_title'), B.body).where(B.title == 'abc')
).select().execute()
or
search_text = 'abc'
# Both selects alias their name/title column to 'name_title' so that the
# two sides of the combined query expose the same column names.
table_a_results = A().select(
    A.name.alias('name_title'),
    A.body,
).where(A.name == search_text)
table_b_results = B().select(
    # B has no `name` column; its label column is `title`.
    B.title.alias('name_title'),
    B.body,
).where(B.title == search_text)
result = (table_a_results | table_b_results).select().execute()
The .alias() method gets you the AS functionality, as per the docs.