I've got a few models and am trying to speed up the page where I list out users.
The issue is that I was leveraging model methods to display some of the data - but when I listed the Users out it was hitting the DB multiple times per User which ended up with hundreds of extra queries (thousands when there were thousands of User objects in the list) so it was a serious performance hit.
I've since begun using annotate and prefetch_related, which has cut the queries down significantly. I've just got one bit I can't figure out how to annotate.
I have a model method (on Summation model) I use to get a summary of Evaluation data for a user like this:
def evaluations_summary(self):
    """Count this summation's evaluations, grouped by evaluation type name.

    Returns a values queryset with one row per ``evaluation_type__name``
    plus the count of evaluations of that type.
    """
    grouped_by_type = self.evaluation_set.values("evaluation_type__name")
    return grouped_by_type.annotate(Count("evaluation_type"))
I'm trying to figure out how to annotate that particular query on a User object.
The relationship looks like this: a User has multiple Summations, but only one is ever 'active', which is the one we display in the User list. Each Summation has multiple Evaluations, a summary of which we're trying to show as well.
Here is a summary of the relevant parts of code (including the Summation model method which gives an example of what is currently 'working' to display the data as needed) - I have also made a pastebin example for easier viewing.
# MODELS
class User(AbstractUser):
    # Custom user model extending AbstractUser; only the field relevant to
    # the question is spelled out.
    employee_no = models.IntegerField(default=1)
    ...all the other usual attributes...
class Summation(CreateUpdateMixin, CreateUpdateUserMixin):
    # CreateUpdateMixin adds 'created_at' & 'updated_at'
    # CreateUpdateUserMixin adds 'created_by' & 'updated_by'
    # Owning user. The "%(class)s" placeholder expands to the lower-cased
    # class name, so User reaches these rows via ``summation_employee``.
    employee = models.ForeignKey(
        User, on_delete=models.PROTECT, related_name="%(class)s_employee"
    )
    report_url = models.CharField(max_length=350, blank=True)
    ...other unimportant attributes...

    def evaluations_summary(self):
        """Return this summation's evaluation counts per evaluation type name."""
        evaluations_summary = (
            self.evaluation_set.all()
            .values("evaluation_type__name")
            .annotate(Count("evaluation_type"))
        )
        return evaluations_summary
class Evaluation(CreateUpdateMixin, CreateUpdateUserMixin):
    # One evaluation recorded under a Summation. No related_name is given,
    # so Summation uses the default reverse accessor ``evaluation_set``.
    summation = models.ForeignKey(Summation, on_delete=models.PROTECT)
    evaluation_type = models.ForeignKey(
        EvaluationType, on_delete=models.PROTECT
    )
    evaluation_level = models.ForeignKey(
        EvaluationLevel, on_delete=models.PROTECT
    )
    # Optional date; never filled in automatically.
    evaluation_date = models.DateField(
        auto_now=False, auto_now_add=False, null=True, blank=True
    )
    published = models.BooleanField(default=False)
class EvaluationLevel(CreateUpdateMixin):
    # Lookup table of levels referenced by Evaluation.evaluation_level.
    name = models.CharField(max_length=50)
    description = models.CharField(max_length=50)
class EvaluationType(CreateUpdateMixin):
    # Lookup table of types referenced by Evaluation.evaluation_type; the
    # summary queries group on this model's ``name``.
    name = models.CharField(max_length=50)
    description = models.CharField(max_length=50)
    # Levels associated with this type.
    evaluation_levels = models.ManyToManyField(EvaluationLevel)
# SERIALIZERS
class UserSerializer(serializers.HyperlinkedModelSerializer):
    # These three fields are not model fields; they are read from
    # annotations of the same name added to the user queryset.
    multiple_locations = serializers.BooleanField()
    multiple_jobs = serializers.BooleanField()
    summation_status_due_date = serializers.DateField()
    # Nested summations, reached through the Summation.employee reverse name.
    summation_employee = SummationSerializer(many=True, read_only=True)
    # Filled in by get_evaluations_summary() below.
    evaluations_summary = serializers.SerializerMethodField()

    class Meta:
        model = User
        fields = [
            "url",
            "id",
            "username",
            "first_name",
            "last_name",
            "full_name",
            "email",
            "is_staff",
            "multiple_locations",
            "multiple_jobs",
            "summation_status_due_date",
            "summation_employee",
            "evaluations_summary",
        ]

    def get_evaluations_summary(self, obj):
        # NOTE(review): "summation_employee__evaluation_set" is ORM
        # lookup-path syntax; read as a plain attribute on ``obj`` it is
        # presumably not defined and would raise AttributeError - confirm.
        return (
            obj.summation_employee__evaluation_set.all()
            .values("evaluation_type__name")
            .annotate(Count("evaluation_type"))
        )
# CURRENT ANNOTATIONS
# Subqueries for evaluation_summary
# Unlocked summations belonging to the user row of the outer query.
active_summations = Summation.objects.filter(employee=OuterRef("pk"), locked=False)
# Evaluations under those summations, reduced to the type name; the empty
# order_by() clears any ordering before grouping.
evaluations_set = Evaluation.objects.filter(summation__in=active_summations)
evaluations_set = evaluations_set.order_by().values("evaluation_type__name")
# One row per type name together with its evaluation count.
summary_set = evaluations_set.annotate(Count("evaluation_type"))
# the 'summation_employee__evaluation_set' prefetch does not seem
# to make an impact on queries needed
user_list = (
    User.objects.prefetch_related("summation_employee")
    .prefetch_related("summation_employee__evaluation_set")
    .filter(id__in=all_user_ids)
    # Get the total locations and if > 1, set multiple_locations to True
    .annotate(total_locations=Subquery(total_locations))
    .annotate(
        multiple_locations=Case(
            When(total_locations__gt=1, then=Value(True)),
            default=Value(False),
            output_field=BooleanField(),
        )
    )
    # Get the total jobs and if > 1 set multiple_jobs to True
    .annotate(total_jobs=Subquery(total_jobs))
    .annotate(
        multiple_jobs=Case(
            When(total_jobs__gt=1, then=Value(True)),
            default=Value(False),
            output_field=BooleanField(),
        )
    )
    # Get the due_date of the summation from the SummationStatus object
    .annotate(
        summation_status_due_date=Subquery(
            summation_status.values("summation_due")
        )
    )
    # I need to add the annotation here for the 'evaluations_summary' to avoid
    # having the database hit for every user (which could possibly range into the
    # thousands in certain cases)
    # I have tried a number of ways to obtain what I'm looking for
    .annotate(
        evaluations_summary=Subquery(
            evaluations_set.order_by()
            .values("evaluation_type__name")
            .annotate(Count("evaluation_type"))
        )
    )
    # NOTE(review): the subquery above selects two columns (the type name and
    # its count), which is consistent with the error quoted below.
    # this annotation gives the error: Only one expression can be specified in the
    # select list when the subquery is not introduced with EXISTS.
Is it even possible to transition that model method annotation?? Am I close?
Related
Let's say I have the following models:
class Well(TimeStampMixin, models.Model):
    # A single well on a plate; Plate exposes its wells as ``plate.wells``.
    plate = models.ForeignKey(Plate, on_delete=models.CASCADE, related_name="wells")
    row = models.TextField(null=False)
    column = models.TextField(null=False)

    class Meta:
        # A (plate, row, column) combination may appear only once.
        unique_together = [["plate", "row", "column"]]
class Antibiotic(TimeStampMixin, models.Model):
    # Antibiotic referenced by WellConditionAntibiotic; the name is optional.
    name = models.TextField(null=True, default=None)
class WellConditionAntibiotic(TimeStampMixin, models.Model):
    # One antibiotic condition that can be applied to many wells; a Well
    # reaches its conditions via ``well.well_condition_antibiotics``.
    wells = models.ManyToManyField(Well, related_name="well_condition_antibiotics")
    volume = models.IntegerField(null=True, default=None)
    stock_concentration = models.IntegerField(null=True, default=None)
    dosage = models.FloatField(null=True, default=None)
    # Note: the reverse accessor ``antibiotic.antibiotics`` returns
    # WellConditionAntibiotic rows, not Antibiotic rows.
    antibiotic = models.ForeignKey(
        Antibiotic, on_delete=models.RESTRICT, related_name="antibiotics"
    )
In plain English, there is a set of wells, and each well can contain many different types of antibiotics.
I'm trying to fetch the data of a given well and all of the antibiotics contained inside it.
I've tried WellConditionAntibiotic.objects.filter(wells__id=1).select_related('antibiotic')
which gives me this query:
SELECT
"kingdom_wellconditionantibiotic"."id",
"kingdom_wellconditionantibiotic"."created_at",
"kingdom_wellconditionantibiotic"."updated_at",
"kingdom_wellconditionantibiotic"."volume",
"kingdom_wellconditionantibiotic"."stock_concentration",
"kingdom_wellconditionantibiotic"."dosage",
"kingdom_wellconditionantibiotic"."antibiotic_id",
"kingdom_antibiotic"."id",
"kingdom_antibiotic"."created_at",
"kingdom_antibiotic"."updated_at",
"kingdom_antibiotic"."name"
FROM
"kingdom_wellconditionantibiotic"
INNER JOIN "kingdom_wellconditionantibiotic_wells" ON (
"kingdom_wellconditionantibiotic"."id" = "kingdom_wellconditionantibiotic_wells"."wellconditionantibiotic_id"
)
INNER JOIN "kingdom_antibiotic" ON (
"kingdom_wellconditionantibiotic"."antibiotic_id" = "kingdom_antibiotic"."id"
)
WHERE
"kingdom_wellconditionantibiotic_wells"."well_id" = 1
This gives me all of the antibiotic data, but none of the well data. So I tried
Well.objects.filter(pk=1).select_related(['well_condition_antibiotics', 'antibiotic']).query which errored.
How can I generate a django query to include all well data and all well antibiotic data?
Building up on your second attempt using Well, you will have to prefetch WellConditionAntibiotic and also select the related antibiotic like this:
from django.db.models import Prefetch
# filter()/prefetch_related() only build a QuerySet, which has no
# ``well_condition_antibiotics`` attribute. Ending the chain with get()
# returns a single Well instance with its related rows already prefetched
# (and raises Well.DoesNotExist if no well has pk=1).
well = Well.objects.prefetch_related(
    Prefetch(
        "well_condition_antibiotics",
        # select_related pulls each antibiotic in the same query as its
        # condition row, so the loop over the results needs no extra queries.
        queryset=WellConditionAntibiotic.objects.select_related("antibiotic"),
    )
).get(pk=1)
Then you can just iterate through the related WellConditionAntibiotic entries with the corresponding antibiotic:
# Walk the well's conditions and print the name of each one's antibiotic.
for condition in well.well_condition_antibiotics.all():
    antibiotic = condition.antibiotic
    print(antibiotic.name)
You can find more information about prefetch_related and Prefetch in the Django documentation [Django-doc].
I'm using Django 2.2.16 and I was trying to get rid of replicated logic across querysets. I didn't manage to find a way (without impacting performance, like subquery in select clause) to reuse annotations from one model in annotations of another one.
Here is my current implementation:
# models.py
from django.contrib.gis.db import models
from querysets import PostManager, TagManager
class Tag(models.Model):
    # Manager generated from TagQuerySet in querysets.py.
    objects = TagManager()
    # NOTE(review): CICharField is not imported in this snippet; presumably
    # it comes from django.contrib.postgres.fields - confirm.
    name = CICharField(db_index=True, max_length=100)
class Post(models.Model):
    # Manager generated from PostQuerySet in querysets.py.
    objects = PostManager()
    # Allowed values for ``status``; new posts default to DECLINED.
    STATUS_ACCEPTED = "ACCEPTED"
    STATUS_DECLINED = "DECLINED"
    STATUS_CHOICES = (
        (STATUS_ACCEPTED, "Accepted"),
        (STATUS_DECLINED, "Declined"),
    )
    status = models.CharField(
        default=STATUS_DECLINED,
        choices=STATUS_CHOICES,
        max_length=63,
        verbose_name="Źródło ogłoszenia",
    )
    # Optional expiry date, compared against today's date by the queryset
    # annotations.
    expiration_date = models.DateField(blank=True, null=True)
    # NOTE(review): db_index on a ManyToManyField likely has no effect - confirm.
    tags = models.ManyToManyField(Tag, blank=True, db_index=True)
# querysets.py
from datetime import date

from django.db import models
from django.db.models import Case, Count, Q, QuerySet, When, Manager
from django.db.models import IntegerField, Sum
class PostQuerySet(QuerySet):
    """QuerySet for posts with helpers that flag their listing state."""

    def annotate_statuses(self):
        """Annotate every post with boolean ``live`` and ``expired`` flags.

        ``live``: accepted and either without an expiration date or expiring
        today or later. ``expired``: accepted with an expiration date before
        today.
        """
        today = date.today()

        # Building blocks shared by both flags.
        accepted = Q(status="ACCEPTED")
        no_expiry = Q(expiration_date__isnull=True)
        has_expiry = Q(expiration_date__isnull=False)
        still_running = Q(expiration_date__gte=today)
        already_over = Q(expiration_date__lt=today)

        live_flag = Case(
            When(accepted & (no_expiry | still_running), then=True),
            default=False,
            output_field=models.BooleanField(),
        )
        expired_flag = Case(
            When(accepted & (has_expiry & already_over), then=True),
            default=False,
            output_field=models.BooleanField(),
        )
        return self.annotate(live=live_flag, expired=expired_flag)


PostManager = Manager.from_queryset(PostQuerySet)
class TagQuerySet(QuerySet):
    """QuerySet for tags with helpers that count their related posts."""

    def annotate_live_listings(self):
        """Annotate every tag with ``posts_count`` and ``listings_count``.

        ``posts_count`` counts all related posts. ``listings_count`` counts
        only the accepted posts that have no expiration date or expire today
        or later.
        """
        today = date.today()
        # The method must take ``self`` and annotate it; the previous version
        # had no parameters and referenced an undefined ``queryset`` name.
        return self.annotate(
            posts_count=Count("post"),
            listings_count=Sum(
                # Each joined post contributes 1 when live, otherwise 0.
                Case(
                    When(
                        Q(post__status="ACCEPTED")
                        & (
                            Q(post__expiration_date__isnull=True)
                            | Q(post__expiration_date__gte=today)
                        ),
                        then=1,
                    ),
                    output_field=IntegerField(),
                    default=0,
                )
            ),
        )


TagManager = Manager.from_queryset(TagQuerySet)
My question is if it's possible to reimplement method annotate_live_listings in TagQuerySet to reuse somehow already implemented annotate_statuses from PostQuerySet. Currently, because of the code being more or less copied over, I've got logic of live annotation in two places.
Thanks in advance!
class TransactionHistory(models.Model):
    """A transfer of ``amount`` from one account to another."""

    from_account = models.ForeignKey(
        'Account',
        on_delete=models.CASCADE,
        related_name='from_account'
    )
    to_account = models.ForeignKey(
        'Account',
        on_delete=models.CASCADE,
        related_name='to_account'
    )
    amount = models.DecimalField(
        max_digits=12,
        decimal_places=2
    )
    created_at = models.DateTimeField(default=timezone.now)

    # Restored the decorator: "#property" was only a comment, which left
    # ``way`` as a plain method instead of a property.
    @property
    def way(self):
        """Placeholder for the transaction direction; currently returns None."""
        # Here I need to access a list of user's accounts
        # (request.user.accounts) to mark the transaction
        # as going out or in.
        return
def get_own_transaction_history(me_user):
    """Return every transaction that involves one of ``me_user``'s accounts."""
    # TODO: mark transactions with way as in and out
    accounts = me_user.accounts.all()
    sent = Q(from_account__in=accounts)
    received = Q(to_account__in=accounts)
    return TransactionHistory.objects.filter(sent | received)
I want to add a "way" property for the model so when I return the queryset via serializer, the user could understand if the transaction is for going out from his account or in. But if I just add property, it can not be calculated with me_user user in mind, AFAIK the property can only access the local model fields like "from_account" or "to_account".
Something like the following should work as an annotation using conditional expressions, using __in in the When expressions may give a bit of trouble though. The objects returned by this queryset will have an attribute way added to them by the annotation
from django.db.models import Case, CharField, Value, When
def get_own_transaction_history(me_user):
    """Return ``me_user``'s transactions, each annotated with ``way``.

    ``way`` is 'out' when the sending account belongs to the user and 'in'
    when the receiving account does. The outgoing check is tried first, so a
    transfer between two of the user's own accounts is reported as 'out'.
    """
    my_accounts = me_user.accounts.all()
    outgoing = Q(from_account__in=my_accounts)
    incoming = Q(to_account__in=my_accounts)
    direction = Case(
        When(outgoing, then=Value('out')),
        When(incoming, then=Value('in')),
        output_field=CharField(),
    )
    own_transactions = TransactionHistory.objects.filter(outgoing | incoming)
    return own_transactions.annotate(way=direction)
My app has a model "OptimizationResult", where I store results from mathematical optimization. The optimization distributes timeslots over projects. I need to indicate whether the current result is different from a recent result, based on a set of attributes (in particular, not the primary key).
The attribute optimization_run is a counter for different runs.
Project is a ForeignKey to the project.
By overwriting the __hash__ and __eq__ functions on the model I can compare the different instances by
OptimizationResults.objects.filter(proj = 1).filter(optimization_run =1).first() == OptimizationResults.objects.filter(proj = 1).filter(optimization_run = 2).first()
. But as I understand __eq__ and __hash__ are not available on the database.
How would I annotate the results accordingly? Something like
OptimizationResults.objects.filter(optimization_run = 2).annotate(same_as_before = Case(When(),default=False))
Edit
Added .first() to the code, to ensure that there is only one element.
class OptimizationResult(models.Model):
    """Result of one optimization run for one project."""

    # The target is spelled ``Project`` to match the ``Project.objects``
    # usage elsewhere in this example; the previous lower-case ``project``
    # did not refer to that model class.
    project = models.ForeignKey(Project, on_delete=models.CASCADE)
    request_weight = models.IntegerField()
    periods_to_plan = models.IntegerField()
    unscheduled_periods = models.IntegerField()
    scheduled_periods = models.IntegerField()
    # Scheduled window; both ends are optional.
    start = models.DateField(null=True, blank=True, default=None)
    end = models.DateField(null=True, blank=True, default=None)
    # Set once when the row is created, so it differs between runs.
    pub_date = models.DateTimeField('Erstellungsdatum', auto_now_add=True, editable=False)
    optimization_run = models.ForeignKey(OptimizationRun, on_delete=models.CASCADE)
I'd like to compare different entries on the basis of start and end.
Edit 2
My fruitless attempt with Subquery:
# Results of runs 19 and 21 for the project row of the outer query.
old = OptimizationResult.objects.filter(project=OuterRef('pk')).filter(optimization_run=19)
newest = OptimizationResult.objects.filter(project=OuterRef('pk')).filter(optimization_run=21)
# NOTE: ``==`` is evaluated by Python before annotate() is called, so the
# keyword argument is a plain bool rather than a query expression - which is
# what the TypeError reported for this attempt complains about.
Project.objects.annotate(changed = Subquery(newest.values('start')[:1])== Subquery(old.values('start')[:1]))
results in TypeError: QuerySet.annotate() received non-expression(s): False
We can use a subquery here, to make an annotation:
from django.db.models import Exists, OuterRef, Subquery, Q
# Fields that may differ between two runs and are therefore left out of the
# comparison. ``pub_date`` is included because it is set by auto_now_add and
# so differs between runs; comparing it would make every row look changed.
to_exclude = {
    'pk', 'id', 'project', 'project_id',
    'optimization_run', 'optimization_run_id', 'pub_date',
}
# Rows of run 1 for the same project whose remaining fields all equal those
# of the outer row.
# NOTE(review): equality never matches NULL against NULL in SQL, so rows whose
# nullable ``start``/``end`` are both empty will not count as equal - confirm
# whether that case matters.
subquery = OptimizationResult.objects.filter(
    project_id=OuterRef('project_id'),
    optimization_run=1,
    **{
        f.name: OuterRef(f.name)
        for f in OptimizationResult._meta.get_fields()
        if f.name not in to_exclude
    }
)
# ``are_same`` is True when such a matching run-1 row exists.
OptimizationResult.objects.filter(
    optimization_run=2
).annotate(
    are_same=Exists(subquery)
)
Here we will thus annotate all the OptimizationResults with an optimization_run=2, with an extra attribute .are_same that checks if there exists an OptimizationResult object for optimization_run=1 and for the same project_id, where all fields are the same, except the ones in the to_exclude set.
So I am simply trying to add LectureCategory in my Lecture model, I want the user to be only able to select between Classes or Seminars. If I put choices in both models, I can see them on django admin, but I get the error:
Cannot assign "'0'": "Lecture.lecture_category" must be a "LectureCategory" instance.
If I don't put choices in the second model, then the admin panel shows 0 or 1 instead of my values. Any suggestions?
class LectureCategory(models.Model):
    # Category stored as an integer code restricted to the two choices.
    lecture_category = models.IntegerField(choices=((0, "Classes "),
                                                    (1, "Seminars"),
                                                    ))

    def __str__(self):
        # Returns the stored integer as text, not the human-readable label.
        return str(self.lecture_category)
class Lecture(models.Model):
    course = models.ForeignKey('Course', on_delete=models.CASCADE, default='', related_name='lectures', null=True, )
    # NOTE(review): the choices here are raw integers while the field is a
    # ForeignKey, which is consistent with the reported
    # 'must be a "LectureCategory" instance' error - confirm.
    lecture_category = models.ForeignKey('LectureCategory', on_delete=models.CASCADE,
                                         default='', related_name='categories',
                                         choices=((0, "Classes "),
                                                  (1, "Seminars"),
                                                  )
                                         )
You definitely don't need a LectureCategory model to filter a Lecture queryset on a category:
class Lecture(models.Model):
    """Lecture whose category is stored as an integer choice on the row itself."""

    course = models.ForeignKey(
        'Course',
        on_delete=models.CASCADE,
        default=None,
        related_name='lectures',
        null=True,
    )
    # Named constants so callers can filter without magic numbers.
    CATEGORY_CLASSES = 0
    CATEGORY_SEMINARS = 1
    CATEGORY_CHOICES = (
        (CATEGORY_CLASSES, "Classes"),
        (CATEGORY_SEMINARS, "Seminars"),
    )
    category = models.IntegerField(
        choices=CATEGORY_CHOICES
    )


# select only classes
Lecture.objects.filter(category=Lecture.CATEGORY_CLASSES)
# select only seminars
Lecture.objects.filter(category=Lecture.CATEGORY_SEMINARS)
# display the lecture's category readable label
# (``lecture`` is assumed to be an existing Lecture instance)
# cf https://docs.djangoproject.com/en/2.0/ref/models/instances/#django.db.models.Model.get_FOO_display
print(lecture.get_category_display())
Also you can use custom managers here to directly have Lecture.seminars.all() and Lecture.classes.all()
Having a distinct LectureCategory model makes sense if you want to allow admins to add new categories, but then you will lose custom managers per category and, in fact, anything that requires categories to be known in advance. In this case your LectureCategory model will need some label field:
class LectureCategory(models.Model):
    """Lecture category identified by a free-text label."""

    # Human-readable name of the category.
    label = models.CharField(
        "label",
        max_length=50
    )

    def __str__(self):
        # The label is the category's text representation.
        return self.label
class Lecture(models.Model):
    """Lecture that belongs to a course and to one LectureCategory."""

    course = models.ForeignKey(
        'Course',
        on_delete=models.CASCADE,
        default=None,
        related_name='lectures',
        null=True,
    )
    # A category's lectures are reachable as ``category.lectures``; PROTECT
    # blocks deleting a category that still has lectures.
    # (A comma was missing after related_name, which was a SyntaxError.)
    category = models.ForeignKey(
        LectureCategory,
        related_name="lectures",
        on_delete=models.PROTECT,
    )
Then if you want to iterate on categories/lectures:
# The model is named LectureCategory; ``Category`` is not defined anywhere.
for category in LectureCategory.objects.all():
    print(category)
    # ``lectures`` is the related_name declared on Lecture.category.
    for lecture in category.lectures.all():
        print(lecture)
You don't need a separate model for categories until you want to add more information about them. You can simply do it like this:
class Lecture(models.Model):
    # Integer codes stored in ``lecture_category``.
    Classes= 0
    Seminars= 1
    # (stored value, human-readable label) pairs offered as choices.
    lecture_choice = (
        (Classes, 'Classes'),
        (Seminars, 'Seminars'),
    )
    course = models.ForeignKey('Course', on_delete=models.CASCADE, default='', related_name='lectures', null=True, )
    # Defaults to the Classes category.
    lecture_category = models.IntegerField(choices=lecture_choice ,default=Classes)