Django - Annotating an aggregation of annotations on related objects - python

I have three models:
class BaseModel(Model):
    """Base model carrying the ``deleted`` flag shared by all models below."""

    # Rows with deleted=True are ignored by the price calculations.
    deleted = BooleanField(default=False)
class Document(BaseModel):
    def total_price(self):
        """Return the total price of all non-deleted lines in non-deleted sections."""
        # The line model in this file is ``Line`` (there is no ``DocumentLine``).
        return Line.objects.filter(
            section__in=self.sections.filter(deleted=False),
            deleted=False,
        ).total_price()
class Section(BaseModel):
    """A section of a document; reachable from Document via ``sections``."""

    document = ForeignKey(Document, on_delete=CASCADE, related_name='sections')
class LineQuerySet(QuerySet):
    """QuerySet helpers for computing line totals (quantity * price)."""

    def with_total_price(self):
        """Annotate each line with ``total_price = quantity * price``."""
        total_price = F('quantity') * F('price')
        return self.annotate(
            total_price=ExpressionWrapper(total_price, output_field=DecimalField())
        )

    def total_price(self):
        """Return the sum of ``total_price`` over the queryset, or 0 when the sum is empty."""
        # The annotating method is ``with_total_price`` (singular); the
        # previous call to ``with_total_prices`` raised AttributeError.
        return self.with_total_price().aggregate(
            Sum('total_price', output_field=DecimalField())
        )['total_price__sum'] or Decimal(0)
class Line(BaseModel):
    """A priced line inside a section; reachable from Section via ``lines``."""

    objects = LineQuerySet.as_manager()
    section = ForeignKey(Section, on_delete=CASCADE, related_name='lines')
    # NOTE(review): a real DecimalField needs max_digits and decimal_places;
    # they appear to be elided in this example.
    price = DecimalField()
    quantity = DecimalField()
As you can see on the LineQuerySet, there is a method that will annotate the queryset with the total price of each line, based on the price and quantity.
Now I can easily get the total price of an entire document doing something like this (Note that lines and sections with deleted=True are ignored):
# Fetch one document and compute its total through the model method.
document = Document.objects.get(pk=1)
total_price = document.total_price()
However, now I would like to generate a queryset of multiple documents, and annotate that with each document's total price. I've tried a combination of annotates, aggregates, making use of prefetch_related (using Prefetch), and OuterRef, but I can't quite seem to be able to get the result I want without it throwing an error.
Is there some way to perform this operation in a queryset, making it then possible to filter or order by this total_price field?

You can annotate with:
from django.db.models import F, Sum

# The reverse relation from Document is named ``sections`` (related_name),
# so every lookup must start with ``sections__``.
# Documents without any matching non-deleted line are excluded by the filter.
Document.objects.filter(
    deleted=False,
    sections__deleted=False,
    sections__lines__deleted=False,
).annotate(
    total_price=Sum(F('sections__lines__price') * F('sections__lines__quantity'))
)
Each Document that arises from this queryset will have an attribute .total_price which is the sum of the price times the quantity of all related lines of all related sections of that Document.
An alternative is to work with a Subquery expression [Django-doc] to determine the sum, so:
from django.db.models import F, OuterRef, Subquery, Sum

Document.objects.annotate(
    total_price=Subquery(
        Line.objects.values(
            document_id=F('section__document_id')
        ).filter(
            # Line has no direct ``document`` relation; the document is
            # reached through the section.
            deleted=False,
            section__deleted=False,
            section__document__deleted=False,
        ).annotate(
            total_price=Sum(F('price') * F('quantity'))
        ).order_by('document_id').filter(
            document_id=OuterRef('pk')
        ).values('total_price')[:1]
    )
)

Related

How to filter not only by outerref id in a subquery?

I have a problem with filtering by boolean field in a subquery.
For example, I have two models: Good and Order.
class Good(models.Model):
    objects = GoodQuerySet.as_manager()


class Order(models.Model):
    # Django's foreign key field is ``ForeignKey``; ``models.FK`` does not exist.
    good = models.ForeignKey(Good, on_delete=models.CASCADE, related_name="orders")
    is_completed = models.BooleanField(default=False)
I want to calculate how many completed orders each good has.
I implemented a method in Good's manager:
class GoodQuerySet(models.QuerySet):
    def completed_orders_count(self):
        """Annotate each good with its order count (all orders, completed or not)."""
        subquery = Subquery(
            Order.objects.filter(good_id=OuterRef("id"))
            .order_by()  # clear any ordering before grouping by good_id
            .values("good_id")
            .annotate(c=Count("*"))
            .values("c")
        )
        # Coalesce turns the NULL produced for goods without orders into 0.
        return self.annotate(completed_orders_count=Coalesce(subquery, 0))
This version counts all existing orders for a good (not only the completed ones), and it works when I call it like this:
Good.objects.completed_orders_count().first().completed_orders_count
To get the correct value of completed orders I tried to add filter is_completed=True. The final version looks like this:
class GoodQuerySet(models.QuerySet):
    def completed_orders_count(self):
        """Annotate each good with the number of its completed orders."""
        # NOTE(review): this is the failing version from the question. The
        # FieldError quoted below is typically resolved by declaring the
        # result type, e.g. Subquery(..., output_field=IntegerField()).
        subquery = Subquery(
            Order.objects.filter(good_id=OuterRef("id"), is_completed=True)
            .order_by()
            .values("good_id")
            .annotate(c=Count("*"))
            .values("c")
        )
        return self.annotate(completed_orders_count=Coalesce(subquery, 0))
If I try to call Good.objects.completed_orders_count().first().completed_orders_count, I get an error:
django.core.exceptions.FieldError: Expression contains mixed types. You must set output_field.

Alternative way of querying through a models' method field

I have this model about Invoices which has a property method which refers to another model in order to get the cancelation date of the invoice, like so:
class Invoice(models.Model):
    # (...)
    @property
    def cancel_date(self):
        """Return the date of the first 'cancel' record, or None if there is none."""
        if not self.canceled:
            return None
        record = self.records.filter(change_type='cancel').first()
        # first() returns None when no record matches; do not dereference it.
        return record.date if record is not None else None
And in one of my views, I need to query every invoice that has been canceled after max_date or hasn't been canceled at all.
Like so:
def ExampleView(request):
    # (...)
    # A Manager is not iterable; .all() yields a QuerySet that is.
    qs = Invoice.objects.all()
    if r.get('maxDate'):
        max_date = datetime.strptime(r.get('maxDate'), r'%Y-%m-%d')
        # Evaluates the property in Python for every invoice, then filters by pk.
        ids = [
            i.pk
            for i in qs
            if i.cancel_date is None or i.cancel_date > max_date
        ]
        qs = qs.filter(pk__in=ids)  #Error -> django.db.utils.OperationalError: too many SQL variables
However, ids might give me a huge list of ids which causes the error too many SQL variables.
What's the smartest approach here?
EDIT:
I'm looking for a solution that does not involve adding cancel_date as a model field since invoice.records refers to another model where we store every date attribute of the invoice
Like so:
class InvoiceRecord(models.Model):
    """A dated change record attached to an invoice (reverse name: ``records``)."""

    invoice = models.ForeignKey(Invoice, related_name='records', on_delete=models.CASCADE)
    date = models.DateTimeField(default=timezone.now)
    change_type = models.CharField(max_length=32)  # Multiple choices field
And every invoice might have more than one same date attribute. For example, one invoice might have two cancelation dates
You can annotate a Subquery() expression [Django docs] which will give you the date to do this:
from django.db.models import OuterRef, Q, Subquery


def ExampleView(request):
    # (...)
    qs = Invoice.objects.annotate(
        cancel_date=Subquery(
            # The model is ``InvoiceRecord``. Only 'cancel' records count as a
            # cancelation date, matching the Invoice.cancel_date property.
            InvoiceRecord.objects.filter(
                invoice=OuterRef("pk"),
                change_type='cancel',
            )
            # Explicit ordering makes the [:1] pick deterministic; ordering by
            # pk mirrors what .first() does on an unordered queryset.
            .order_by('pk')
            .values('date')[:1]
        )
    )
    # NOTE(review): unlike the property, this does not consult
    # Invoice.canceled; confirm that a 'cancel' record implies canceled.
    if r.get('maxDate'):
        max_date = datetime.strptime(r.get('maxDate'), r'%Y-%m-%d')
        qs = qs.filter(Q(cancel_date__isnull=True) | Q(cancel_date__gt=max_date))
I would store cancel_date as a database field, set at the same time as the cancel flag. Then you can use a single query:
qs = Invoice.objects.filter(Q(cancel_date__isnull=True) | Q(cancel_date__gt=max_date))
This says: cancel_date is NULL or greater than max_date.
I'm not sure about your cancel_date property, though. It returns the first record with change_type='cancel', which (I don't know your code flow) may be a different record than the one you expect when you call that property.

Django reverse lookup to get latest

I have a model structure as below,
ACTIVE_STATUS = ['waiting', 'loading', 'starting', 'running', 'stopping']
INACTIVE_STATUS = ['stopped', 'finished', 'failed', 'lost']
ALL_STATUS = ACTIVE_STATUS + INACTIVE_STATUS


class Task(models.Model):
    name = models.CharField(max_length=20)


class Job(models.Model):
    # on_delete is a required argument since Django 2.0; CASCADE was the
    # implicit default before that.
    task = models.ForeignKey(Task, on_delete=models.CASCADE, related_name='jobs')
    timestamp = models.DateTimeField(auto_now_add=True)
    # Each status is used as both the stored value and its display label.
    status = models.CharField(choices=zip(ALL_STATUS, ALL_STATUS), max_length=20)
How can I annotate the "latest timestamp and its status" into Task queryset?
I have managed to obtain the latest timestamp by,
# The reverse relation is named ``jobs`` (related_name on Job.task).
Task.objects.annotate(latest_ts=models.Max(models.F('jobs__timestamp')))
So, how can I get the corresponding status?
Update-1
The ultimate aim of this query is to sort the Task queryset in the following order:
1. tasks with zero Jobs (say Task.objects.filter(jobs__isnull=True))
2. tasks whose latest job is running (latest_status == 'running')
Update-2
TaskManager class that used to obtain the sorted queryset
class TaskManager(models.Manager):
    def get_queryset(self):
        """Return tasks ordered: no jobs first, then latest job running, then the rest."""
        qs = super().get_queryset()
        # The reverse relation is ``jobs`` (related_name on Job.task), not ``job``.
        latest_job = models.Max(models.F('jobs__timestamp'))
        # Status of the most recent job, taken from a correlated subquery.
        latest_status = models.Subquery(
            Job.objects.filter(
                task_id=models.OuterRef('pk')
            ).values('status').order_by('-timestamp')[:1]
        )
        # Higher qs_order sorts first because of the descending order_by below.
        qs_order = models.Case(
            models.When(jobs__isnull=True, then=models.Value(2)),
            models.When(latest_status='running', then=models.Value(1)),
            default=models.Value(0),
            output_field=models.IntegerField()
        )
        return qs.annotate(
            latest_job=latest_job,
            latest_status=latest_status,
            qs_order=qs_order,
        ).order_by('-qs_order')
You can work with a Subquery expression [Django-doc]:
from django.db.models import OuterRef, Subquery

# Status of the newest job (by timestamp) belonging to the outer task.
newest_job_status = (
    Job.objects
    .filter(task_id=OuterRef('pk'))
    .values('status')
    .order_by('-timestamp')[:1]
)
Task.objects.annotate(latest_status=Subquery(newest_job_status))
Based on this, you can probably also filter on the latest status:
from django.db.models import OuterRef, Q, Subquery

# Status of the newest job (by timestamp) belonging to the outer task.
newest_job_status = (
    Job.objects
    .filter(task_id=OuterRef('pk'))
    .values('status')
    .order_by('-timestamp')[:1]
)
# Keep tasks that have no jobs at all, or whose newest job is running.
no_jobs_or_running = Q(jobs=None) | Q(latest_status='running')
Task.objects.annotate(
    latest_status=Subquery(newest_job_status)
).filter(no_jobs_or_running)
or we can order by the existence of a Job, etc. with:
from django.db.models import BooleanField, Exists, ExpressionWrapper, Max, Q
from django.db.models import OuterRef, Subquery

Task.objects.annotate(
    latest_status=Subquery(
        Job.objects.filter(
            task_id=OuterRef('pk')
        ).values('status').order_by('-timestamp')[:1]
    ),
    latest_job=Max('jobs__timestamp')
).order_by(
    # False sorts before True, so tasks without any job come first.
    Exists(Job.objects.filter(task_id=OuterRef('pk'))).asc(),
    # Descending puts tasks whose latest job is running ahead of the rest;
    # ascending would sort them last.
    ExpressionWrapper(Q(latest_status='running'), output_field=BooleanField()).desc(),
    # Final tie-breaker so the ordering is deterministic.
    'pk'
)
It is a good idea to order by the primary key as the final criterion, to make the ordering deterministic.
Willem's answer looks promising and close to what I want, but I have managed to obtain the ordering by annotating the count of jobs.
This is the model manager at the end,
class TaskManager(models.Manager):
    def get_queryset(self):
        """Return tasks ordered: no jobs first, then latest job running, then the rest."""
        qs = super().get_queryset()
        latest_job = models.Max(models.F('jobs__timestamp'))
        # Status of the most recent job, taken from a correlated subquery.
        latest_status = models.Subquery(
            Job.objects.filter(
                task_id=models.OuterRef('pk')
            ).values('status').order_by('-timestamp')[:1]
        )
        job_count = models.Count('jobs')
        # Higher qs_order sorts first because of the descending order_by below.
        qs_order = models.Case(
            models.When(job_count=0, then=models.Value(2)),
            models.When(latest_status='running', then=models.Value(1)),
            default=models.Value(0),
            output_field=models.IntegerField()
        )
        # job_count and latest_status are annotated before qs_order, which
        # refers to both of them.
        return qs.annotate(
            job_count=job_count,
            latest_job=latest_job,
            latest_status=latest_status,
            qs_order=qs_order,
        ).order_by('-qs_order', '-pk')
Result screenshot

Django - Select MAX of field of related query set

Say if I have models:
class User(models.Model):
    # Field definitions are elided in the question.
    name = ...
    dob = ...


class Event(models.Model):
    # No related_name is given, so the reverse query name from User is ``event``.
    user = models.ForeignKey(User, ...)
    timestamp = models.DateTimeField()
And I want to query all Users and annotate with both Count of events and MAX of Events.timestamp
I know for count I can do:
# The model is ``User``. In lookups the reverse relation is named ``event``;
# ``event_set`` is only the attribute name on instances.
User.objects.all().annotate(event_count=models.Count('event'))
But how do I do max of a related queryset field? I want it to be a single query, like:
SELECT Users.*, MAX(Events.timestamp), COUNT(Events)
FROM Users JOIN Events on Users.id = Events.user_id
You could use Query Expressions to achieve that. You might have to play around with the foreign key related name depending on your code, but it would result in something looking like this:
from django.db.models import Count, Max

# Max is a real aggregate, so Django groups by user. A bare
# Func(function='MAX') is not treated as an aggregate.
User.objects.all().annotate(
    event_count=Count('event'),
    max_timestamp=Max('event__timestamp'),
)
You can try an aggregate query for this as follows:
from django.db.models import Max

# Lookups span relations with double underscores, not dots.
# aggregate() returns one dict for the whole table, e.g.
# {'event__timestamp__max': ...}; use annotate() for a per-user value.
User.objects.all().aggregate(Max('event__timestamp'))
See details for the above code in Django documentation here

Distinct in many-to-many relation

class Order(models.Model):
    ...


class OrderItem(models.Model):
    # on_delete is a required argument since Django 2.0; CASCADE was the
    # implicit default before that.
    order = models.ForeignKey(Order, on_delete=models.CASCADE)
    product = models.ForeignKey(Product, on_delete=models.CASCADE)
    quantity = models.PositiveIntegerField()
What I need to do is to get the Order(s) which has only one order item. How can I write it using the QuerySet objects (without writing SQL)?
The easiest way to do this would be to use the Count aggregation:
from django.db.models import Count

# Count the order items per order, then keep orders with exactly one.
(
    Order.objects
    .annotate(count=Count('orderitem__id'))
    .filter(count=1)
)

Categories