I am trying to build a complex (for me) query for one of my projects. The Django version is 1.11.4 and the PostgreSQL version is 9.6.
Here are the models.
class Event(models.Model):
    """A named event that may point at a Classification."""
    ...
    name = models.CharField(max_length=256)
    classification = models.ForeignKey(
        'events.Classification',
        blank=True,
        null=True,
        related_name='events',
    )
    ...
class Classification(models.Model):
    """A classification that may point at a ClassificationSegment."""
    ...
    segment = models.ForeignKey(
        'events.ClassificationSegment',
        null=True,
        blank=True,
        related_name='classifications',
    )
    ...
class ClassificationSegment(models.Model):
    """A named segment that classifications can refer to."""
    ...
    name = models.CharField(max_length=256)
    ...
I got stuck somewhere here and can't move forward.
from django.db.models import CharField, Value as V
from django.db.models.functions import Concat
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import OuterRef, Subquery
import events.models
# Each event of the outer segment is rendered as a '{id:<id>, name:"<name>"}' string.
event_label = Concat(
    V('{id:'), 'id', V(', name:"'), 'name', V('"}'),
    output_field=CharField(),
)
event_subquery = (
    events.models.Event.objects
    .filter(classification__segment=OuterRef('pk'))
    .annotate(event=event_label)
)
# Attempt: aggregate the first six labels of every segment into an array.
first_six_events = Subquery(event_subquery.values('event')[:6])
final_list = events.models.ClassificationSegment.objects.annotate(
    event_list=ArrayAgg(first_six_events),
)
I have a raw query. Here it is.
# Raw-SQL version of the desired query: selects every segment's id and name
# plus an "event_list" column built with ARRAY(<subquery>) from at most six
# formatted event strings whose classification belongs to that segment.
final_events = events.models.ClassificationSegment.objects.raw('SELECT "events_classificationsegment"."id", "events_classificationsegment"."name", (SELECT ARRAY(SELECT CONCAT(\'{id:\', CONCAT(U0."id", CONCAT(\',\', \'name:"\', U0."name", \'"}\'))) AS "event" FROM "events_event" U0 INNER JOIN "events_classification" U1 ON (U0."classification_id" = U1."id") WHERE U1."segment_id" = ("events_classificationsegment"."id") LIMIT 6)) AS "event_list" FROM "events_classificationsegment"')
You can see the result in the screenshot. I think I am on the right track. Can anyone help me?
Thanks.
Postgres has a really nice way of making an array from a subquery:
SELECT foo.id, ARRAY(SELECT bar FROM baz WHERE foo_id = foo.id) AS bars
FROM foo
To do this within the ORM, you can define a subclass of Subquery:
class Array(Subquery):
    """Subquery whose SQL is wrapped in Postgres' ARRAY(...) constructor."""

    template = "ARRAY(%(subquery)s)"
and use this in your queryset:
# At most six 'event' values per segment, collected into one array column.
limited_events = event_subquery.values('event')[:6]
queryset = ClassificationSegment.objects.annotate(event_list=Array(limited_events))
Related
my model looks like this
class Model(models.Model):
    """Record with a user reference, a date and two float fields."""

    user_id = models.ForeignKey()  # target model is not given in the snippet
    date = models.DateField()
    field1 = models.FloatField()
    field2 = models.FloatField()
I have the queryset below:
# Rows of user 5, each tagged with the start of the week its date falls in.
weekly_rows = Model.objects.filter(user_id__exact=5).annotate(
    weekstartdate=Trunc('date', 'week'),
)
# Group by week start, average both fields, newest week first.
queryset = (
    weekly_rows
    .values('weekstartdate')
    .annotate(avg_field1=Avg('field1'))
    .annotate(avg_field2=Avg('field2'))
    .order_by('-weekstartdate')
)
This works perfectly. Now I want to add a weekenddate field to the above queryset, holding the date weekstartdate + 6 days. I have added the line below to the above query
.annotate(weekenddate=Trunc('date', 'week') + timedelta(days=7), output_field=DateField())
but it is complaining :-
TypeError: QuerySet.annotate() received non-expression(s):
<django.db.models.fields.DateField>
Relevant imports
from django.db.models import Avg, Q
from django.db.models.functions import Trunc
from django.db.models import DateTimeField, ExpressionWrapper, F, DateField
Note :-
A simple for loop over the queryset is not what I am looking for: after a field is assigned manually, filtering the queryset still runs a fresh query against the old queryset, because querysets are lazy.
If answer can be in relativedelta of dateutil library it would be much better.
You need to use ExpressionWrapper around it.
# annotate() only accepts expressions, so output_field cannot be handed to it
# directly; wrapping the arithmetic in ExpressionWrapper carries the type.
YourModel.objects.annotate(
    weekenddate=models.ExpressionWrapper(
        # Week start + 6 days is the last day of that same week, which is
        # what the question asks for (+7 would be the next week's start).
        Trunc('date', 'week') + timedelta(days=6),
        output_field=models.DateField(),
    ),
)
I have a model structure as below,
# Status values grouped as "active".
ACTIVE_STATUS = [
    'waiting',
    'loading',
    'starting',
    'running',
    'stopping',
]
# Status values grouped as "inactive".
INACTIVE_STATUS = [
    'stopped',
    'finished',
    'failed',
    'lost',
]
# Every status value, active ones first.
ALL_STATUS = [*ACTIVE_STATUS, *INACTIVE_STATUS]
class Task(models.Model):
    """A task identified by a short name."""

    name = models.CharField(max_length=20)
class Job(models.Model):
    """A job attached to a Task, with a creation timestamp and a status."""

    # Reverse accessor and lookup name on Task is 'jobs'.
    task = models.ForeignKey(Task, related_name='jobs')
    timestamp = models.DateTimeField(auto_now_add=True)
    status = models.CharField(
        max_length=20,
        choices=zip(ALL_STATUS, ALL_STATUS),
    )
How can I annotate the "latest timestamp and its status" into Task queryset?
I have managed to obtain the latest timestamp by,
# Job.task declares related_name='jobs', so the reverse lookup from Task is
# 'jobs__...'; 'job__...' cannot be resolved.
Task.objects.annotate(latest_ts=models.Max('jobs__timestamp'))
So, how can I get the corresponding status?
Update-1
The ultimate aim of this query is to sort the Task queryset in the following order:
with zero Jobs ( say Task.objects.filter(job__isnull=True) )
latest_job=='running'
Update-2
TaskManager class that used to obtain the sorted queryset
class TaskManager(models.Manager):
    """Manager whose queryset carries latest-job annotations and is sorted by them."""

    def get_queryset(self):
        """Return Tasks annotated with latest_job, latest_status and qs_order.

        qs_order is 2 for tasks without any job, 1 when the most recent job's
        status is 'running' and 0 otherwise; rows are ordered by it, highest
        first.
        """
        qs = super().get_queryset()
        # Job.task declares related_name='jobs', so reverse lookups from Task
        # have to use 'jobs' rather than 'job'.
        latest_job = models.Max('jobs__timestamp')
        # Status of the newest job of each task.
        latest_status = models.Subquery(
            Job.objects.filter(
                task_id=models.OuterRef('pk')
            ).values('status').order_by('-timestamp')[:1]
        )
        qs_order = models.Case(
            models.When(jobs__isnull=True, then=models.Value(2)),
            models.When(latest_status='running', then=models.Value(1)),
            default=models.Value(0),
            output_field=models.IntegerField()
        )
        # qs_order refers to the latest_status annotation, so that annotation
        # is added in an earlier annotate() call instead of relying on the
        # ordering of keyword arguments.
        return qs.annotate(
            latest_job=latest_job,
            latest_status=latest_status,
        ).annotate(qs_order=qs_order).order_by('-qs_order')
You can work with a Subquery expression [Django-doc]:
from django.db.models import OuterRef, Subquery
# Status of the newest job (by timestamp) belonging to the outer task.
newest_job_status = (
    Job.objects
    .filter(task_id=OuterRef('pk'))
    .values('status')
    .order_by('-timestamp')[:1]
)
Task.objects.annotate(latest_status=Subquery(newest_job_status))
Based on this, you can probably also filter on the latest status:
from django.db.models import Q
from django.db.models import OuterRef, Subquery
# Status of the newest job (by timestamp) belonging to the outer task.
most_recent_status = Subquery(
    Job.objects.filter(task_id=OuterRef('pk')).values('status').order_by('-timestamp')[:1]
)
# Keep tasks that have no jobs or whose newest job is running.
no_jobs_or_running = Q(jobs=None) | Q(latest_status='running')
Task.objects.annotate(latest_status=most_recent_status).filter(no_jobs_or_running)
or we can order by the existence of a Job, etc. with:
from django.db.models import BooleanField, Exists, ExpressionWrapper, Max, Q
from django.db.models import OuterRef, Subquery
# Status of the newest job (by timestamp) belonging to the outer task.
most_recent_status = Subquery(
    Job.objects.filter(task_id=OuterRef('pk')).values('status').order_by('-timestamp')[:1]
)
# Ordering keys: whether the task has any job, then whether its newest job runs.
has_jobs = Exists(Job.objects.filter(task_id=OuterRef('pk')))
is_running = ExpressionWrapper(Q(latest_status='running'), output_field=BooleanField())
Task.objects.annotate(
    latest_status=most_recent_status,
    latest_job=Max('jobs__timestamp'),
).order_by(has_jobs.asc(), is_running.asc(), 'pk')
It is a good idea to order by the primary key last, as a tie-breaker, to make the ordering deterministic.
Willem's answer looks promising for what I want, but I have managed to obtain the ordering by annotating the count of jobs.
This is the model manager at the end,
class TaskManager(models.Manager):
    """Manager returning Tasks annotated with job data and sorted by qs_order."""

    def get_queryset(self):
        """Annotate job_count, latest_job, latest_status and qs_order; sort by -qs_order, -pk."""
        # Status of the newest job of each task.
        newest_status = (
            Job.objects
            .filter(task_id=models.OuterRef('pk'))
            .values('status')
            .order_by('-timestamp')[:1]
        )
        # 2: task has no jobs, 1: newest job is 'running', 0: everything else.
        ordering_rank = models.Case(
            models.When(job_count=0, then=models.Value(2)),
            models.When(latest_status='running', then=models.Value(1)),
            default=models.Value(0),
            output_field=models.IntegerField(),
        )
        annotated = super().get_queryset().annotate(
            job_count=models.Count('jobs'),
            latest_job=models.Max(models.F('jobs__timestamp')),
            latest_status=models.Subquery(newest_status),
            qs_order=ordering_rank,
        )
        return annotated.order_by('-qs_order', '-pk')
Result screenshot
Env:
python >= 3.6
Django >=2.1.0
Models example:
class A(models.Model):
    """Example model with an explicit auto primary key and one text field."""

    id = models.AutoField(primary_key=True)
    a_field = models.CharField(default="example", max_length=256)
class B(models.Model):
    """Example model with one text field and a foreign key to A."""

    id = models.AutoField(primary_key=True)
    b_field = models.CharField(default="example", max_length=256)
    # Rows of B are reachable from an A instance through the 'b' accessor.
    a = models.ForeignKey(
        A,
        on_delete=models.CASCADE,
        related_name="b",
    )
Question:
How can I fetch only the required fields from related models using select_related()?
How it could be done with prefetch_related:
from django.db.models import Prefetch
# Load only the listed columns of A when prefetching B.a.
a_columns = A.objects.only("id", "a_field")
prefetch_obj = Prefetch("a", queryset=a_columns)
B.objects.only("id", "b_field", "a").prefetch_related(prefetch_obj)
But it produces 2 requests to the DB because prefetch_related is used.
SELECT `main_b`.`id`, `main_b`.`b_field`, `main_b`.`a_id` FROM `main_b` LIMIT 21; args=()
SELECT `main_a`.`id`, `main_a`.`a_field` FROM `main_a` WHERE `main_a`.`id` IN (1); args=(1,)
If I use select_related it makes 1 DB call, but fetches all fields from A model:
models.B.objects.only("id", "b_field", "a").select_related("a")
SELECT `main_b`.`id`, `main_b`.`b_field`, `main_b`.`a_id`, `main_a`.`id`, `main_a`.`a_field`, `main_a`.`a_not_required_field` FROM `main_b` INNER JOIN `main_a` ON (`main_b`.`a_id` = `main_a`.`id`) LIMIT 21; args=()
You can use .select_related('related_obj').only('related_obj__field'). Please see the docs on Only.
models.py
from django.db import models
class UserGroup(models.Model):
    """Group of users; membership rows are stored in UserGroupMember."""

    members = models.ManyToManyField(
        User,
        through='UserGroupMember',
        related_name='members',
    )
class UserGroupMember(models.Model):
    """Through model linking one User to one UserGroup."""

    user = models.ForeignKey(User)
    usergroup = models.ForeignKey(UserGroup)
class Cohort(models.Model):
    """Cohort made up of any number of UserGroups."""

    user_groups = models.ManyToManyField(UserGroup)
class Team(models.Model):
    """Team inside a Cohort; membership rows are stored in TeamMembers."""

    cohort = models.ForeignKey(Cohort)
    members = models.ManyToManyField(
        User,
        blank=True,
        through='TeamMembers',
        related_name='team_members',
    )
class TeamMembers(models.Model):
    """Through model linking one User to one Team."""

    team = models.ForeignKey(Team)
    user = models.ForeignKey(User)
Single user can be part of only one team within a cohort.
I want to annotate the new field (boolean) which tells you is the user assigned to some team in the cohort, something like:
User.objects.filter(
members__cohort=cohort
).annotate(
is_team_member=...
)
I am using Python 2.7.13 and Django 1.9.8. Thanks.
I managed to solve the problem by doing the join to the Cohort model and using conditional expressions:
from django.db.models import Case, When, Value, IntegerField
# 1 when the user matches a team of this cohort, otherwise 0.
team_flag = Case(
    When(team_members__cohort=cohort, then=Value(1)),
    default=0,
    output_field=IntegerField(),
)
users = User.objects.filter(members__cohort=cohort).annotate(is_team_member=team_flag)
Now I can easily filter, for example, users who are part of some team:
users.filter(is_team_member=1)
29 Dec: updated models
I have got three models as follows:
class Job(models.Model):
    """Job whose primary key is its job number."""

    job_number = models.CharField(primary_key=True, max_length=20)
class Project(models.Model):
    """Project with an optional Job and a required Source."""

    job = models.ForeignKey(Job, null=True)  # updated (null=True)***
    # Source is declared further down in this module, so it is referenced
    # lazily by name; the bare class name would raise NameError at import.
    source = models.ForeignKey('Source')  # added***
class Task(models.Model):
    """Task that belongs to exactly one Project."""

    project = models.ForeignKey(Project)
class Source(models.Model): # added***
blahblah...
And I would like to get the job number for a task. Something like below:
# Start from Task: the job number is reached by following
# task -> project -> job (Job itself has no `project` attribute).
tasks = Task.objects.all().select_related()
jobno = tasks[0].project.job.job_number
I'm not sure how many times the query above will hit the DB. But I guess it will be more than twice, won't it?
To my understanding, select_related can only pre-cache the foreign key across 2 tables. Can anyone suggest the best practice in this case to reduce the number of DB hits?
select_related() joins all these three models in one query:
>>> from app.models import Task
>>> task = Task.objects.all().select_related()[0]
>>> task.project.job.job_number
u'123'
>>> from django.db import connection
>>> len(connection.queries)
1
>>> connection.queries
[{u'time': u'0.002', u'sql': u'QUERY = u\'SELECT "app_task"."id", "app_task"."project_id", "app_project"."id", "app_project"."job_id", "app_job"."job_number" FROM "app_task" INNER JOIN "app_project" ON ( "app_task"."project_id" = "app_project"."id" ) INNER JOIN "app_job" ON ( "app_project"."job_id" = "app_job"."job_number" ) LIMIT 1\' - PARAMS = ()'}]
>>>
Readable SQL:
SELECT "app_task"."id", "app_task"."project_id", "app_project"."id",
"app_project"."job_id", "app_job"."job_number"
FROM "app_task"
INNER JOIN "app_project" ON ( "app_task"."project_id" = "app_project"."id" )
INNER JOIN "app_job" ON ( "app_project"."job_id" = "app_job"."job_number" )
You can use a filter:
# Project.job is nullable, and select_related() without arguments does not
# follow nullable foreign keys, so the path to Job is named explicitly.
# Task has no `job` field of its own and Job's primary key is `job_number`,
# so the job condition has to be spelled through the project relation.
task = Task.objects.select_related('project__job').filter(
    project__id__isnull=False,
    project__job__isnull=False,
)