Convert django RawQuerySet to Queryset - python

I have 2 Django models, ModelA with an ArrayField that is used to store a large list of primary key values (possibly 50k+ list)
class ModelA(models.Model):
pk_values = ArrayField(models.IntegerField())
class CustomManager(manager.Manager):
def get_for_index(self, index_id):
qs = self.get_queryset()
obj = ModelA.objects.get(pk=index_id)
return qs.filter(id__in=obj.pk_values)
class ModelB(models.Model):
# [...] some fields
objects = CustomManager()
This works:
qs = ModelB.objects.get_for_index(index_id=1)
However, this would be super slow where "pk_values" is a large list.
So I tried doing raw SQL queries:
class CustomManager(manager.Manager):
    """Manager that resolves the rows listed in a ModelA index with raw SQL."""

    def get_for_index(self, index_id):
        """Return the rows whose id appears in ``pk_values`` of one ModelA row.

        ``index_id`` is the primary key of the ModelA row to read. The result
        is still a ``RawQuerySet`` (the return value of ``raw()``), not a
        regular ``QuerySet``.
        """
        qs = self.get_queryset()
        # Select only this model's columns: with ``SELECT *`` the joined
        # table's ``id`` column would collide with this model's ``id``.
        sql = (
            "SELECT myapp_model_b.* FROM myapp_model_b "
            "JOIN myapp_model_a "
            "ON myapp_model_b.id = ANY(myapp_model_a.pk_values) "
            "WHERE myapp_model_a.id = %s"
        )
        # Pass the value as a query parameter (unquoted placeholder) so the
        # database driver escapes it, instead of interpolating it into the SQL.
        return qs.raw(sql, [index_id])
But this returns a django.db.models.query.RawQuerySet instance.
But with this, I can't do things like queryset.values() afterwards.
How can I convert this to a normal Django queryset?
Is there a better way of doing this?
Docs:
ArrayField https://docs.djangoproject.com/en/2.0/ref/contrib/postgres/fields/#arrayfield
Custom Manager https://docs.djangoproject.com/en/2.0/topics/db/managers/#custom-managers-and-model-inheritance
Raw queries https://docs.djangoproject.com/en/2.0/topics/db/sql/#performing-raw-sql-queries

You can use a RawSQL expression:
ModelB.objects.filter(id__in=RawSQL(
'SELECT unnest(a.pk_values) FROM app_modela a WHERE a.id = %s',
[index_id]
))
Alternatively you can reproduce the exact query you have in your question with extra():
# ``tables`` adds the ModelA table to the FROM clause; it must be the same
# table name that the ``where`` fragments below refer to.
ModelB.objects.extra(
    tables=['app_modela'],
    where=[
        '"app_modelb"."id" = ANY("app_modela"."pk_values")',
        '"app_modela"."id" = %s',
    ],
    # Bound to the single %s placeholder above.
    params=[index_id],
)

Update: I got something working using .extra()
class CustomManager(manager.Manager):
    """Manager that resolves the rows listed in a ModelA index via ``extra()``."""

    def get_for_index(self, index_id):
        """Return a QuerySet of the rows whose id is in ``pk_values`` of the
        ModelA row whose primary key is ``index_id``.
        """
        qs = self.get_queryset()
        where = (
            "myapp_model_b.id IN ("
            "SELECT UNNEST(myapp_model_a.pk_values) "
            "FROM myapp_model_a WHERE myapp_model_a.id = %s)"
        )
        # Bind index_id through ``params`` rather than formatting it into the
        # SQL string, so the value is escaped by the database driver.
        return qs.extra(where=[where], params=[index_id])
Docs: https://docs.djangoproject.com/en/2.0/ref/models/querysets/#django.db.models.query.QuerySet.extra

Related

Alternative way of querying through a models' method field

I have this model about Invoices which has a property method which refers to another model in order to get the cancelation date of the invoice, like so:
class Invoice(models.Model):
    # (...)

    @property
    def cancel_date(self):
        """Return the date of the first 'cancel' record, or None.

        None is returned when the invoice is not flagged as canceled, and
        also when it is flagged but has no record with
        ``change_type == 'cancel'``.
        """
        if not self.canceled:
            return None
        # first() returns None when nothing matches, so guard before
        # reading ``.date``.
        record = self.records.filter(change_type='cancel').first()
        return record.date if record is not None else None
And in one of my views, i need to query every invoice that has been canceled after max_date or hasn't been canceled at all.
Like so:
def ExampleView(request):
# (...)
qs = Invoice.objects
if r.get('maxDate'):
max_date = datetime.strptime(r.get('maxDate'), r'%Y-%m-%d')
ids = list(map(lambda i: i.pk, filter(lambda i: (i.cancel_date == None) or (i.cancel_date > max_date), qs)))
qs = qs.filter(pk__in = ids) #Error -> django.db.utils.OperationalError: too many SQL variables
However, ids might give me a huge list of ids which causes the error too many SQL variables.
What's the smartest approach here?
EDIT:
I'm looking for a solution that does not involve adding cancel_date as a model field since invoice.records refers to another model where we store every date attribute of the invoice
Like so:
class InvoiceRecord(models.Model):
invoice = models.ForeignKey(Invoice, related_name = 'records', on_delete = models.CASCADE)
date = models.DateTimeField(default = timezone.now)
change_type = models.CharField(max_length = 32) # Multiple choices field
And every invoice might have more than one same date attribute. For example, one invoice might have two cancelation dates
You can annotate a Subquery() expression [Django docs] which will give you the date to do this:
from django.db.models import OuterRef, Q, Subquery


def ExampleView(request):
    # (...)
    # Per-invoice subquery over its cancelation records. This applies the same
    # change_type='cancel' condition the Invoice.cancel_date property uses.
    cancel_records = InvoiceRecord.objects.filter(
        invoice=OuterRef("pk"),
        change_type='cancel',
    )
    # A Subquery annotation must yield a single value: one column, one row.
    qs = Invoice.objects.annotate(
        cancel_date=Subquery(cancel_records.values('date')[:1])
    )
    # ``r`` comes from the elided part of the view above.
    if r.get('maxDate'):
        max_date = datetime.strptime(r.get('maxDate'), r'%Y-%m-%d')
        # Keep invoices that were never canceled or were canceled after max_date.
        qs = qs.filter(Q(cancel_date__isnull=True) | Q(cancel_date__gt=max_date))
I would set cancel_date as database field when you set cancel flag. Then you can use single query:
qs = Invoice.objects.filter(Q(cancel_date__isnull=True) | Q(cancel_date__gt=max_date))
This says: cancel_date is NULL or greater than max_date.
I'm not sure about your cancel_date property, though. It returns the first record with change_type='cancel', which (I don't know your code flow) may be a different record than the one you expect.

Peewee query with join doesn't work as expected

I'm new to peewee and currently trying to migrate from normal Python SQlite3 library.
While my code generates a valid SQL query that returns the expected result in a SQLite DB browser, trying to get the value of a field raises AttributeError: x object has no attribute y.
Model:
class TableShows(BaseModel):
sonarr_series_id = IntegerField(column_name='sonarrSeriesId', unique=True)
title = TextField()
class Meta:
table_name = 'table_shows'
class TableHistory(BaseModel):
sonarr_series_id = ForeignKeyField(TableShows, field='sonarr_series_id', column_name='sonarrSeriesId')
class Meta:
table_name = 'table_history'
Peewee Query:
data = TableHistory.select(
TableShows.title,
TableHistory.sonarr_series_id
).join(
TableShows
).order_by(
TableShows.title.asc()
)
Resulting SQL query:
SELECT "t1"."title", "t2"."sonarrSeriesId"
FROM "table_history" AS "t2"
INNER JOIN "table_shows" AS "t1" ON ("t2"."sonarrSeriesId" = "t1"."sonarrSeriesId")
ORDER BY "t1"."title" ASC
Resulting dicts():
{'title': u'Test title', 'sonarr_series_id': 1}
Why does running this:
for item in data:
print item.title
Return this:
AttributeError: 'TableHistory' object has no attribute 'title'
http://docs.peewee-orm.com/en/latest/peewee/relationships.html#selecting-from-multiple-sources
You access the data via item.sonarr_series_id.title
You might consider naming your fields something a bit more pythonic.

How do I write a Django query that uses a complex "on" clause in its inner join?

I'm using Django, Python 3.7, and PostgreSQL 9.5. I have these models:
class Article(models.Model):
...
label = models.TextField(default='', null=True)
class Label(models.Model):
name = models.CharField(max_length=200)
I want to write a Django query that retrieves all the articles whose label contains a name from the Labels table. In PostGres, I can structure my query like so:
select a.* from myapp_article a join myapp_label l on a.label ilike '%' || l.name || '%';
but I have no idea how to pull this off in Django on account of the "on" clause and "ilike". How do I pull this off?
If you have to do a case-insensitive search on Article's label for matching names, then you can use a regex and pass it a flat list of all the label names, like so:
Article.objects.filter(label__iregex=r'(' + '|'.join(Label.objects.all().values_list('name', flat=True)) + ')')
What the above query does is, it makes a flat list of labels:
['label1' , 'label2', 'label3']
and then the string is joined like this:
'(label1|label2|label3)'
and a similar SQL query is used:
SELECT * FROM "app_article" WHERE "app_article"."label" ~* '(label1|label2|label3)'
Otherwise, for case sensitive approach, you can use this:
names_list = Label.objects.all().values_list('name', flat=True)
Article.objects.filter(label__in=names_list)
This wouldn't translate into same SQL query, but would yield the same results, using an inner query.
inner_query = Label.objects.annotate(article_label=OuterRef('label')).filter(article_label__icontains=F('name'))
articles = Article.objects.annotate(labels=Subquery(inner_query.values('name')[:1])).filter(labels__isnull=False)
This should roughly translate to this:
select a.* from myapp_article a where exists (select l.* from myapp_label l where a.label ilike '%' || l.name || '%')
But due to a current issue in Django regarding using OuterRef's in annotations, this approach doesn't work. We need to use a workaround suggested here until the issue is fixed to make this query work, like this:
Define a custom expression first
class RawCol(Expression):
    """Expression that renders a fixed ``table.column`` reference.

    The table and column names are taken from ``model``'s metadata when the
    expression is built; the output is declared as a CharField.
    """

    def __init__(self, model, field_name):
        field = model._meta.get_field(field_name)
        self.table = model._meta.db_table
        self.column = field.column
        super().__init__(output_field=CharField())

    def as_sql(self, compiler, connection):
        """Return the quoted column reference and an empty parameter list."""
        # Let the active backend quote the identifiers instead of hard-coding
        # double quotes.
        quote = connection.ops.quote_name
        return f'{quote(self.table)}.{quote(self.column)}', []
Then build your query using this expression
articles = Article.objects.all().annotate(
labels=Subquery(
Label.objects.all().annotate(
article_label=RawCol(Article, 'label')
).filter(article_label__icontains=F('name')).values('name')[:1]
)
).filter(labels__isnull=False)
This should return the instances of the Article model whose label field contains a value from the name field of the Label model.
In your class Article you will have to declare label as foreignkey to class Label
class Article(models.Model):
...
label = models.ForeignKey(Label, default='', on_delete=models.CASCADE)
And then you can access it.

Django - Multi filtering queryset return empty queryset

I have a problem with queryset in Django 2.0, after some research, I don't find any problem looks like mine.
I think it's because of my very old legacy database create by someone I didn't know.
So, I have a sqlite database who looks like this:
As you can see, the Properties table doesn't have a primary key, so I generated the models with Django's inspectdb command; they look like this:
from django.db import models
class Record(models.Model):
id = models.IntegerField(db_column='ID', primary_key=True)
class Meta:
db_table = 'Records'
def __str__(self):
return "%s" % self.id
class Propertie(models.Model):
id = models.ForeignKey(Record, models.DO_NOTHING, db_column='ID', primary_key=True)
item = models.CharField(db_column='Item', max_length=500)
value = models.CharField(db_column='Value', max_length=500)
class Meta:
db_table = 'Properties'
def __str__(self):
return '[%s]- %s -> %s' % (self.item, self.value, self.id)
I set Propertie.id as primary_key, but it's a ForeignKey, and Django says to make this field a OneToOneField. That is normal and logical, but one Record is linked to 9 Properties, so Propertie.id can't be unique. This is my first problem, because I can't alter the database.
My second and real problem is when I run this query:
def my_view(request):
epoch = datetime.date(1970, 1, 1)
period_from = stat_form.cleaned_data.get("period_from")
period_to = stat_form.cleaned_data.get("period_to")
product = stat_form.cleaned_data.get("kit")
timestamp_from = period_from - epoch
timestamp_to = period_to - epoch
records = Record.objects.using("statool").filter(
propertie__item="product",
propertie__value=product,
).filter(
propertie__item="stamp",
propertie__value__gt=str(int(timestamp_from.total_seconds())),
propertie__value__lt=str(int(timestamp_to.total_seconds())),
).count()
this QuerySet is empty but it should return approximately 16XXX Record
I don't know what happens?
Because if I do this query:
records = Record.objects.using("statool").filter(
propertie__item="product",
propertie__value=product,
)
It returns a result but the second filter doesn't work ...
The goal of those request is to get the Record out with the specific date and product name.
the 9 possibilities of item field in Properties can be:
product
version
tool
stamp
user
host
site
project
args
A future query with the same logic will be applied just after to get version by product and by site.
Thank you for your help!
And sorry for my bad English :)
To answer my problem,
first, I stopped trying to chain multiple .filter() calls, because when I run:
records = Record.objects.using("statool").filter(
propertie__item="product",
propertie__value=product,
).filter(
propertie__item="stamp",
propertie__value__gt=str(int(timestamp_from.total_seconds())),
propertie__value__lt=str(int(timestamp_to.total_seconds())),
).count()
After the first .filter(), the Record objects lose their reference to propertie_set, so I can't filter by propertie.
As @ukemi and @Ralf said, using:
.filter(
propertie__item="stamp",
propertie__value__gt=str(int(timestamp_from.total_seconds())),
propertie__value__lt=str(int(timestamp_to.total_seconds())),
)
is a really bad idea if you want an exact query.
So this is my solution:
def select_stats(request):
    """Count records per (version, site) for one product within a date range.

    Builds ``stats_site`` as ``{version: {site: count}}`` from the "statool"
    database. ``stat_form`` is not defined in this snippet; it is expected to
    come from the surrounding view code.
    """
    epoch = datetime.date(1970, 1, 1)
    period_from = stat_form.cleaned_data.get("period_from")
    period_to = stat_form.cleaned_data.get("period_to")
    product = stat_form.cleaned_data.get("kit")

    # Express both bounds as whole seconds since the epoch, the same form the
    # "stamp" values are compared in below.
    timestamp_from = int((period_from - epoch).total_seconds())
    timestamp_to = int((period_to - epoch).total_seconds())

    properties = Propertie.objects.using("statool")

    # Record ids that carry the requested product.
    all_product = properties.filter(
        item="product",
        value=product,
    ).values_list("id", flat=True)

    # Record ids whose stamp lies in [timestamp_from, timestamp_to).
    # Restrict to item="stamp" first so that values of other items are never
    # cast to integers and compared against the range.
    all_stamp = properties.filter(
        item="stamp",
    ).annotate(
        date=Cast("value", IntegerField())
    ).filter(
        date__gte=timestamp_from,
        date__lt=timestamp_to,
    ).values_list("id", flat=True)

    all_records = Record.objects.using("statool").filter(
        id__in=all_product.intersection(all_stamp)
    )

    # Every property row that belongs to a selected record; the queries below
    # are all derived from it, so they need no further id__in filter.
    all_recorded_propertie = properties.filter(id__in=all_records)

    all_version = all_recorded_propertie.filter(
        item="version",
    ).values_list("value", flat=True).distinct()
    all_site = all_recorded_propertie.filter(
        item="site",
    ).values_list("value", flat=True).distinct()

    stats_site = {}
    for version in all_version:
        stats_site[version] = {}
        id_version = all_recorded_propertie.filter(
            item="version",
            value=version,
        ).values_list("id", flat=True)
        for site in all_site:
            id_site = all_recorded_propertie.filter(
                item="site",
                value=site,
            ).values_list("id", flat=True)
            # Records that have both this version and this site.
            stats_site[version][site] = id_version.intersection(id_site).count()
The timestamp problem is solved this way:
# Restrict to item="stamp" before casting, so that values of other items are
# never cast to integers and compared against the timestamp range.
all_stamp = Propertie.objects.using("statool").filter(
    item="stamp",
).annotate(
    date=Cast("value", IntegerField())
).filter(
    date__gte=timestamp_from,
    date__lt=timestamp_to,
).values_list("id", flat=True)
Thanks to @erikreed from this thread: Django QuerySet Cast
By the way, this is the most efficient way i've found to do my job.
But if we run this view we have this runtime:
view query runtime
As you can see, every QuerySet is very fast, but the intersections between version.id and site.id take very long (more than 2 minutes).
If someone know a better way to do those query, just let us know :)
Hope I help someone.

Django queryset filter model attribute against other model attribute

I don't know if I made myself clear with this question title, but, heres my problem:
I have this model, which is just a transactional model:
class InstanceItemEvaluation(models.Model):
instance = models.ForeignKey(Instance)
item = models.ForeignKey(InstanceItem)
user = models.ForeignKey(User)
factor = models.ForeignKey(Factor)
measure = models.ForeignKey(Measure)
measure_value = models.ForeignKey(MeasureValue, null=True, blank=True)
evaluated_at = models.DateTimeField(null=True, blank=True)
Here is a query I must run to only retrieve valid values from the database:
@staticmethod
def get_user_evaluations_by_instance(user, instance):
    """Return the evaluations of ``user`` within ``instance``.

    Only rows whose factor and measure are both active are kept, and rows
    whose factor has no measure are excluded.
    """
    return (
        InstanceItemEvaluation.objects
        .filter(
            user=user,
            instance=instance,
            factor__is_active=True,
            measure__is_active=True,
        )
        .exclude(factor__measure=None)
    )
The query set speaks for itself, I am just filtering the user, and the working instance and so on. This query set output this SQL:
SELECT "workspace_instanceitemevaluation"."id",
"workspace_instanceitemevaluation"."instance_id",
"workspace_instanceitemevaluation"."item_id",
"workspace_instanceitemevaluation"."user_id",
"workspace_instanceitemevaluation"."factor_id",
"workspace_instanceitemevaluation"."measure_id",
"workspace_instanceitemevaluation"."measure_value_id",
"workspace_instanceitemevaluation"."evaluated_at"
FROM "workspace_instanceitemevaluation"
INNER JOIN "measures_measure" ON ( "workspace_instanceitemevaluation"."measure_id" = "measures_measure"."id" )
INNER JOIN "factors_factor" ON ( "workspace_instanceitemevaluation"."factor_id" = "factors_factor"."id" )
WHERE ("measures_measure"."is_active" = True
AND "workspace_instanceitemevaluation"."user_id" = 1
AND "factors_factor"."is_active" = True
AND "workspace_instanceitemevaluation"."instance_id" = 5
AND NOT ("factors_factor"."measure_id" IS NULL));
So far so good. But now I need to put this clause on the query:
AND "factors_factor"."measure_id" = "measures_measure"."id"
Which would mean I am only looking for measure values that are currently associated with my factors. Anyway, I tried to do something like this (look at the last filter):
@staticmethod
def get_user_evaluations_by_instance(user, instance):
    """Attempt to also require that the factor's measure equals the row's measure."""
    qs = InstanceItemEvaluation.objects.filter(
        user=user,
        instance=instance,
        factor__is_active=True,
        measure__is_active=True).exclude(
        factor__measure=None).filter(
        # NOTE(review): ``measure`` is not defined in this scope, so this line
        # raises NameError; comparing against another column of the same row
        # needs an F() expression instead of a Python name.
        factor__measure=measure)
    return qs
But that doesn't even make sense. Now I am kinda stuck, and couldn't find a solution. Of course that's something I can do iterating the result and removing the results I don't need. But I am trying to figure out if it is possible to achieve this SQL query I mentioned using the Django queryset API.
I'm not sure if it will work in this case, but generally you can use F() objects for this.
from django.db.models import F
qs = InstanceItemEvaluation.objects.filter(
user=user,
instance=instance,
factor__is_active=True,
measure__is_active=True).exclude(
factor__measure=None).filter(
factor__measure=F('measure_id'))

Categories