Queryset ordered by most frequent values - python

I'm trying to order a simple queryset by the most frequent value in a column. For example, I have these models:
class Keyword(models.Model):
    """A keyword of at most 20 characters."""

    keyword = models.CharField(
        verbose_name='Keyword',
        max_length=20,
        null=False,
        blank=False,
    )
class BigText(models.Model):
    """A piece of text of at most 1000 characters."""

    text = models.CharField(
        verbose_name='Big Text',
        max_length=1000,
        null=False,
        blank=False,
    )
class BigTextKeyword(models.Model):
    """Link row pairing one Keyword with one BigText."""

    # Both sides cascade: deleting either parent removes the link row.
    keyword = models.ForeignKey(
        Keyword,
        verbose_name='Keyword',
        null=False,
        on_delete=models.CASCADE,
    )
    bigtext = models.ForeignKey(
        BigText,
        verbose_name='Big Text',
        null=False,
        on_delete=models.CASCADE,
    )
Then, I'm searching for the keywords passed on query params and returning the BigTextKeywords result found like this:
class BigTextKeywordViewSet(mixins.RetrieveModelMixin, mixins.ListModelMixin, viewsets.GenericViewSet):
    """Retrieve/list endpoint for BigTextKeyword rows matched by a keyword search."""

    queryset = BigTextKeyword.objects.all()
    serializer_class = BigTextKeywordSerializer

    def get_queryset(self):
        """Return the rows whose keyword contains any term of ``search_content``."""
        raw_terms = self.request.query_params.get('search_content', '')
        # OR together one case-insensitive "contains" condition per term.
        any_term = Q()
        for word in raw_terms.split(' '):
            any_term |= Q(keyword__icontains=word)
        matching_keywords = Keyword.objects.filter(any_term)
        return self.queryset.filter(keyword__in=matching_keywords)
I want to order the result by the most frequent bigtext field. For example, if a bigtext occurs 3 times in the result, it should appear before a bigtext that occurs 2 times. The result should look similar to the one below:
keyword_id
bigtext_id
15
5
19
5
1
5
15
10
13
10
87
2
19
1

You can use Django's annotate() method to add a computed field to each BigTextKeyword object that represents the frequency count of its associated BigText. Then, you can use the order_by() method to sort the queryset in descending order based on this computed field.
Here’s an example:
from django.db.models import Count
class BigTextKeywordViewSet(mixins.RetrieveModelMixin, mixins.ListModelMixin, viewsets.GenericViewSet):
    """Retrieve/list endpoint returning keyword matches, most frequent BigText first."""

    queryset = BigTextKeyword.objects.all()
    serializer_class = BigTextKeywordSerializer

    def get_queryset(self):
        """Return matching rows ordered by how often their BigText was matched.

        The search terms come from the ``search_content`` query parameter,
        split on single spaces; a keyword matches when it contains any term.
        """
        keyword_filter = Q()
        search_content = self.request.query_params.get('search_content', '')
        for term in search_content.split(' '):
            keyword_filter |= Q(keyword__icontains=term)
        keywords = Keyword.objects.filter(keyword_filter)
        # Counting the forward FK ('bigtext') always yields 1 per row, so the
        # count has to go through the reverse relation: for each row, count
        # the BigTextKeyword rows sharing its BigText, restricted to the
        # matched keywords so only rows present in the result are counted.
        result = self.queryset.filter(keyword__in=keywords).annotate(
            bigtext_count=Count(
                'bigtext__bigtextkeyword',
                filter=Q(bigtext__bigtextkeyword__keyword__in=keywords),
            )
        )
        # Most frequent first; the id tiebreaker keeps rows of the same
        # BigText next to each other when counts are equal.
        return result.order_by('-bigtext_count', 'bigtext_id')

Related

How to write Django query to grab all objects in descending order according to two number fields?

I'm using Django and I'm trying to write a query that returns the top-rated products. I have a Product table, as you can see below.
class Product(models.Model):
    """Product owned by a user, with rating and review-count columns."""

    user = models.ForeignKey(
        User,
        verbose_name=_("Owner"),
        on_delete=models.CASCADE,
    )
    name = models.CharField(_("Name"), max_length=150, null=True)
    # Both rating columns are nullable.
    average_rating = models.DecimalField(
        _("average rating"),
        max_digits=10,
        decimal_places=2,
        null=True,
        blank=True,
    )
    total_reviews = models.IntegerField(
        _("total reviews "),
        default=0,
        null=True,
        blank=True,
    )
    is_remove = models.BooleanField(_("Remove"), default=False)
    create_time = models.DateTimeField(_("Create time"), default=timezone.now)
Now I want to get all objects ordered by the highest average rating and total review count.
I have tried the approaches below, but none of them worked.
1 -
def get_all_top_rated_products(self):
    """Return non-removed products sorted by review count, then rating (ascending)."""
    products = self.filter(is_remove=False)
    products = products.order_by("total_reviews", "average_rating")
    print(products)
    return products
2
def get_all_top_rated_products(self):
    """Return the aggregate maxima of rating and review count over non-removed products."""
    not_removed = self.filter(is_remove=False)
    maxima = not_removed.aggregate(
        Max('average_rating'),
        Max('total_reviews'),
    )
    print(maxima)
    return maxima
You should order in descending order, you can do this by prefixing the fieldname with a minus (-):
def get_all_top_rated_products(self):
    """Return non-removed products, best rated first, then most reviewed.

    ``F(...).desc()`` is the expression form of the ``'-field'`` prefix.
    Both columns are nullable, and some databases (e.g. PostgreSQL) sort NULL
    first in descending order, so ``nulls_last=True`` keeps products without
    a rating or review count at the end instead of the top.
    """
    from django.db.models import F

    return self.filter(is_remove=False).order_by(
        F('average_rating').desc(nulls_last=True),
        F('total_reviews').desc(nulls_last=True),
    )

How to combine two Q conditions in a Django query?

I need a list of all mobile phones whose brand name is one of the incoming brands. The desired brand names are passed in; the number of entries is unknown and may be zero. If the input is empty, the list of all mobile phones must be returned.
model:
from django.db import models
class Brand(models.Model):
    """Phone brand with a name and a nationality."""

    name = models.CharField(max_length=32)
    nationality = models.CharField(max_length=32)

    def __str__(self):
        """Return the brand name."""
        return self.name
class Mobile(models.Model):
    """Mobile phone belonging to a Brand; ``model`` is unique."""

    brand = models.ForeignKey(Brand, on_delete=models.CASCADE)
    model = models.CharField(max_length=32, default='9T Pro', unique=True)
    price = models.PositiveIntegerField(default=2097152)
    color = models.CharField(max_length=16, default='Black')
    display_size = models.SmallIntegerField(default=4)
    is_available = models.BooleanField(default=True)
    made_in = models.CharField(max_length=20, default='China')

    def __str__(self):
        """Return "<brand name> <model>"."""
        return f'{self.brand.name} {self.model}'
query:
from django.db.models import F, Q
def some_brand_mobiles(*brand_names):
    """Filter mobiles by "brand in brand_names" OR "brand name is not the empty list"."""
    in_requested_brands = Q(brand__name__in=brand_names)
    name_is_not_empty_list = ~Q(brand__name=[])
    return Mobile.objects.filter(in_requested_brands | name_is_not_empty_list)
If the input is empty, the list of all mobile phones is returned, but I can't get the function to return the filtered list when *brand_names is given.
for example
query = Mobile.objects.filter(Q(brand_name_in=['Apple', 'Xiaomi']))
return query
and
# The negated condition on its own: brand name is NOT equal to the empty list.
query = Mobile.objects.filter(~Q(brand__name=[]))
return query
Each of these conditions works on its own in the examples above, but the function I wrote does not apply both conditions.
How can I fix it?
It is simpler to just check the list of brand_names and filter if it contains at least one element:
def some_brand_mobiles(*brand_names):
    """Return mobiles of the given brands, or every mobile when no brand is given."""
    # Guard clause: nothing to filter on, so hand back the full list.
    if not brand_names:
        return Mobile.objects.all()
    return Mobile.objects.filter(brand__name__in=brand_names)
Try this solution:
def some_brand_mobiles(*brand_names):
    """Return all mobiles, narrowed to the given brands when any are passed."""
    mobiles = Mobile.objects.all()
    # Each optional condition narrows the same queryset, so further filters
    # can be chained here in the same way.
    if brand_names:
        mobiles = mobiles.filter(brand__name__in=brand_names)
    return mobiles
In that way you can add more filters based on any other condition over the queryset.

How to order queryset based on best match in django-rest-framework?

I am trying to order results of a query with parameters by number of matches.
For example, let's say we have a Model:
class Template(models.Model):
    """Template with a headline, body text, optional image text and tags."""
    headline = CharField(max_length=300)
    text = TextField()
    # Optional: may be blank in forms and NULL in the database.
    image_text = TextField(max_length=500, blank=True, null=True)
    tags = TaggableManager(through=TaggedItem)
    # Remaining fields are elided in the question.
    ...
With a Serializer:
class TemplateSerializer(serializers.HyperlinkedModelSerializer):
    """Hyperlinked serializer for the Template model."""
    class Meta:
        model = Template
        # The field list is elided in the question.
        fields = (...)
And a ViewSet:
class TemplateViewSet(viewsets.ModelViewSet):
    """
    API endpoint that allows Templates to be viewed or edited.
    """
    queryset = Template.objects.all()
    serializer_class = TemplateSerializer

    def get_queryset(self):
        """Return templates narrowed by the ``tags`` and ``search_text`` parameters.

        Each parameter may be repeated. A template is kept when it matches
        any of the tags (case-insensitive, exact) and, if ``search_text`` is
        given, when any string occurs in its image text, text or headline.
        With neither parameter present, all templates are returned.
        """
        queryset = Template.objects.all()
        # getlist() yields an empty list for a missing parameter, so test
        # truthiness rather than "is not None".
        tags = self.request.query_params.getlist('tags')
        search_text = self.request.query_params.getlist('search_text')

        if tags:
            tag_query = Q()
            for tag in tags:
                # NOTE(review): the lookup path 'groost_tags' is kept from the
                # original; confirm it matches the tag relation's query name.
                tag_query |= Q(groost_tags__name__iexact=tag)
            queryset = queryset.filter(tag_query).distinct()

        if search_text:
            text_query = Q()
            for string in search_text:
                text_query |= (
                    Q(image_text__icontains=string)
                    | Q(text__icontains=string)
                    | Q(headline__icontains=string)
                )
            queryset = queryset.filter(text_query).distinct()

        return queryset
What I need to do is count every match the filter finds and then order the queryset by that number of matches for each template. For example:
I want to find all the templates that have "hello" and "world" strings in their text, image_text or headline. So I set the query parameter "search_text" to hello,world. Template with headline="World" and text="Hello, everyone." would have 2 matches. Another one with headline="Hello" would have 1 match. The template with 2 matches would be the first in the queryset. The same behaviour should work for tags and tags with search_text combined.
I tried to calculate these numbers right in the ViewSet and then return a sorted(queryset, key=attrgetter('matches')) but encountered several issues with the DRF, like Template has no attribute 'matches'. Or 404 when directly accessing a Template instance through API.
Any ideas?
Try an annotation in which each field/search-string pair yields 1 or 0, and sum those values into a rank:
from django.db.models import Avg, Case, F, FloatField, Value, When
def _contains_flag(field_name):
    """Build an expression that is 1.0 when ``field_name`` contains ``string``, else 0.0."""
    return Case(
        When(**{f"{field_name}__icontains": string}, then=Value(1.0)),
        default=Value(0.0),
        output_field=FloatField(),
    )


# One flag per searched column; the rank is their sum, highest rank first.
Template.objects.annotate(
    k1=_contains_flag("image_text"),
    k2=_contains_flag("text"),
    k3=_contains_flag("headline"),
    rank=F("k1") + F("k2") + F("k3"),
).order_by("-rank")

convert field to integer before using in admin, ordering by that field

At the moment, in my admin, i am ordering YelpCompanys by annual_revenue. Some of the annual revenues contain characters that are not numbers. I have a function on the model that converts annual_revenues to integers. How do I use this function in my ordering function in the admin? Any help is appreciated -- thanks in advance. Here is my code:
models.py
class YelpCompany(models.Model):
    """Yelp company record; ``annual_revenue`` is stored as free text."""

    title = models.CharField(max_length=255)
    url = models.URLField(max_length=255, unique=True)
    messaged = models.BooleanField(default=False)
    date_created = models.DateTimeField(auto_now_add=True)
    city = models.CharField(max_length=255, blank=True, null=True)
    company_type = models.CharField(max_length=255, blank=True, null=True)
    not_available = models.BooleanField(default=False)
    annual_revenue = models.CharField(max_length=255, blank=True, null=True)

    def __str__(self):
        """Return the company title."""
        return self.title

    def revenue_to_int(self):
        """Return ``annual_revenue`` as an int, or 0 when no number can be read.

        Plain integers are converted directly. For "less than $X" the amount
        after the phrase is used; for values containing "million" the part
        before "to" is used. No scaling is applied for "million".
        """
        revenue = self.annual_revenue
        # The column is nullable and may be blank.
        if not revenue:
            return 0
        try:
            return int(revenue)
        except ValueError:
            pass

        if 'less than' in revenue:
            amount = revenue.partition('less than')[2]
        elif 'million' in revenue:
            amount = revenue.split('to')[0]
        else:
            return 0

        # Always hand back an int so callers can sort on the result.
        try:
            return int(amount.replace('$', '').replace(',', '').strip())
        except ValueError:
            return 0
admin.py
@admin.register(YelpCompany)
class YelpCompanyAdmin(admin.ModelAdmin):
    """Admin for YelpCompany showing only companies still to be contacted."""

    list_display = ('title', 'url', 'messaged', 'city', 'annual_revenue', 'not_available')
    # annual_revenue is a text column, so this ordering is by string value.
    ordering = ('annual_revenue',)

    def get_queryset(self, request):
        """Return companies that are neither messaged nor marked unavailable."""
        qs = super().get_queryset(request)
        return qs.filter(messaged=False, not_available=False)
@Arakkl_Abu's answer is correct. I played around a bit and tried rebuilding your Python method with database functions. It would be something similar to this:
#admin.py
from django.db.models import Case, Value, When, IntegerField, FloatField, F
from django.db.models.functions import Substr, StrIndex, Cast, Replace, Length
class YelpCompanyAdmin(admin.ModelAdmin):
    """Admin that derives a numeric ``annual_revenue1`` column for sorting."""

    list_display = ('title', 'url', 'messaged', 'city', 'annual_revenue', 'not_available', 'show_annual_revenue1')

    def get_queryset(self, request):
        """Return un-messaged, available companies annotated with ``annual_revenue1``.

        Three chained annotations: classify the text value (``field_case``),
        find where the number starts (``index_nr``), then cast that part to
        a float (``annual_revenue1``).
        """
        qs = super().get_queryset(request).filter(messaged=False, not_available=False)

        # Step 1: classify the raw text. 1 = "less than", 2 = "million",
        # 3 = no usable number, 4 = assumed to be a plain number.
        qs = qs.annotate(
            field_case=Case(
                When(annual_revenue__contains="less than", then=Value(1)),
                When(annual_revenue__contains="million", then=Value(2)),
                # NOTE(review): this pattern requires a trailing "/"; it looks
                # like a mangled "letters only" regex -- confirm the intent.
                When(annual_revenue__iregex=r'^[a-zA-Z]+/$', then=Value(3)),
                When(annual_revenue=None, then=Value(3)),
                # NOTE(review): exact-match against a regex-looking literal;
                # presumably meant as a regex lookup -- confirm the intent.
                When(annual_revenue=r'^[/w]+/$', then=Value(3)),
                default=Value(4),
                output_field=IntegerField()
            )
        )

        # Step 2: 1-based position just past the marker phrase; 0 means
        # "no number", -1 means "use the whole field".
        qs = qs.annotate(
            index_nr=Case(
                When(field_case=1, then=StrIndex('annual_revenue', Value('less than')) + Length(Value('less than'))),
                When(field_case=2, then=StrIndex('annual_revenue', Value('up to')) + Length(Value('up to'))),
                When(field_case=3, then=Value(0)),
                default=Value(-1),
                output_field=IntegerField()
            )
        )

        # Step 3: extract and cast. Every branch yields a float and the Case
        # declares FloatField, so the branches do not mix integer and float
        # output types.
        qs = qs.annotate(
            annual_revenue1=Case(
                When(index_nr__gt=0, then=Cast(Replace(
                    Substr(F('annual_revenue'), F("index_nr")), Value('$'), Value('')),
                    output_field=FloatField())),
                When(index_nr=0, then=Value(0.0)),
                default=Cast(F('annual_revenue'), output_field=FloatField()),
                output_field=FloatField()
            )
        )
        return qs

    def show_annual_revenue1(self, inst):
        """Return the annotated numeric revenue for the list display."""
        return inst.annual_revenue1
    # Sorting this column sorts by the annotation.
    show_annual_revenue1.admin_order_field = 'annual_revenue1'
The annotations create a new annotation, annual_revenue1, containing only the numeric value of annual_revenue. This can be used for ordering. The ModelAdmin above has a new column in the list display called show_annual_revenue1, which can be sorted through its admin_order_field.
A post dealing with using annotations for ordering is here.
A few words of explanation:
The first 'Case' annotation sorts the 'annual_revenue' entries into groups: 1. The field contains 'less than', 2. The field contains 'million', 3. The field contains letters, is None or empty, 4. If none of the above apply it is assumed the field contains a numeric value. You might need to adapt this to your special use-case if other cases apply.
In the second 'Case' annotation we find the index for extracting the substring, similar to your split() command. Fields which do not contain a valid numeric value as defined in the first Case annotation, or which are already numeric values, are marked with index_nr '0' or '-1' respectively.
In the third annotation block we extract the numeric substring or just return 0 (if the field does not hold a valid numeric value) or return the value of the field if it can be used as is. The returned values are cast to numeric values so we get the correct sorting.
Approach 2
The drawback of the approach above is that it is not very flexible and rather "longish". Another approach could be removing all the strings at the beginning:
#admin.py
from django.db.models import Case, Value, When, F, FloatField
from django.db.models.functions import Cast, Replace
#admin.py
from django.db.models import Case, Value, When, IntegerField, FloatField, F
from django.db.models.functions import Substr, StrIndex, Cast, Replace, Length, Trim
class YelpCompanyAdmin(admin.ModelAdmin):
    """Admin that sorts companies by a numeric ``annual_revenue1`` annotation."""

    list_display = ('title', 'url', 'messaged', 'city', 'annual_revenue', 'not_available', 'show_annual_revenue1')

    def get_queryset(self, request):
        """Return un-messaged, available companies ordered by numeric revenue.

        Every string in ``replace_strings`` is removed from the text value,
        the remainder is trimmed and cast to a float. NULL and empty
        remainders become 0.0.
        """
        replace_strings = ['million', 'milion', 'up to', '$', 'less than', 'False']
        qs = super().get_queryset(request).filter(messaged=False, not_available=False)
        qs = qs.annotate(annual_revenue1=F('annual_revenue'))
        for s in replace_strings:
            qs = qs.annotate(
                annual_revenue1=Replace(F('annual_revenue1'), Value(s), Value(''))
            )
        qs = qs.annotate(annual_revenue1=Trim(F('annual_revenue1')))
        qs = qs.annotate(
            annual_revenue1=Case(
                When(annual_revenue1__isnull=True, then=Value(0.0)),
                # A value made only of removed strings (e.g. "False") leaves
                # an empty string, which must not reach the float cast.
                When(annual_revenue1='', then=Value(0.0)),
                default=Cast(F('annual_revenue1'), FloatField()),
                output_field=FloatField(),
            )
        )
        return qs.order_by('annual_revenue1')

    def show_annual_revenue1(self, inst):
        """Return the annotated numeric revenue for the list display."""
        return inst.annual_revenue1
    # Sorting this column sorts by the annotation.
    show_annual_revenue1.admin_order_field = 'annual_revenue1'
This gives you the flexibility to add whatever strings can appear to the list. You could even create your list of replace strings dynamically from your initial queryset if you like:
# Collect every non-numeric token that occurs in annual_revenue so it can be
# stripped before casting.
replace_strings2 = []
for q in qs.values_list('annual_revenue', flat=True):
    if q is not None:
        # Test each character, not the whole value: "$500" must contribute
        # "$", never "$500" (replacing that would delete the number itself).
        # "." is kept in the value so decimal amounts survive the cast.
        s = ''.join([x for x in q if not (x.isdigit() or x == '.')])
        replace_strings2.extend(s.split())
replace_strings = []
# dict.fromkeys drops duplicate tokens while keeping first-seen order.
for s in dict.fromkeys(replace_strings2):
    try:
        # Tokens float() can still parse (e.g. "inf", "nan") are left alone.
        float(s)
    except ValueError:
        replace_strings.append(s)

Custom query filter in django admin

Here is my models code:
class Quote(models.Model):
    """Quote model."""

    quote_text = models.TextField(unique=True)
    # on_delete is a required argument since Django 2.0; CASCADE was the
    # implicit default before that, so stating it keeps the old behaviour.
    author = models.ForeignKey(Author, on_delete=models.CASCADE)
    topic = models.ForeignKey(Topic, on_delete=models.CASCADE)
    tags = models.ManyToManyField(Tag)
    language = models.ForeignKey(Language, on_delete=models.CASCADE)
    hit = models.IntegerField(default=0)
    published = models.BooleanField(default=False)
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)
I want filter all Quote based on characters length, and this is my query in django admin.
class QuoteCountFilter(admin.SimpleListFilter):
    """Admin list filter keyed on the character count of quote_text."""

    title = _('Quote Text Char Count')
    parameter_name = 'quotelength'

    def lookups(self, request, model_admin):
        """Return the two (value, label) choices offered by the filter."""
        short_choice = ('lessthan50', _('Less than 50'))
        long_choice = ('morethan50', _('More than 50'))
        return (short_choice, long_choice)

    def queryset(self, request, queryset):
        """Apply the 'lessthan50' choice; return None for any other value."""
        if self.value() != 'lessthan50':
            return None
        length_subquery = "SELECT id FROM web_quote WHERE character_length(quote_text) < 50"
        return queryset.extra(select={"val": length_subquery})
However, it raises a ProgrammingError: more than one row returned by a subquery used as an expression
Any ideas how to fix?
What I am trying is to find all Quotes where quote_text length is less than 50 characters
Say goodbye to extra and say hello to Length
from django.db.models.functions import Length
# Annotate each row with the length of quote_text, then filter on it.
queryset.annotate(len=Length('quote_text')).filter(len__lt=50)
much neater, safer and shorter

Categories