Can you optimize this code? (Django, python) - python

I'm adding an 'added' field that shows which of the user's categories a given post (Outfit) has been added to. That sounds confusing, so let's dive into the code.
I want to optimize get_categories(self, obj) function.
class CategorySerializer(serializers.ModelSerializer):
    """Serialize a category's id and name plus a boolean ``added`` flag."""

    # Declared explicitly: ``added`` is not among the Category model fields
    # shown, so each serialized item has to carry the value itself.
    added = serializers.BooleanField()

    class Meta:
        model = Category
        fields = ('id', 'name', 'added')
class OutfitDetailSerializer(serializers.ModelSerializer):
    """Outfit detail serializer exposing the requesting user's categories."""

    def get_categories(self, obj):
        """Return every category owned by the requesting user, flagged.

        Each item carries ``id``, ``name`` and ``added``; ``added`` is true
        when ``obj`` (the outfit) belongs to that category.  Categories that
        contain the outfit are listed first, followed by the others.

        Uses plain ORM lookups instead of ``QuerySet.extra()``, so no raw SQL
        fragments are injected into the query.
        """
        user = self.context['request'].user
        owned = Category.objects.filter(owner=user)

        # Ids of the user's categories that already contain this outfit.
        added_ids = set(
            owned.filter(outfits__pk=obj.pk).values_list('id', flat=True)
        )

        rows = [
            {'id': row['id'], 'name': row['name'], 'added': row['id'] in added_ids}
            for row in owned.values('id', 'name')
        ]
        # Stable sort: False sorts before True, so added categories come first.
        rows.sort(key=lambda row: not row['added'])
        return CategorySerializer(rows, many=True).data
The output is below!
"categories": [{
"id": 1,
"name": "Gym",
"added": true
}, {
"id": 2,
"name": "School",
"added": false
}, {
"id": 3,
"name": "hollymo",
"added": true
}, {
"id": 4,
"name": "Normal",
"added": false
}, {
"id": 6,
"name": "New Category",
"added": false
}
]
Here is models.py
class Outfit(models.Model):
    """An outfit post; ``...`` marks fields elided from this excerpt."""

    ...
    # Optional link to the user model configured in settings.
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        blank=True,
        null=True,
    )
    content = models.CharField(max_length=30)
    ...
class Category(models.Model):
    """A named, user-owned grouping of outfits."""

    name = models.CharField(max_length=20)
    owner = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        blank=True,
        null=True,
    )
    # Reverse accessor on Outfit is ``categories``.
    outfits = models.ManyToManyField(
        Outfit,
        blank=True,
        related_name="categories",
    )
    main_img = models.ImageField(
        blank=True,
        null=True,
        upload_to=upload_location_category,
    )
    ...
here the repo for test

If I understand you correctly, you can get the necessary data with a Django raw SQL query:
# Single query: LEFT JOIN the many-to-many table restricted to one outfit, so
# every category of the owner is returned and COUNT(outfit_id) is non-zero
# only for categories linked to that outfit.
q = """\
SELECT yourappname_category.id,
yourappname_category.name,
COUNT(outfit_id) > 0 as added
FROM yourappname_category
LEFT JOIN yourappname_category_outfits
ON yourappname_category.id = yourappname_category_outfits.category_id
AND yourappname_category_outfits.outfit_id=%s
WHERE yourappname_category.owner_id=%s
GROUP BY yourappname_category.id, yourappname_category.name"""
# Parameters are bound positionally: the outfit id first, then the owner id.
# NOTE(review): ``obj`` and ``user`` are presumably the outfit and the
# requesting user from the serializer method - confirm at the call site.
categories = Category.objects.raw(q, [obj.id, user.id])
# Flatten the raw model instances into plain dicts.
results = [{'id': c.id, 'name': c.name, 'added': c.added} for c in categories]

If I understand your use case correctly you just want "to check which categories User's Post(Outfit) is added to". For that you would only need to return the ones with added = true right? and then you could leave the added key out.
as in:
"categories": [{
"id": 1,
"name": "Gym"
}, {
"id": 3,
"name": "hollymo"
}
]
If so, you could just use:
from category.models import Category


class CategoriesSerializer(serializers.ModelSerializer):
    """Expose only the id and name of a category."""

    class Meta:
        model = Category
        fields = ('id', 'name')


class OutfitDetailSerializer(serializers.ModelSerializer):
    """Nest the categories an outfit belongs to under ``categories``."""

    # Resolved through the attribute named ``categories`` on the outfit.
    categories = CategoriesSerializer(many=True)
If instead your use case is to show a list of all categories and then do something with just the ones that the current outfit is added to, I'd suggest making two API calls instead of using your current logic: one using the serializer I supplied above, and one to get all categories. Then do the 'added' logic in your front end, as it's presentation-layer logic, in my opinion.
I'd certainly try to avoid doing raw SQL queries in Django, it cuts the purpose of migrations and is rarely necessary.

Related

Is there a way to add custom data into ListAPIView in django rest framework

So I've built an API for movies dataset which contain following structure:
Models.py
class Directors(models.Model):
    """A director row in the ``directors`` table, newest id first."""

    id = models.IntegerField(primary_key=True)
    first_name = models.CharField(max_length=100, null=True, blank=True)
    last_name = models.CharField(max_length=100, null=True, blank=True)

    class Meta:
        db_table = "directors"
        ordering = ["-id"]
class Movies(models.Model):
    """A movie row in the ``movies`` table, newest id first."""

    id = models.IntegerField(primary_key=True)
    name = models.CharField(max_length=100, null=True, blank=True)
    year = models.IntegerField(null=True, blank=True)
    rank = models.FloatField(null=True, blank=True)

    class Meta:
        db_table = "movies"
        ordering = ["-id"]
class Actors(models.Model):
    """An actor row in the ``actors`` table, newest id first."""

    id = models.IntegerField(primary_key=True)
    first_name = models.CharField(max_length=100, null=True, blank=True)
    last_name = models.CharField(max_length=100, null=True, blank=True)
    gender = models.CharField(max_length=20, null=True, blank=True)

    class Meta:
        db_table = "actors"
        ordering = ["-id"]
class DirectorsGenres(models.Model):
    """Links a director to a genre name together with a ``prob`` value."""

    director = models.ForeignKey(
        Directors,
        related_name="directors_genres",
        on_delete=models.CASCADE,
    )
    genre = models.CharField(max_length=100, null=True, blank=True)
    prob = models.FloatField(null=True, blank=True)

    class Meta:
        db_table = "directors_genres"
        ordering = ["-director"]
class MoviesDirectors(models.Model):
    """Join table between directors and movies."""

    director = models.ForeignKey(
        Directors,
        related_name="movies_directors",
        on_delete=models.CASCADE,
    )
    movie = models.ForeignKey(
        Movies,
        related_name="movies_directors",
        on_delete=models.CASCADE,
    )

    class Meta:
        db_table = "movies_directors"
        ordering = ["-director"]
class MoviesGenres(models.Model):
    """Assigns a genre name to a movie; one row per movie/genre pair."""

    movie = models.ForeignKey(
        Movies,
        related_name="movies_genres",
        on_delete=models.CASCADE,
    )
    genre = models.CharField(max_length=100, null=True, blank=True)

    class Meta:
        db_table = "movies_genres"
        ordering = ["-movie"]
class Roles(models.Model):
    """A role played by an actor in a movie."""

    actor = models.ForeignKey(
        Actors,
        related_name="roles",
        on_delete=models.CASCADE,
    )
    movie = models.ForeignKey(
        Movies,
        related_name="roles",
        on_delete=models.CASCADE,
    )
    role = models.CharField(max_length=100, null=True, blank=True)

    class Meta:
        db_table = "roles"
        ordering = ["-actor"]
urls.py
from django.urls import path, include
from . import views
from api.views import getMovies, getGenres, getActors
# Route table: API index, movie list, genre list and per-actor statistics.
urlpatterns = [
    path("", views.getRoutes),
    path("movies/", getMovies.as_view(), name="movies"),
    path("movies/genres/", getGenres.as_view(), name="genres"),
    # ``pk`` is captured as a string and used to filter roles by actor id.
    path("actor_stats/<pk>", getActors.as_view(), name="actor_stats"),
]
serializer.py
from rest_framework import serializers
from movies.models import *
class MoviesSerializer(serializers.ModelSerializer):
    """Serialize every field of a ``Movies`` row."""

    class Meta:
        model = Movies
        fields = "__all__"
class DirectorsSerializer(serializers.ModelSerializer):
    """Serialize every field of a ``Directors`` row."""

    class Meta:
        model = Directors
        fields = "__all__"
class ActorsSerializer(serializers.ModelSerializer):
    """Serialize every field of an ``Actors`` row."""

    class Meta:
        model = Actors
        fields = "__all__"
class DirectorsGenresSerializer(serializers.ModelSerializer):
    """Serialize every field of a ``DirectorsGenres`` row."""

    class Meta:
        model = DirectorsGenres
        fields = "__all__"
class MoviesDirectorsSerializer(serializers.ModelSerializer):
    """Serialize a movie/director link with both sides expanded inline."""

    # Nested single objects rather than primary keys.
    movie = MoviesSerializer(many=False)
    director = DirectorsSerializer(many=False)

    class Meta:
        model = MoviesDirectors
        fields = "__all__"
class MoviesGenresSerializer(serializers.ModelSerializer):
    """Serialize a movie/genre row with the movie expanded inline."""

    movie = MoviesSerializer(many=False)

    class Meta:
        model = MoviesGenres
        fields = "__all__"
class RolesSerializer(serializers.ModelSerializer):
    """Serialize a role with its movie and actor expanded inline."""

    movie = MoviesSerializer(many=False)
    actor = ActorsSerializer(many=False)

    class Meta:
        model = Roles
        fields = "__all__"
views.py
# Paginated list of movie/director links, filterable by director name.
# (Comments are used instead of a docstring so the view description that
# DRF derives from the class docstring stays unchanged.)
class getMovies(ListAPIView):
    directors = Directors.objects.all()
    # The serializer nests both the movie and the director, so join them in
    # the same query instead of fetching each one per row.
    queryset = MoviesDirectors.objects.filter(
        director__in=directors
    ).select_related('movie', 'director')
    serializer_class = MoviesDirectorsSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['director__first_name', 'director__last_name']
# Paginated list of movie/genre rows, ordered by genre descending and
# filterable by genre.  (Comments are used instead of a docstring so the
# view description that DRF derives from the class docstring stays unchanged.)
class getGenres(ListAPIView):
    movies = Movies.objects.all()
    # The serializer nests the movie, so join it in the same query instead
    # of fetching it once per row.
    queryset = (
        MoviesGenres.objects.filter(movie__in=movies)
        .select_related('movie')
        .order_by('-genre')
    )
    serializer_class = MoviesGenresSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['genre']
# Paginated list of the roles played by the actor given in the URL.
# (Comments are used instead of a docstring so the view description that
# DRF derives from the class docstring stays unchanged.)
class getActors(ListAPIView):
    # The serializer reads both the movie and the actor of each role, so
    # join them in the same query instead of fetching them per row.
    queryset = Roles.objects.select_related('movie', 'actor')
    serializer_class = RolesSerializer
    pagination_class = CustomPagination

    def get_queryset(self):
        # Restrict the base queryset to the actor id captured from the URL.
        return super().get_queryset().filter(
            actor_id=self.kwargs['pk']
        )
Now I want to count number of movies by genre that actor with specific pk played in getActors class.
Like the number of movies by genre that actor participated in. E.g. Drama: 2, Horror: 3
Right now I am getting the overall count of movies count: 2:
GET /api/actor_stats/17
HTTP 200 OK
Allow: GET, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept
{
"count": 2,
"next": null,
"previous": null,
"results": [
{
"id": 800480,
"movie": {
"id": 105231,
"name": "Everybody's Business",
"year": 1993,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Grandfather"
},
{
"id": 800481,
"movie": {
"id": 242453,
"name": "OP Pro 88 - Barra Rio",
"year": 1988,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Himself"
}
]
}
What is the optimized way of achieving the following:
number_of_movies_by_genre
Drama: 2
Horror: 3
UPDATE
class RolesSerializer(serializers.Serializer):
    """Per-actor statistics computed from a ``Roles`` row.

    Every field is derived from ``obj.actor`` only, so serializing several
    roles of the same actor produces identical entries; pass a single row per
    actor to avoid repeated results.
    """

    id = serializers.SerializerMethodField()
    name = serializers.SerializerMethodField()
    top_genre = serializers.SerializerMethodField()
    number_of_movies = serializers.SerializerMethodField()
    number_of_movies_by_genre = serializers.SerializerMethodField()
    most_frequent_partner = serializers.SerializerMethodField()

    # NOTE(review): a plain ``Serializer`` is not expected to read ``Meta``;
    # kept unchanged for compatibility - confirm whether it can be dropped.
    class Meta:
        model = Roles
        fields = '__all__'

    @staticmethod
    def _genre_counts(actor_id):
        """Group the actor's roles by movie genre and count each group."""
        return Roles.objects.filter(
            actor=actor_id
        ).values('movie__movies_genres__genre').annotate(
            genre=F('movie__movies_genres__genre'),
            number_of_movies=Count('movie__movies_genres__genre'),
        )

    def get_id(self, obj):
        """Return the actor's id."""
        return obj.actor.id

    def get_name(self, obj):
        """Return the actor's first and last name separated by a space."""
        return f'{obj.actor.first_name} {obj.actor.last_name}'

    def get_top_genre(self, obj):
        """Return the genre name(s) with the highest count for the actor.

        Ties are all returned.  Rows without a genre are ignored, and an
        actor with no genre data yields an empty list instead of raising.
        """
        rows = [
            row for row in self._genre_counts(obj.actor.id)
            if row['genre'] is not None
        ]
        if not rows:
            return []
        highest = max(row['number_of_movies'] for row in rows)
        return [row['genre'] for row in rows if row['number_of_movies'] == highest]

    def get_number_of_movies(self, obj):
        """Return how many distinct movies the actor has a role in."""
        # order_by() clears the model's default ordering so that DISTINCT is
        # applied to the movie column alone.
        return Roles.objects.filter(
            actor=obj.actor.id
        ).order_by().values('movie').distinct().count()

    def get_number_of_movies_by_genre(self, obj):
        """Return ``genre`` / ``number_of_movies`` rows for the actor."""
        return list(
            self._genre_counts(obj.actor.id).values('genre', 'number_of_movies')
        )

    def get_most_frequent_partner(self, obj):
        """Return the co-actor(s) sharing the most movies with the actor.

        Each entry has ``partner_actor_id``, ``partner_actor_name`` and
        ``number_of_shared_movies``.  Ties are all returned; an actor with no
        co-actors yields an empty list.
        """
        movie_ids = Roles.objects.filter(
            actor=obj.actor.id
        ).values('movie__id')
        partners = list(
            Roles.objects.filter(
                movie__in=movie_ids
            ).values('actor').exclude(actor=obj.actor.id).annotate(
                partner_actor_id=F('actor'),
                partner_actor_name=Concat(
                    F('actor__first_name'), Value(' '), F('actor__last_name')
                ),
                number_of_shared_movies=Count('actor'),
            ).values(
                'partner_actor_id', 'partner_actor_name', 'number_of_shared_movies'
            )
        )
        if not partners:
            return []
        most_shared = max(p['number_of_shared_movies'] for p in partners)
        return [p for p in partners if p['number_of_shared_movies'] == most_shared]
The problem with that code is that it repeats the results once per movie. For instance, if the actor has 5 movies, the results will be repeated 5 times. Another issue: in order to get top_genre and most_frequent_partner I'm using max(), but then I just get the numbers and not the actual genre name (in top_genre) or actor name (in most_frequent_partner), since I use max() in a way that can return more than one value. For instance, in top_genre: if the actor has 3 Drama, 3 Comedy, 1 Horror and 1 Documentary, I get the max as [3, 3], but how can I get the actual names out of these results? The same goes for most_frequent_partner.
Results looks like this so far:
{
"next": null,
"previous": null,
"count": 4,
"pagenum": null,
"results": [
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
}
]
}
What I want to see in the end:
{
"next": null,
"previous": null,
"count": 2,
"results": [
{
"id": 18 (actor_id),
"name": Bruce Buffer (actor_name),
"number of movies": 2,
"top genre": Drama, Documentary,
"number of movies by genre": Drama: 1, Documentary: 1,
"most frequent partner": partner_actor_id, partner_actor_name, number_of_shared_movies,
}
]
}
If you want, the number of movies by genre for a given actor what you can do is annotate and count aggregate
return Roles.objects.filter(
actor_id=self.kwargs['pk']
).values('movie__movies_genres__genre').annotate(
no_of_movies=Count('movie__movies_genres__genre'),
genre=F('movie__movies_genres__genre'),
)
Here first we filtered roles for a given actor
then values will group by genre then annotation is computed over all members of the group that count and get genre
and you can use SerializerMethodField to these calculated results
If you have a huge dataset it will not perform well; you can create indexes accordingly, but it will still cost you 2-3 queries.
you can learn more about Django queryset API
There many ways to implement this route, it depends on many criteria and how much it will be used .
I think a correct way is to create a dedicated model that stores actor stats with a one-to-one relation to the actor, and to recompute the values each time a movie is added. But if you add movies often, it could slow down your database.
You can also accept to have some outdated data for a while and update the table regularly using a background job and maybe using custom sql query that will ensure you better performance (bulk update).
I would start from your model, you have genres defined as a CharField in two of your models. By not isolating them anywhere, you need to look in both tables for all types of genres. If do not, then you are just supposing that all the genres you have in one table is also on the other one, which could not be true.
Also, querying string fields is not very efficient when in comparison to a int PK, so from the point of view of scaling this is bad. (Of course, i am saying that in general, as a good practice and not focused specifically in movie genres)
Your best option would be to have either a Genre Model or a choice field, where you define all possible genres.
As for the counting, you would do that inside your serializer class, by using a serializermethodfield.

Django Rest Framework- retrieving a related field on reverse foreign key efficiently

I have the following models that represent a working group of users. Each working group has a leader and members:
class WorkingGroup(models.Model):
    """A named group of users with an optional leader."""

    group_name = models.CharField(max_length=255)
    # The leader reference is cleared, not cascaded, when the user is deleted.
    leader = models.ForeignKey(
        User,
        on_delete=models.SET_NULL,
        null=True,
    )
class WorkingGroupMember(models.Model):
    """Membership row linking one user to one working group."""

    group = models.ForeignKey(
        WorkingGroup,
        on_delete=models.CASCADE,
    )
    user = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
    )
In DRF, I want to efficiently retrieve all groups (there are several hundred) as an array of the following json objects:
{
'id': <the_group_id>
'group_name': <the_group_name>
'leader': <id_of_leader>
'members': [<id_of_member_1>, <id_of_member_2>, ...]
}
To do so, I have set up the following serializer:
class WorkingGroupSerializer(serializers.ModelSerializer):
    """Serialize a working group with a flat list of member user ids."""

    members = serializers.SerializerMethodField()

    class Meta:
        model = WorkingGroup
        fields = ('id', 'group_name', 'leader', 'members',)

    def get_members(self, obj):
        """Return the ``user_id`` of every membership row of ``obj``.

        Iterating ``.all()`` reuses rows loaded by
        ``prefetch_related('workinggroupmember_set')``; chaining
        ``values_list()`` would build a new query for every group instead.
        """
        return [
            membership.user_id
            for membership in obj.workinggroupmember_set.all()
        ]
So that in my view, I can do something like:
# Load all groups and prefetch their membership rows via the reverse FK.
groups = WorkingGroup.objects.all().prefetch_related('workinggroupmember_set')
# Serialize the whole queryset as a list.
group_serializer = WorkingGroupSerializer(groups, many=True)
This works, and gives the desired result, however I am finding it does not scale well at all, as the prefetching workinggroupmember_set does not seem to be used inside of the get_members method (Silky is showing a single query to grab all WorkingGroup objects, and then a query for each workinggroupmember_set call in the get_members method). Is there a way to set up the members field in the serializer to grab a flattened/single field version of workinggroupmember_set without using a SerializerMethodField? Or some other way of doing this that lets me properly use prefetch?
The problem here is that you are calling values_list on top of the prefetched relation, which nullifies your prefetch_related. There is currently no way to combine prefetch with values_list; see https://code.djangoproject.com/ticket/26565. What you can do is move this into Python code instead of SQL:
class WorkingGroupSerializer(serializers.ModelSerializer):
    """Working group serializer whose ``members`` is a list of user ids."""

    members = serializers.SerializerMethodField()

    class Meta:
        model = WorkingGroup
        fields = ('id', 'group_name', 'leader', 'members',)

    def get_members(self, obj):
        """Collect ``user_id`` from each membership row of the group."""
        memberships = obj.workinggroupmember_set.all()
        return [membership.user_id for membership in memberships]
In a recent project with DRF v3.9.1 and django 2.1, I needed to recursively expose all the children of an object, by having only a direct connection to the parent, which could have had multiple children.
Before, if I was to request the "tree" of an object, I was getting:
{
"uuid": "b85385c0e0a84785b6ca87ce50132659",
"name": "a",
"parent": null
}
By applying the serialization shown below I get:
{
"uuid": "b85385c0e0a84785b6ca87ce50132659",
"name": "a",
"parent": null
"children": [
{
"uuid": "efd26a820b4e4f7c8e56c812a7791fcb",
"name": "aa",
"parent": "b85385c0e0a84785b6ca87ce50132659"
"children": [
{
"uuid": "ca2441fc7abf49b6aa1f3ebbc2dae251",
"name": "aaa",
"parent": "efd26a820b4e4f7c8e56c812a7791fcb"
"children": [],
}
],
},
{
"uuid": "40e09c85775d4f1a8578bba9c812df0e",
"name": "ab",
"parent": "b85385c0e0a84785b6ca87ce50132659"
"children": [],
}
],
}
Here is the models.py of the recursive object:
class CategoryDefinition(BaseModelClass):
    """A category that may point at a parent category of the same model."""

    name = models.CharField(max_length=100)
    # Self-referential FK; a parent's direct children are reachable through
    # the ``children`` reverse accessor.
    parent = models.ForeignKey(
        'self',
        null=True,
        blank=True,
        on_delete=models.CASCADE,
        related_name='children',
    )
To get all the reverse objects in the foreign key, apply a field to the serializer class:
class DeepCategorySerializer(serializers.ModelSerializer):
    """Serialize a category together with its whole subtree of children."""

    children = serializers.SerializerMethodField()

    class Meta:
        model = models.CategoryDefinition
        fields = '__all__'

    def get_children(self, obj):
        """Return the serialized direct children, each with its own subtree.

        The serializer recurses through this same method.  The parent's
        ``context`` is forwarded so nested levels see the same context as
        the root serializer.
        """
        return DeepCategorySerializer(
            obj.children.all(), many=True, context=self.context
        ).data
Then apply this serializer to a DRF view function or generics class, such as:
re_path(r'categories/(?P<pk>[\w\d]{32})/',
generics.RetrieveUpdateDestroyAPIView.as_view(
queryset=models.CategoryDefinition.objects.all(),
serializer_class=serializers.DeepCategorySerializer),
name='category-update'),

Ways to save django model using nested dictionary

I'll try to make this as simple as I can:
I have 2 models
from django.db import models
class OrderDetail(models.Model):
    """Product name and price text belonging to an order."""

    product = models.CharField(max_length=100)
    # Stored as free text, not as a numeric amount.
    price = models.CharField(max_length=50)
class Order(models.Model):
    """An order identified by a unique URL, with one detail record."""

    url = models.CharField(unique=True, max_length=255)
    loaded_info = models.BooleanField(default=False)
    status = models.CharField(null=True, max_length=100)
    details = models.OneToOneField(OrderDetail)
And I wish to save using a dict like this:
data = {
"order": {
"url": "http://stackoverflow.com/",
"loaded_info": True,
"status": "complete",
"details": {
"product": "Fresh answer",
"price": "50 points"
}
}
}
I'd like to do something close to:
order = Order(**data).save()
And get Order and OrderDetail saved using a single line.
Have a look at https://docs.djangoproject.com/en/1.9/topics/serialization/
In this case, you would do something like:
# serialize() iterates over its second argument, so pass a queryset
# (filter) rather than the single instance that get() would return.
qs = Order.objects.select_related('details').filter(pk=1)
data = serializers.serialize("json", qs)

Django Rest Framework nested model as list instead of dict

Is there a way to have a nested model as a List instead of Dict?
I'm trying to implement it with ListField but having a hard time.
Following a sample to better explain what I'm trying to do.
Sample Models:
class Album(models.Model):
    """An album identified by its name."""

    name = models.CharField(max_length=250)
class Track(models.Model):
    """A numbered track on an album."""

    title = models.CharField(max_length=250)
    number = models.IntegerField()
    # An album's tracks are reachable through the ``tracks`` reverse accessor.
    album = models.ForeignKey(
        Album,
        related_name="tracks",
    )
Sample Serializer:
class TrackSerializer(serializers.ModelSerializer):
    """Serialize a track as a ``number`` / ``title`` mapping."""

    class Meta:
        model = Track
        fields = ['number', 'title']


class AlbumSerializer(serializers.ModelSerializer):
    """Serialize an album with its tracks nested as a list of mappings."""

    # Populated from the ``tracks`` reverse relation declared on Track.album.
    tracks = TrackSerializer(many=True)

    # ``model`` and ``fields`` are serializer options and belong in Meta.
    class Meta:
        model = Album
        fields = ['name', 'tracks']
Wrong output resulted from the code above:
{
"name": "ALBUM NAME",
"tracks": [
{
"number": 1,
"title": "TRACK TITLE"
},
{
"number": 2,
"title": "OTHER TRACK TITLE"
}
]
}
Desired output:
{
"name": "ALBUM NAME",
"tracks": [
[1, "TRACK TITLE"],
[2, "OTHER TRACK TITLE"]
]
}
Solution: an album serializer with one field that is the result of a method and can be anything (SerializerMethodField)
serializers.py
class AlbumSerializer(serializers.ModelSerializer):
    """Serialize an album with its tracks as ``[number, title]`` pairs."""

    track_list = serializers.SerializerMethodField()

    class Meta:
        model = Album
        fields = ['name', 'track_list']

    def get_track_list(self, obj):
        """Return one ``[number, title]`` list per track of the album.

        Reads the ``tracks`` reverse relation, so tracks prefetched by the
        caller are reused instead of issuing a new query.
        """
        return [[track.number, track.title] for track in obj.tracks.all()]
This returns a JSON object with the attributes "name" and "track_list", just the way I needed:
{
"name": "ALBUM NAME",
"track_list": [
[1, "TRACK TITLE"],
[2, "OTHER TRACK TITLE"]
]
}

Serializing a recursive ManyToMany model in Django

I'm writing a REST API for my Django app, and am having problems on serializing a recursive many-to-many relationship. I found some help on the Internet, but it all seems to be applicable only to recursive many-to-many relationships with no through model specified.
My models are as follows:
class Place(models.Model):
    """A named place linked to other places through ``PlaceToPlace``."""

    name = models.CharField(max_length=60)
    # Directed (non-symmetrical) self relation carried by the through model.
    other_places = models.ManyToManyField(
        'self',
        symmetrical=False,
        through='PlaceToPlace',
    )

    def __str__(self):
        """Return the place name."""
        return self.name
class PlaceToPlace(models.Model):
    """A directed link between two places with its travel time."""

    travel_time = models.BigIntegerField()
    # ``place.destination_places`` lists the links that start at ``place``.
    origin_place = models.ForeignKey(
        Place,
        related_name="destination_places",
    )
    # ``place.origin_places`` lists the links that end at ``place``.
    destination_place = models.ForeignKey(
        Place,
        related_name="origin_places",
    )
And I tried writing this serializer:
class PlaceToPlaceSerializer(serializers.HyperlinkedModelSerializer):
    """Describe one outgoing link: destination id, name and travel time."""

    # ``destination_place`` is the FK on PlaceToPlace; ``destination_places``
    # is a reverse accessor on Place and does not exist on this model.
    # NOTE(review): on DRF 3.x the read-only field class is
    # ``serializers.ReadOnlyField`` - confirm the installed version.
    id = serializers.Field(source='destination_place.id')
    name = serializers.Field(source='destination_place.name')

    class Meta:
        model = PlaceToPlace
        fields = ('id', 'name', 'travel_time')


class PlaceFullSerializer(serializers.ModelSerializer):
    """Serialize a place with the places reachable from it."""

    # Links whose origin is this place, via the ``destination_places``
    # reverse accessor of PlaceToPlace.origin_place.
    places = PlaceToPlaceSerializer(
        source='destination_places', many=True, read_only=True
    )

    class Meta:
        model = Place
        fields = ('id', 'name', 'places')
And so I have to write something to serialize the related Place instances, so I'd get something like this:
[
{
"id": 1,
"name": "Place 1",
"places":
[
{
"id": 2,
"name": "Place 2",
"travel_time": 300
}
]
},
{
"id": 2,
"name": "Place 2",
"places":
[
{
"id": 1,
"name": "Place 1",
"travel_time": 300
}
]
}
]
But I can't figure how to write the serializer, so some help would be very appreciated.

Categories