I've built an API for a movies dataset that has the following structure:
Models.py
class Directors(models.Model):
    """A director row, mapped to the ``directors`` table."""

    id = models.IntegerField(primary_key=True)
    first_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    last_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        # Default ordering is by descending id.
        db_table = "directors"
        ordering = ["-id"]
class Movies(models.Model):
    """A movie row, mapped to the ``movies`` table."""

    id = models.IntegerField(primary_key=True)
    name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    year = models.IntegerField(blank=True, null=True)
    rank = models.FloatField(blank=True, null=True)

    class Meta:
        # Default ordering is by descending id.
        db_table = "movies"
        ordering = ["-id"]
class Actors(models.Model):
    """An actor row, mapped to the ``actors`` table."""

    id = models.IntegerField(primary_key=True)
    first_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    last_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    gender = models.CharField(
        max_length=20,
        blank=True,
        null=True,
    )

    class Meta:
        # Default ordering is by descending id.
        db_table = "actors"
        ordering = ["-id"]
class DirectorsGenres(models.Model):
    """A genre attached to a director, with an optional ``prob`` float."""

    director = models.ForeignKey(
        Directors,
        on_delete=models.CASCADE,
        related_name="directors_genres",
    )
    genre = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    prob = models.FloatField(blank=True, null=True)

    class Meta:
        # Default ordering is by descending director.
        db_table = "directors_genres"
        ordering = ["-director"]
class MoviesDirectors(models.Model):
    """Link row between one director and one movie."""

    director = models.ForeignKey(
        Directors,
        on_delete=models.CASCADE,
        related_name="movies_directors",
    )
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name="movies_directors",
    )

    class Meta:
        # Default ordering is by descending director.
        db_table = "movies_directors"
        ordering = ["-director"]
class MoviesGenres(models.Model):
    """A genre attached to a movie (one row per movie/genre pair)."""

    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name="movies_genres",
    )
    genre = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        # Default ordering is by descending movie.
        db_table = "movies_genres"
        ordering = ["-movie"]
class Roles(models.Model):
    """A role played by one actor in one movie."""

    actor = models.ForeignKey(
        Actors,
        on_delete=models.CASCADE,
        related_name="roles",
    )
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name="roles",
    )
    role = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        # Default ordering is by descending actor.
        db_table = "roles"
        ordering = ["-actor"]
urls.py
from django.urls import path, include
from . import views
from api.views import getMovies, getGenres, getActors
# URL routes for the movies API.
urlpatterns = [
    path('', views.getRoutes),
    path('movies/', getMovies.as_view(), name='movies'),
    path('movies/genres/', getGenres.as_view(), name='genres'),
    # The ``int`` converter rejects a non-numeric pk at routing time (404)
    # instead of letting it reach the integer ``actor_id`` filter in the view.
    path('actor_stats/<int:pk>', getActors.as_view(), name='actor_stats'),
]
serializer.py
from rest_framework import serializers
from movies.models import *
class MoviesSerializer(serializers.ModelSerializer):
    """Serializes every field of a ``Movies`` row."""

    class Meta:
        model = Movies
        fields = "__all__"
class DirectorsSerializer(serializers.ModelSerializer):
    """Serializes every field of a ``Directors`` row."""

    class Meta:
        model = Directors
        fields = "__all__"
class ActorsSerializer(serializers.ModelSerializer):
    """Serializes every field of an ``Actors`` row."""

    class Meta:
        model = Actors
        fields = "__all__"
class DirectorsGenresSerializer(serializers.ModelSerializer):
    """Serializes every field of a ``DirectorsGenres`` row."""

    class Meta:
        model = DirectorsGenres
        fields = "__all__"
class MoviesDirectorsSerializer(serializers.ModelSerializer):
    """Serializes a movie/director link with both sides nested as objects."""

    # Nested representations replace the plain foreign-key ids.
    movie = MoviesSerializer(many=False)
    director = DirectorsSerializer(many=False)

    class Meta:
        model = MoviesDirectors
        fields = "__all__"
class MoviesGenresSerializer(serializers.ModelSerializer):
    """Serializes a movie/genre row with the movie nested as an object."""

    # Nested representation replaces the plain foreign-key id.
    movie = MoviesSerializer(many=False)

    class Meta:
        model = MoviesGenres
        fields = "__all__"
class RolesSerializer(serializers.ModelSerializer):
    """Serializes a role with its movie and actor nested as objects."""

    # Nested representations replace the plain foreign-key ids.
    movie = MoviesSerializer(many=False)
    actor = ActorsSerializer(many=False)

    class Meta:
        model = Roles
        fields = "__all__"
views.py
class getMovies(ListAPIView):
    """Paginated list of movie/director links, filterable by director name.

    The query parameters ``director__first_name`` and ``director__last_name``
    are applied by ``DjangoFilterBackend``.
    """

    # ``director`` is a non-null ForeignKey, so no ``director__in`` subquery
    # over all directors is needed to select the rows.  ``select_related``
    # loads the nested movie and director in the same query, avoiding two
    # extra queries per serialized row.
    queryset = MoviesDirectors.objects.select_related('movie', 'director')
    serializer_class = MoviesDirectorsSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['director__first_name', 'director__last_name']
class getGenres(ListAPIView):
    """Paginated list of movie/genre rows, filterable by ``genre``.

    Rows are ordered by genre, descending.
    """

    # ``movie`` is a non-null ForeignKey, so no ``movie__in`` subquery over
    # all movies is needed.  ``select_related`` loads the nested movie in the
    # same query instead of one extra query per serialized row.
    queryset = MoviesGenres.objects.select_related('movie').order_by('-genre')
    serializer_class = MoviesGenresSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['genre']
class getActors(ListAPIView):
    """Paginated list of the roles played by the actor given in the URL."""

    # Load the nested movie and actor together with each role so that
    # serializing a page does not issue two extra queries per row.
    queryset = Roles.objects.select_related('movie', 'actor')
    serializer_class = RolesSerializer
    pagination_class = CustomPagination

    def get_queryset(self):
        """Return only the roles whose actor matches the ``pk`` URL kwarg."""
        return super().get_queryset().filter(actor_id=self.kwargs['pk'])
Now, in the getActors class, I want to count the number of movies per genre that the actor with a specific pk has played in.
That is, the number of movies by genre that the actor participated in, e.g. Drama: 2, Horror: 3.
Right now I am only getting the overall count of movies (count: 2):
GET /api/actor_stats/17
HTTP 200 OK
Allow: GET, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept
{
"count": 2,
"next": null,
"previous": null,
"results": [
{
"id": 800480,
"movie": {
"id": 105231,
"name": "Everybody's Business",
"year": 1993,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Grandfather"
},
{
"id": 800481,
"movie": {
"id": 242453,
"name": "OP Pro 88 - Barra Rio",
"year": 1988,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Himself"
}
]
}
What is the optimized way of achieving the following:
number_of_movies_by_genre
Drama: 2
Horror: 3
UPDATE
class RolesSerializer(serializers.Serializer):
    """Per-role serializer exposing statistics about the role's actor.

    Every field is computed by a ``get_<field>`` method from the ``Roles``
    instance being serialized.
    """

    id = serializers.SerializerMethodField()
    name = serializers.SerializerMethodField()
    top_genre = serializers.SerializerMethodField()
    number_of_movies = serializers.SerializerMethodField()
    number_of_movies_by_genre = serializers.SerializerMethodField()
    most_frequent_partner = serializers.SerializerMethodField()

    class Meta:
        model = Roles
        fields = '__all__'

    def get_id(self, obj):
        """Return the id of the role's actor."""
        actor = obj.actor
        return actor.id

    def get_name(self, obj):
        """Return the actor's first and last name joined by a space."""
        return f'{obj.actor.first_name} {obj.actor.last_name}'

    def get_top_genre(self, obj):
        """Return the highest per-genre count, once per genre that reaches it."""
        genre_path = 'movie__movies_genres__genre'
        actor_roles = Roles.objects.filter(actor=obj.actor.id)
        per_genre = actor_roles.values(genre_path).annotate(
            genre=F(genre_path),
            number_of_movies=Count(genre_path),
        )
        counts = [row['number_of_movies'] for row in per_genre]
        top = max(counts)
        # One entry per tied genre; the entries are counts, not genre names.
        return [top] * counts.count(top)

    def get_number_of_movies(self, obj):
        """Return the number of role rows belonging to the actor."""
        actor_roles = Roles.objects.filter(actor=obj.actor.id)
        return actor_roles.values('movie__name').count()

    def get_number_of_movies_by_genre(self, obj):
        """Return ``genre`` / ``number_of_movies`` rows for the actor."""
        genre_path = 'movie__movies_genres__genre'
        grouped = Roles.objects.filter(actor=obj.actor.id).values(genre_path)
        counted = grouped.annotate(
            genre=F(genre_path),
            number_of_movies=Count(genre_path),
        )
        return counted.values('genre', 'number_of_movies')

    def get_most_frequent_partner(self, obj):
        """Return, per other actor, how many roles they have in shared movies."""
        # Ids of the movies the actor has a role in, used as a subquery.
        actor_movie_ids = Roles.objects.filter(
            actor=obj.actor.id,
        ).values('movie__id')
        co_star_roles = Roles.objects.filter(
            movie__in=actor_movie_ids,
        ).values('actor').exclude(actor=obj.actor.id)
        annotated = co_star_roles.annotate(
            partner_actor_id=F('actor'),
            partner_actor_name=Concat(
                F('actor__first_name'), Value(' '), F('actor__last_name'),
            ),
            number_of_shared_movies=Count('actor'),
        )
        return annotated.values(
            'partner_actor_id',
            'partner_actor_name',
            'number_of_shared_movies',
        )
There are two problems with this code. First, it repeats the result once per movie: for instance, if the actor has 5 movies, the same result is repeated 5 times. Second, to get top_genre and most_frequent_partner I'm using max(), but then I only get the numbers and not the actual genre name (for top_genre) or actor name (for most_frequent_partner). I use max() in this way so that I can get more than one value when there is a tie. For instance, for top_genre: if the actor has 3 Drama, 3 Comedy, 1 Horror and 1 Documentary, I get the maximum as [3, 3]. How can I get the actual names that correspond to these results? The same goes for most_frequent_partner.
Results looks like this so far:
{
"next": null,
"previous": null,
"count": 4,
"pagenum": null,
"results": [
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
}
]
}
What I want to see in the end:
{
"next": null,
"previous": null,
"count": 2,
"results": [
{
"id": 18 (actor_id),
"name": Bruce Buffer (actor_name),
"number of movies": 2,
"top genre": Drama, Documentary,
"number of movies by genre": Drama: 1, Documentary: 1,
"most frequent partner": partner_actor_id, partner_actor_name, number_of_shared_movies,
}
]
}
If you want the number of movies by genre for a given actor, you can use annotate() with a Count aggregate:
return Roles.objects.filter(
actor_id=self.kwargs['pk']
).values('movie__movies_genres__genre').annotate(
no_of_movies=Count('movie__movies_genres__genre'),
genre=F('movie__movies_genres__genre'),
)
Here we first filter the roles for the given actor.
Then values() groups the rows by genre, and the annotations (the count and the genre alias) are computed over all members of each group.
You can then expose these calculated results through a SerializerMethodField.
If you have a huge dataset this will not perform well; you can create indexes accordingly, but it will still cost you 2-3 queries.
You can learn more in the Django QuerySet API documentation.
There are many ways to implement this route; the right one depends on many criteria, including how heavily it will be used.
I think a sound approach is to create a dedicated model that stores the actor stats, with a one-to-one relation to the actor, and to recompute the values each time a movie is added. However, if you add movies often, this could slow down your database.
You can also accept having somewhat outdated data for a while and update the table regularly with a background job, possibly using a custom SQL query (bulk update) that will give you better performance.
I would start with your models: you have genre defined as a CharField in two of them. Because the genres are not isolated in one place, you need to look in both tables to find all genres. If you do not, you are simply assuming that all the genres in one table also appear in the other, which may not be true.
Also, querying string fields is not very efficient compared to querying an integer PK, so from a scaling point of view this is bad. (Of course, I am saying this in general, as a good practice, and not specifically about movie genres.)
Your best option would be to have either a Genre model or a choices field in which you define all possible genres.
As for the counting, you would do that inside your serializer class, using a SerializerMethodField.
I have the following models that represent a working group of users. Each working group has a leader and members:
class WorkingGroup(models.Model):
    """A named working group with an optional leader."""

    group_name = models.CharField(max_length=255)
    # The leader reference is set to NULL when the referenced user is deleted.
    leader = models.ForeignKey(
        User,
        null=True,
        on_delete=models.SET_NULL,
    )
class WorkingGroupMember(models.Model):
    """Membership row linking one user to one working group."""

    group = models.ForeignKey(
        WorkingGroup,
        on_delete=models.CASCADE,
    )
    user = models.ForeignKey(
        User,
        on_delete=models.CASCADE,
    )
In DRF, I want to efficiently retrieve all groups (there are several hundred) as an array of the following json objects:
{
'id': <the_group_id>
'group_name': <the_group_name>
'leader': <id_of_leader>
'members': [<id_of_member_1>, <id_of_member_2>, ...]
}
To do so, I have set up the following serializer:
class WorkingGroupSerializer(serializers.ModelSerializer):
    """Serializes a working group together with the ids of its members."""

    members = serializers.SerializerMethodField()

    class Meta:
        model = WorkingGroup
        fields = ('id', 'group_name', 'leader', 'members',)

    def get_members(self, obj):
        """Return the ``user_id`` of every membership row of ``obj``."""
        memberships = obj.workinggroupmember_set.all()
        return memberships.values_list('user_id', flat=True)
So that in my view, I can do something like:
# Fetch all groups, prefetching their membership rows, then serialize them.
groups = WorkingGroup.objects.prefetch_related('workinggroupmember_set')
group_serializer = WorkingGroupSerializer(groups, many=True)
This works, and gives the desired result, however I am finding it does not scale well at all, as the prefetching workinggroupmember_set does not seem to be used inside of the get_members method (Silky is showing a single query to grab all WorkingGroup objects, and then a query for each workinggroupmember_set call in the get_members method). Is there a way to set up the members field in the serializer to grab a flattened/single field version of workinggroupmember_set without using a SerializerMethodField? Or some other way of doing this that lets me properly use prefetch?
The problem here is that calling values_list() on the related manager builds a new query, which nullifies your prefetch_related. There is currently no way to combine prefetch with values_list; see https://code.djangoproject.com/ticket/26565. What you can do is move this step from SQL into Python code:
class WorkingGroupSerializer(serializers.ModelSerializer):
    """Serializes a working group together with the ids of its members."""

    members = serializers.SerializerMethodField()

    class Meta:
        model = WorkingGroup
        fields = ('id', 'group_name', 'leader', 'members',)

    def get_members(self, obj):
        """Return the ``user_id`` of every membership row as a Python list."""
        # Iterating ``.all()`` and reading the attribute in Python keeps the
        # lookup on the related manager instead of building a values query.
        memberships = obj.workinggroupmember_set.all()
        return [membership.user_id for membership in memberships]
In a recent project with DRF v3.9.1 and django 2.1, I needed to recursively expose all the children of an object, by having only a direct connection to the parent, which could have had multiple children.
Before, if I was to request the "tree" of an object, I was getting:
{
"uuid": "b85385c0e0a84785b6ca87ce50132659",
"name": "a",
"parent": null
}
By applying the serialization shown below I get:
{
"uuid": "b85385c0e0a84785b6ca87ce50132659",
"name": "a",
"parent": null
"children": [
{
"uuid": "efd26a820b4e4f7c8e56c812a7791fcb",
"name": "aa",
"parent": "b85385c0e0a84785b6ca87ce50132659"
"children": [
{
"uuid": "ca2441fc7abf49b6aa1f3ebbc2dae251",
"name": "aaa",
"parent": "efd26a820b4e4f7c8e56c812a7791fcb"
"children": [],
}
],
},
{
"uuid": "40e09c85775d4f1a8578bba9c812df0e",
"name": "ab",
"parent": "b85385c0e0a84785b6ca87ce50132659"
"children": [],
}
],
}
Here is the models.py of the recursive object:
class CategoryDefinition(BaseModelClass):
    """A named category that may point at a parent category."""

    name = models.CharField(max_length=100)
    # Self-referencing link; a parent reaches its direct children through
    # the ``children`` reverse accessor.
    parent = models.ForeignKey(
        "self",
        related_name="children",
        on_delete=models.CASCADE,
        null=True,
        blank=True,
    )
To get all the reverse objects in the foreign key, apply a field to the serializer class:
class DeepCategorySerializer(serializers.ModelSerializer):
    """Serializes a category together with its whole subtree of children."""

    children = serializers.SerializerMethodField()

    class Meta:
        model = models.CategoryDefinition
        fields = '__all__'

    def get_children(self, obj):
        """Return the serialized direct children of ``obj``.

        Each child is rendered by this same serializer class, so the result
        nests recursively down to categories that have no children.
        """
        # Build one child serializer for all siblings instead of one per
        # child, and hand it this serializer's context so nested levels see
        # the same context as the top level.
        child_serializer = DeepCategorySerializer(context=self.context)
        return [
            child_serializer.to_representation(cat)
            for cat in obj.children.all()
        ]
Then apply this serializer to a DRF view function or generics class, such as:
# Anchored at both ends so that only ``categories/<32-character pk>/``
# matches; an unanchored regex is searched for anywhere in the path.
# ``\w`` already includes digits, so ``[\w\d]`` is written as ``\w``.
re_path(r'^categories/(?P<pk>\w{32})/$',
        generics.RetrieveUpdateDestroyAPIView.as_view(
            queryset=models.CategoryDefinition.objects.all(),
            serializer_class=serializers.DeepCategorySerializer),
        name='category-update'),
Is there a way to have a nested model as a List instead of Dict?
I'm trying to implement it with ListField but having a hard time.
Following a sample to better explain what I'm trying to do.
Sample Models:
class Album(models.Model):
    """An album, identified by its name."""

    name = models.CharField(
        max_length=250,
    )
class Track(models.Model):
    """A numbered, titled track that belongs to one album."""

    title = models.CharField(max_length=250)
    number = models.IntegerField()
    # ``on_delete`` is a required argument since Django 2.0; CASCADE was the
    # implicit default before that.  An album reaches its tracks through the
    # ``tracks`` reverse accessor.
    album = models.ForeignKey(
        Album,
        on_delete=models.CASCADE,
        related_name="tracks",
    )
Sample Serializer:
class TrackSerializer(serializers.ModelSerializer):
    """Serializes a track as its ``number`` and ``title``."""

    class Meta:
        model = Track
        fields = ['number', 'title']
class AlbumSerializer(serializers.ModelSerializer):
    """Serializes an album with its tracks nested as a list of objects."""

    tracks = TrackSerializer(many=True)

    # ``model`` and ``fields`` are only read from the inner ``Meta`` class.
    class Meta:
        model = Album
        fields = ['name', 'tracks']
Wrong output resulted from the code above:
{
"name": "ALBUM NAME",
"tracks": [
{
"number": 1,
"title": "TRACK TITLE"
},
{
"number": 2,
"title": "OTHER TRACK TITLE"
}
]
}
Desired output:
{
"name": "ALBUM NAME",
"tracks": [
[1, "TRACK TITLE"],
[2, "OTHER TRACK TITLE"]
]
}
Solution: an album serializer with one field whose value is the result of a method and can therefore be anything (SerializerMethodField).
serializers.py
class AlbumSerializer(serializers.ModelSerializer):
    """Serializes an album with its tracks as ``[number, title]`` pairs."""

    track_list = serializers.SerializerMethodField()

    class Meta:
        model = Album
        fields = ['name', 'track_list']

    def get_track_list(self, obj):
        """Return one ``[number, title]`` list per track of the album."""
        # ``tracks`` is the reverse accessor declared by
        # ``Track.album(related_name="tracks")``.
        return [[track.number, track.title] for track in obj.tracks.all()]
This returns a JSON object with the attributes "name" and "track_list", just the way I needed:
{
"name": "ALBUM NAME",
"track_list": [
[1, "TRACK TITLE"],
[2, "OTHER TRACK TITLE"]
]
}
I'm writing a REST API for my Django app, and am having problems on serializing a recursive many-to-many relationship. I found some help on the Internet, but it all seems to be applicable only to recursive many-to-many relationships with no through model specified.
My models are as follows:
class Place(models.Model):
    """A named place linked to other places through ``PlaceToPlace``."""

    name = models.CharField(max_length=60)
    # Non-symmetrical self relation routed through the PlaceToPlace model.
    other_places = models.ManyToManyField(
        "self",
        through="PlaceToPlace",
        symmetrical=False,
    )

    def __str__(self):
        """Return the place's name."""
        return self.name
class PlaceToPlace(models.Model):
    """A directed link between two places, carrying a travel time."""

    travel_time = models.BigIntegerField()
    # ``on_delete`` is a required argument since Django 2.0; CASCADE was the
    # implicit default before that.
    origin_place = models.ForeignKey(
        Place,
        on_delete=models.CASCADE,
        related_name="destination_places",
    )
    destination_place = models.ForeignKey(
        Place,
        on_delete=models.CASCADE,
        related_name="origin_places",
    )
And I tried writing this serializer:
class PlaceToPlaceSerializer(serializers.HyperlinkedModelSerializer):
    """Serializes a link as the destination's id and name plus travel time."""

    # The foreign key on PlaceToPlace is ``destination_place``;
    # ``destination_places`` is the reverse accessor on Place and is not an
    # attribute of a PlaceToPlace instance.
    id = serializers.Field(source='destination_place.id')
    name = serializers.Field(source='destination_place.name')

    class Meta:
        model = PlaceToPlace
        fields = ('id', 'name', 'travel_time')
class PlaceFullSerializer(serializers.ModelSerializer):
    """Serializes a place as its ``id`` and ``name``."""

    class Meta:
        model = Place
        fields = ("id", "name")
And so I have to write something to serialize the related Place instances, so I'd get something like this:
[
{
"id": 1,
"name": "Place 1",
"places":
[
{
"id": 2,
"name": "Place 2",
"travel_time": 300
}
]
},
{
"id": 2,
"name": "Place 2",
"places":
[
{
"id": 1,
"name": "Place 1",
"travel_time": 300
}
]
}
]
But I can't figure how to write the serializer, so some help would be very appreciated.