Count number of objects by date in daterange - python

In a Django project, I have these simplified models defined:
class People(models.Model):
    """A person, identified only by a name."""
    # Display name, at most 96 characters.
    name = models.CharField(max_length=96)
class Event(models.Model):
    """An event with a name, a start date and an end date."""
    name = models.CharField(verbose_name='Nom', max_length=96)
    # Start and end dates of the event.
    date_start = models.DateField()
    date_end = models.DateField()
    # People linked to the event; the link rows are Participation instances.
    participants = models.ManyToManyField(to='People', through='Participation')
class Participation(models.Model):
    """Participation of one person in one event.

    Carries the person's arrival and departure dates for that event.
    """
    people = models.ForeignKey(to=People, on_delete=models.CASCADE)
    event = models.ForeignKey(to=Event, on_delete=models.CASCADE)
    # Both dates are optional (blank / NULL allowed).
    arrival_d = models.DateField(blank=True, null=True)
    departure_d = models.DateField(blank=True, null=True)
Now, I need to generate a participation graph: for each single event day, I want the corresponding total number of participations.
Currently, I use this awful code:
def daterange(start, end, include_last_day=False):
    """Yield each date from ``start`` towards ``end``, one day at a time.

    ``end`` itself is only produced when ``include_last_day`` is true.
    Nothing is yielded when the range is empty or reversed.
    """
    span = int((end - start).days) + (1 if include_last_day else 0)
    offset = 0
    while offset < span:
        yield start + timedelta(offset)
        offset += 1
class ParticipationGraph(DetailView):
    """Detail view exposing per-day participation counts of an event.

    Adds two parallel lists to the template context: ``labels`` (the
    formatted dates) and ``data`` (the number of participations covering
    each of those dates).
    """
    template_name = 'events/participation_graph.html'
    model = Event

    def get_context_data(self, **kwargs):
        """Return the template context extended with ``labels`` and ``data``."""
        labels = []
        data = []
        participations = self.object.participation_set
        # NOTE(review): daterange() stops before date_end unless
        # include_last_day=True is passed -- confirm whether the final
        # event day should appear on the graph.
        for d in daterange(self.object.date_start, self.object.date_end):
            labels.append(formats.date_format(d, 'd/m/Y'))
            # The parentheses make the multi-line call chain valid; a bare
            # ".filter(...)" on its own line is a syntax error.
            # This still issues one COUNT query per day.
            total_participation = (
                participations
                .filter(arrival_d__lte=d, departure_d__gte=d)
                .count()
            )
            data.append(total_participation)
        kwargs.update({
            'labels': labels,
            'data': data,
        })
        return super(ParticipationGraph, self).get_context_data(**kwargs)
Obviously, this runs a new SQL query for each day between Event.date_start and Event.date_end. Is there a way to get the same result with fewer SQL queries (ideally, only one)?
I tried many aggregation tools from the Django ORM (values(), distinct(), etc.) but I always run into the same issue: I don't have a field with a simple date value. I only have a start and end date (in Event) and an arrival and departure date (in Participation), so I can't find a way to group my results by date.

I agree that the current approach is expensive because, for each day, you are re-querying the DB for participants that you already retrieved earlier. I would instead approach this by doing a one-time query to the DB to get the participants and then use that data to populate your result data structure.
One structural change I would make to your solution is that instead of tracking two lists where each index corresponds to a day and the participation, aggregate the data in a dictionary mapping the day to the number of participants. If we aggregate results this way, we can always convert this to the two-lists at the end if needed.
Here is what my general (pseudo-codeish) approach is:
def formatDate(d):
    """Return ``d`` formatted by ``formats.date_format`` with the 'd/m/Y' pattern."""
    return formats.date_format(d, 'd/m/Y')
def get_context_data(self, **kwargs):
    """Add per-day participation counts to the template context.

    All participations of the event are fetched with a single query and
    tallied in Python. The context gains ``participation_amounts``, a dict
    mapping each formatted event date to the number of participations
    that cover that date.
    """
    # initialize the results with dates in question
    result = {}
    for d in daterange(self.object.date_start, self.object.date_end):
        result[formatDate(d)] = 0
    # for each participant, add 1 to each date that they are there
    # (.all() is required: a related manager is not iterable by itself)
    for participant in self.object.participation_set.all():
        # arrival_d / departure_d are nullable; a participation missing
        # either date cannot be placed on the calendar.
        if participant.arrival_d is None or participant.departure_d is None:
            continue
        # The departure day counts as a day of presence, matching an
        # arrival_d <= day <= departure_d test.
        stay = daterange(participant.arrival_d, participant.departure_d,
                         include_last_day=True)
        for d in stay:
            key = formatDate(d)
            # Days outside the event's own range are not charted.
            if key in result:
                result[key] += 1
    # if needed, convert result to appropriate two-list format here
    kwargs.update({
        'participation_amounts': result
    })
    return super(ParticipationGraph, self).get_context_data(**kwargs)
In terms of performance, both approaches do the same number of operations. In your approach, for every day, d, you filter over every participant, p. Thus, the number of operations is O(dp). In my approach, for each participant I go through every day they attended (worst case, every day, d). Thus, it is also O(dp).
The reason to prefer my approach is what you pointed out. It only hits the database once to retrieve the participant list. Thus, it is less dependent on network latency. It does sacrifice some of the perf benefits that you get from SQL queries over python code. However, the python code is not too complex and should be fairly easy to process for events that even have hundreds of thousands of people.

I saw this question a few days ago and honoured it with an upvote, since it is really well written and the problem is very interesting. I finally found some time to dedicate to a solution.
Django is a variation of a Model-View-Controller called Model-Template-View. My approach would follow thus the paradigm "fat model and thin controllers" (or translated to conform with Django "fat model and thin views").
Here is how I would rewrite the models:
import pandas
from django.db import models
from django.utils.functional import cached_property
class Person(models.Model):
    """A person, identified only by a name."""
    # Display name, at most 96 characters.
    name = models.CharField(max_length=96)
class Event(models.Model):
    """An event running from ``date_start`` to ``date_end`` (both included)."""
    name = models.CharField(verbose_name='Nom', max_length=96)
    date_start = models.DateField()
    date_end = models.DateField()
    participants = models.ManyToManyField(to='Person', through='Participation')

    @cached_property
    def days(self):
        """Return every day of the event as a list of ``datetime.date``."""
        days = pandas.date_range(self.date_start, self.date_end).tolist()
        return [day.date() for day in days]

    @cached_property
    def number_of_participants_per_day(self):
        """Return ``(day, count)`` tuples, one per event day, in date order.

        A participation counts for a day when
        ``arrival_d <= day <= departure_d``; participations missing either
        date are ignored. The participations are read with one query.
        """
        # Materialise once so the per-day loop runs purely in memory.
        stays = [
            (par.arrival_d, par.departure_d)
            for par in self.participation_set.all()
            if par.arrival_d is not None and par.departure_d is not None
        ]
        return [
            (day, sum(1 for arrival, departure in stays
                      if arrival <= day <= departure))
            for day in self.days
        ]
class Participation(models.Model):
    """Attendance of one person at one event, with optional stay dates."""
    people = models.ForeignKey(to=Person, on_delete=models.CASCADE)
    event = models.ForeignKey(to=Event, on_delete=models.CASCADE)
    arrival_d = models.DateField(blank=True, null=True)
    departure_d = models.DateField(blank=True, null=True)

    @cached_property
    def days(self):
        """Return every day from arrival to departure (both included).

        The result is a list of ``datetime.date``; it is empty when either
        date is missing, since both fields are nullable.
        """
        if self.arrival_d is None or self.departure_d is None:
            return []
        days = pandas.date_range(self.arrival_d, self.departure_d).tolist()
        return [day.date() for day in days]
All calculations are placed in the models. Information that depends on the data stored in the database is made available as cached_property.
Let's see an example for Event:
# Sample event lasting six days, 23 to 28 May 2018.
djangocon = Event.objects.create(
name='DjangoCon Europe 2018',
date_start=date(2018,5,23),
date_end=date(2018,5,28)
)
djangocon.days
>>> [datetime.date(2018, 5, 23),
datetime.date(2018, 5, 24),
datetime.date(2018, 5, 25),
datetime.date(2018, 5, 26),
datetime.date(2018, 5, 27),
datetime.date(2018, 5, 28)]
I used pandas for generating the date range, which is probably overkill for your application, but it has nice syntax and is good for demonstration purposes. You can generate the date range in your own way.
Only one query was needed to get this result. The days attribute is available like any other field.
The same thing I made in Participation, here are some examples:
# Three sample people.
antwane = Person.objects.create(name='Antwane')
rohan = Person.objects.create(name='Rohan Varma')
cezar = Person.objects.create(name='cezar')
They all want to visit DjangoCon Europe in 2018, but not all of them are attending all days:
# Stay from 23 to 28 May.
p1 = Participation.objects.create(
people=antwane,
event=djangocon,
arrival_d=date(2018,5,23),
departure_d=date(2018,5,28)
)
# Stay from 23 to 26 May.
p2 = Participation.objects.create(
people=rohan,
event=djangocon,
arrival_d=date(2018,5,23),
departure_d=date(2018,5,26)
)
# Stay from 25 to 28 May.
p3 = Participation.objects.create(
people=cezar,
event=djangocon,
arrival_d=date(2018,5,25),
departure_d=date(2018,5,28)
)
Now we want to see how many participants there are for every day the event is going on. We track the number of SQL queries too.
from django.db import connection
djangocon = Event.objects.get(pk=1)
djangocon.number_of_participants_per_day
>>> [(datetime.date(2018, 5, 23), 2),
(datetime.date(2018, 5, 24), 2),
(datetime.date(2018, 5, 25), 3),
(datetime.date(2018, 5, 26), 3),
(datetime.date(2018, 5, 27), 2),
(datetime.date(2018, 5, 28), 2)]
connection.queries
>>>[{'time': '0.000', 'sql': 'SELECT "participants_event"."id", "participants_event"."name", "participants_event"."date_start", "participants_event"."date_end" FROM "participants_event" WHERE "participants_event"."id" = 1'},
{'time': '0.000', 'sql': 'SELECT "participants_participation"."id", "participants_participation"."people_id", "participants_participation"."event_id", "participants_participation"."arrival_d", "participants_participation"."departure_d" FROM "participants_participation" WHERE "participants_participation"."event_id" = 1'}]
There are two queries. The first one fetches the Event object and the second fetches the event's participations, from which the number of participants per day is then computed in Python.
Now it's up to you to use it in your views as you please. And thanks to the cached properties you won't need to repeat the database query to get the result.
You can follow the same principle and maybe add property to list all participants for each day of an event. It could look like:
class Event(models.Model):
    # ... snip ...
    @cached_property
    def participants_per_day(self):
        """Return ``(day, people)`` tuples, one per event day, in date order.

        ``people`` lists the persons whose stay covers that day
        (``arrival_d <= day <= departure_d``); participations missing
        either date are ignored.
        """
        participants = []
        # select_related avoids one extra query per participation when
        # reading par.people below.
        participations = self.participation_set.all().select_related('people')
        for day in self.days:
            people = [
                par.people for par in participations
                if par.arrival_d is not None
                and par.departure_d is not None
                and par.arrival_d <= day <= par.departure_d
            ]
            participants.append((day, people))
        return participants

    # refactor the number of participants per day
    @cached_property
    def number_of_participants_per_day(self):
        """Return ``(day, count)`` tuples derived from ``participants_per_day``."""
        return [(day, len(people)) for day, people in self.participants_per_day]
I hope you like this solution.

Related

Django can't lookup datetime field. int() argument must be a string, a bytes-like object or a number, not datetime.datetime

I'm trying to group objects 'Event' by their 'due' field, and finally return a dict of day names with a list of events on that day. {'Monday': [SomeEvent, SomeOther]} - that's the idea. However while looking up event's due__day I get: int() argument must be a string, a bytes-like object or a number, not datetime.datetime.
Here's manager's code:
# models.py
class EventManager(models.Manager):
    """Manager adding a week-oriented query for events."""

    def get_week(self, group_object):
        """Return the events of ``group_object`` for the rest of the week.

        The window starts today -- or tomorrow once it is past 16:00 --
        and runs through the next Friday (included). The result is a dict
        mapping each weekday name to a queryset of the events whose
        ``due`` falls on that calendar day.
        """
        if datetime.datetime.now().time() > datetime.time(16, 0, 0):
            day = datetime.date.today() + datetime.timedelta(1)
        else:
            day = datetime.date.today()
        # Distance to the coming Friday (weekday 4); 0 when day is a Friday.
        friday = day + datetime.timedelta((4 - day.weekday()) % 7)
        events = {}
        while day <= friday:
            # Compare the date part of ``due`` with the ``__date`` lookup.
            # ``__day`` expects an integer day-of-month, so handing it a
            # date object is what raised the int() error.
            events[day.strftime("%A")] = self.get_queryset().filter(
                group=group_object, due__date=day)
            day += datetime.timedelta(1)
        return events
Here is the model:
# models.py
class Event(models.Model):
    """An event with a due date/time, belonging to a group."""
    title = models.CharField(max_length=30)
    slug = models.SlugField(blank=True)
    description = models.TextField()
    # Both fields are restricted to predefined choices.
    subject = models.CharField(max_length=20, choices=SUBJECTS)
    event_type = models.CharField(max_length=8, choices=EVENT_TYPES)
    # Stored as a full datetime, not a plain date.
    due = models.DateTimeField()
    author = models.ForeignKey(User, blank=True)
    # Reverse accessor on Group is ``events``.
    group = models.ForeignKey(Group, related_name='events')
    # Custom manager providing get_week().
    objects = EventManager()
I created a template filter to call this method:
@register.filter
def get_week(group_object):
    """Template filter returning the week's events for ``group_object``.

    Thin wrapper around ``Event.objects.get_week``. The decorator is what
    registers the filter, so it must not be commented out.
    """
    return Event.objects.get_week(group_object=group_object)
And I called it in event_list.html (Using an index because it's a list view, also this is just temporary to see if the returned dict will be correct. Looping over it is to be implemented.)
{{ event_list.0.group|get_week }}
My guess is: I've broken something with this weird lookup. .filter(due__day=day) However I cannot find the solution.
I also tried to look for due__lte=(day - datetime.timedelta(hours=12)), due__gte=(day + datetime.timedelta(hours=12)) something like this but that doesn't work. Any solution is pretty much fine and appreciated.
Right, so the solution for my project is to filter on due__day=day.day, where day is the loop variable and .day is its day-of-month property, so that a day number is compared against a day number rather than against a datetime. That worked for me; however, it does not necessarily answer the question, since I don't know exactly what caused the error itself. Feel free to answer for future SO help seekers or just for my information :)
This works for me:
Event.objects.filter(due__gte=datetime.date(2016, 8, 29), due__lt=datetime.date(2016, 8, 30))
Note that it uses date instead of datetime to avoid too much extraneous code dealing with the times (which don't seem to matter here).

How to reorganize data in a django view

I have the following model for a Measurement belonging to a user:
class Measurement(models.Model):
    """A timestamped measurement belonging to a user."""
    # Reverse accessor on User is ``measurements``.
    user = models.ForeignKey(User, related_name='measurements')
    # Defaults to timezone.now; indexed in the database.
    timestamp = models.DateTimeField(default=timezone.now, db_index=True)

    def get_date(self):
        """Return the date part of ``timestamp``."""
        return self.timestamp.date()
And for one of my views I need the measurements grouped by day. I have done the following:
def group_measurements_by_days(user_id):
    """Group a user's measurements by calendar day, newest day first.

    Returns a list of ``{'day': date, 'measurements': [Measurement, ...]}``
    dicts sorted by ``day`` in descending order.
    """
    by_day = {}
    for item in Measurement.objects.filter(user=user_id):
        by_day.setdefault(item.get_date(), []).append(item)
    grouped = [
        {'day': key, 'measurements': items}
        for key, items in by_day.items()
    ]
    grouped.sort(key=itemgetter('day'), reverse=True)
    return grouped
So that I get a list like:
[
{ 'day': date1, 'measurements': [ Measurement, Measurement, ...]},
{ 'day': date2, 'measurements': [ Measurement, Measurement, ...]},
...
]
But this is completely wrong, because I have a mixture of plain python types and django ORM types. This works when I use this data in templates, but is giving me trouble in other areas.
For example, when trying to reuse this code for a django rest api, I suddenly am unable to serialize those objects.
How can I process the data coming from the django ORM (Measurement.objects.filter(user=user_id)), but still keep a consistent format for my API?
Hopefully, you are able to use Django 1.8. The problem is solved in this post; credit goes there.
You can filter the queryset using grouping by date:
from django.db.models import Count
from django.db.models.functions import TruncDate

# Number of measurements per calendar day for one user.
# values('day') must come BEFORE the aggregate so that Django groups by
# day; annotating the Count first groups by row and every count is 1.
# NOTE(review): TruncDate needs a newer Django than 1.8 -- confirm the
# project's version.
queryset = (
    Measurement.objects
    .filter(user=user_id)
    .annotate(day=TruncDate('timestamp'))
    .values('day')
    .annotate(measurements=Count('id'))
)
How about using this idea ...
mymeas.append(measurement.__dict__)

Django/Python: How to group queryset results by date?

I have a model for image uploads, that looks something like this:
from django.db import models
from django.contrib.auth.models import User
import datetime
class ImageItem(models.Model):
    """An image uploaded by a user."""
    user = models.ForeignKey(User)
    # Set automatically when the row is first created.
    upload_date = models.DateTimeField(auto_now_add = True)
    # Refreshed automatically on every save.
    last_modified = models.DateTimeField(auto_now = True)
    # Storage path is computed by img_get_file_path.
    original_img = models.ImageField(upload_to = img_get_file_path)
I want to query all instances of ImageItem that belong to a particular user, and group them according to date uploaded. For example, for some user, I want a group for April 9 2013, another for April 12 2013, etc. (assuming that they uploaded one or more images on those dates).
I'm thinking I run a simple query, like,
joes_images = ImageItem.objects.filter(user__username='joe')
But then how could I group them by day published? (assuming he did not publish every day, only on some days)
The function would have to return all the groups of images.
why don't you do as following?
joes_images = ImageItem.objects.filter(user__username='joe')  # your code
# Group the images by upload day in a single pass. The key is a
# (year, month, day) tuple taken from upload_date -- the model itself has
# no year/month/day attributes.
joes_images_separated = {}
for image in joes_images:
    uploaded = image.upload_date
    key = (uploaded.year, uploaded.month, uploaded.day)
    joes_images_separated.setdefault(key, []).append(image)
# set of distinct upload dates
upload_dates = set(joes_images_separated)
Here, upload_dates is a set of dates in joes_images and you get joes_images_separated as a dict (keys are dates, values are lists of joes_images for each date).
Sorry for the slightly dirty code; I think this works. Just for your information.

django, group model by days

I have models.py with:
class Game(models.Model):
    """A game with the date and time at which it takes place."""
    date = models.DateTimeField()
I want to have a function that returns all games grouped by days. Result should be a dict with keys: 'day' and 'games' (list of games for the day). I do it in such way:
from itertools import groupby
def get_games_grouped_by_days():
    """Return all games grouped by calendar day, in chronological order.

    The result is a list of ``{'day': <day of month>, 'games': [Game, ...]}``
    dicts, one per distinct calendar date.
    """
    def get_day(obj):
        # Group on the full date so the same day number in two different
        # months never ends up in one group.
        return obj.date.date()

    # groupby() only merges adjacent items, so the games must be sorted
    # by the grouping key first.
    games = Game.objects.order_by('date')
    return [
        {'day': day.day, 'games': list(games_in_day)}
        for day, games_in_day in groupby(games, key=get_day)
    ]
This function works fine, but I would like to know if there is a more query-way to do such group thing.
what about this:
# NOTE(review): group_by looks like an internal attribute of the query
# object rather than public API -- confirm it behaves as expected on the
# Django version in use.
query = Game.objects.all().query
query.group_by = ['date']
# Wrap the modified query in a new queryset for the Game model.
results = QuerySet(query=query, model=Game)
Then you can iterate on results, and get the day and game from each item.

Want to Add all amounts for each country

Hello, I have a Django app dealing with sales/purchases. What I want to do is add up amounts depending on their country type.
Looking at the sales table, there are two different country types: UK and EU (I know the EU is not a country, but never mind :) )
models.py
# (stored value, label) pairs used as choices for Sale.country_type.
COUNTRY_TYPE_CHOICES = (
(1, 'UK'),
(2, 'EU'),
)
class Sale(models.Model):
    """A sale with a country type, a date, an amount and a description."""
    # Integer code restricted to COUNTRY_TYPE_CHOICES.
    country_type = models.IntegerField(verbose_name = "Location", choices = COUNTRY_TYPE_CHOICES)
    date = models.DateField()
    amount = models.DecimalField(max_digits=20, decimal_places=2)
    description = models.TextField(max_length = 400)

    def __unicode__(self):
        """Return the amount as the text representation of the sale."""
        return unicode(self.amount)
Now I want to display the amount of all sales. I want two results: the sum of all amounts from the UK, and the sum of all amounts from the EU. I'm slightly confused about how to add up the amounts because of the two different choice types.
Here is also my views file which may help as well.
views.py
def home(request):
    """Render ``home.html`` with every sale available as ``sales``."""
    context = {'sales': Sale.objects.all()}
    return render_to_response(
        'home.html',
        context,
        context_instance=RequestContext(request),
    )
Update: I have done so far
uk_sales = Sale.objects.filter(country_type='1')
{{uk_sales}}
On screen Gives me: <Sale: 467.99>, <Sale: 699.99>, <Sale: 499.99>]
Now it would be good if I could add up all these values, not count them.
from django.db.models import Sum
Sale.objects.values('country_type').annotate(Sum('amount'))
This works quite well for me.
uk_sales = Sale.objects.filter(country_type='1')
uk_amount = uk_sales.aggregate(price = Sum('amount'))['price']
If you are using Django 1.1 or newer, then you can use Django's aggregation support with something like:
query_amount = Item.objects.extra(select={'sum': 'sum(amount)'}).values('sum', 'amount')
query_amount.query.group_by = ['country_type']
Here's Django official documentation on the topic. And here's a nice tutorial.

Categories