Django - Complex query - python

Assuming I have two models:
class Profile(models.Model):
#some fields here
class Ratings(models.Model):
profile = models.ForeignKey(profile)
category = models.IntegerField()
points = models.IntegerField()
Assuming following examle of MySQL table "ratings":
profile | category | points
1 1 10
1 1 4
1 2 10
1 3 0
1 4 10
1 4 10
1 4 10
1 5 0
I have following values in my POST data and also other fields values:
category_1_avg_val = 7
category_2_avg_val = 5
category_3_avg_val = 5
category_4_avg_val = 7
category_5_avg_val = 9
I want to filter profiles that have the average ratings calculated for categories higher or equal to required values.
Some filters are applied initially as:
q1 = [('associated_with', search_for),
('profile_type__slug__exact', profile_type),
('gender__in', gender),
('rank__in', rank),
('styles__style__in', styles),
('age__gte', age_from),
('age__lte', age_to)]
q1_list = [Q(x) for x in q1 if x[1]]
q2 = [('user__first_name__icontains', search_term),
('user__last_name__icontains', search_term),
('profile_type__name__icontains', search_term),
('styles__style__icontains', search_term),
('rank__icontains', search_term)]
q2_list = [Q(x) for x in q2 if x[1]]
if q1_list:
objects = Profile.objects.filter(
reduce(operator.and_, q1_list))
if q2_list:
if objects:
objects = objects.filter(
reduce(operator.or_, q2_list))
else:
objects = Profile.objects.filter(
reduce(operator.or_, q2_list))
if order_by_ranking_level == 'desc':
objects = objects.order_by('-ranking_level').distinct()
else:
objects = objects.order_by('ranking_level').distinct()
Now i want to filter profiles whose (average of points) (group by category) >= (avg values of category coming in post)
I tried to do this one by one as
objects = objects.filter(
ratings__category=1) \
.annotate(avg_points=Avg('ratings__points'))\
.filter(avg_points__gte=category_1_avg_val)
objects = objects.filter(
ratings__category=2) \
.annotate(avg_points=Avg('ratings__points'))\
.filter(avg_points__gte=category_2_avg_val)
But this is wrong I think. Please help me out. If return is a queryset that would be great.
Edited
Using the answer posted by hynekcer I came up with slightly different solution as I have already queryset of profiles which needs to be filtered more based on rating.
def check_ratings_avg(pr, rtd):
ok = True
qr = Ratings.objects.filter(profile__id=pr.id) \
.values('category')\
.annotate(points_avg=Avg('points'))
qr = {i['category']:i['points_avg'] for i in qr}
for cat in rtd:
val = rtd[cat]
if qr[cat] >= val:
pass
else:
ok = False
break
return ok
rtd = {1: category_1_avg_val, 2: category_2_avg_val, 3: category_3_avg_val,
4: category_4_avg_val, 5: category_5_avg_val}
objects = [i for i in objects if check_ratings_avg(i, rtd)]

Your complex query require a subquery in the principle. Possible solutions are:
A subquery written by 'extra' queryset method or raw SQL query. It is not DRY and it was unsupported by some db backends, e.g. by some versions of MySQL, however subqueries are by some limited way used since Django 1.1.
Saving intermediate results into a temporary table in the database. It is not nice in Django.
Emulation of the outer query by loop in Python. The best universal solution. A loop in Python over database data aggregated by the first query can aggregate and filter the data fast enough.
A) Subquery emulated by Python
from django.db.models import Q, Avg
from itertools import groupby
from myapp.models import Profile, Ratings
def iterator_filtered_by_average(dictionary):
qr = Ratings.objects.values('profile', 'category', 'points').order_by(
'profile', 'category').annotate(points_avg=Avg('points'))
f = Q()
for k, v in dictionary.iteritems():
f |= Q(category=k, points_avg__gte=v)
for profile, grp in groupby(qr.filter(f).values('profile')):
if len(list(grp)) == len(dictionary):
yield profile
#example
FILTER_DATA = {1:category_1_avg_val, 2:category_2_avg_val, 3:category_3_avg_val,
4:category_4_avg_val, 5:category_5_avg_val}
for row in iterator_filtered_by_average(FILTER_DATA):
print row
This is a simple solution for the original question without later additional requirements.
B) Solution with subqueries:
It is necessary for the more detailed version of question because if the initial filters are based on some field of type ManyToManyField and also because it contains a distinct clause:
# objects: QuerySet that you get from your initial filters. Not yet executed.
if rtd:
# Method `as_nested_sql` removes the `order_by` clase, unlike `as_sql`
subquery3 = objects.values('id').query \
.get_compiler(connection=connection).as_nested_sql()
subquery2 = ("""SELECT profile_id, category, avg(points) AS points_avg
FROM myapp_ratings
WHERE profile_id in
( %s
) GROUP BY profile_id, category
""" % subquery3[0], subquery3[1]
)
where_sql = ' OR '.join(
'category = %d AND points_avg >= %%s' % cat for cat in rtd.keys()
)
subquery = (
"""SELECT profile_id
FROM
( %s
) subquery2
WHERE %s
GROUP BY profile_id
HAVING count(*) = %s
""" % (subquery2[0], where_sql, len(rtd)),
subquery2[1] + tuple(rtd.values())
)
assert order_by_ranking_level in ('asc', 'desc')
mainquery = ("""SELECT myapp_profile.* FROM myapp_profile
INNER JOIN
( %s
) subquery ON subquery.profile_id=myapp_profile.id
ORDER BY ranking_level %s"""
% (subquery[0], order_by_ranking_level), subquery[1]
)
objects = Profile.objects.raw(mainquery[0], params=mainquery[1])
return objects
Replace please all strings myapp by name_of_your_application.
Example of SQL generated by this code
SELECT myapp_profile.* FROM myapp_profile
INNER JOIN
( SELECT profile_id
FROM
( SELECT profile_id, category, avg(points) AS points_avg
FROM myapp_ratings
WHERE profile_id IN
( SELECT U0.`id` FROM `myapp_profile` U0 WHERE U0.`ranking_level` >= 4
) GROUP BY profile_id, category
) subquery2
WHERE category = 1 AND points_avg >= 7 OR category = 2 AND points_avg >= 5
OR category = 3 AND points_avg >= 5 OR category = 4 AND points_avg >= 7
OR category = 5 AND points_avg >= 9
GROUP BY profile_id
HAVING count(*) = 5
) subquery ON subquery.profile_id=myapp_profile.id
ORDER BY ranking_level asc
(This SQL is for better readability parsed manually with strings %s replaced by parameters, however the database engine receive parameters unparsed for security reasons.)
Your problem is due to little support of subqueries generated by Django. Only examples from documentation of more complicated queries create a subquery. (e.g. aggregate after annotate or count after annotate or aggregate after distinct, but no annotate after distinct or after annotate) Complicated nested aggregations are simplified to one query which is unexpected.
All other solutions that execute a new individual SQL query for every object filtered by the first query are discouraged for production although they can be very useful for testing results of any better solution.

You could add methods to a manager
# Untested code
class ProfileManager(models.Manager):
def with_category_average(self, cat, avg):
# Give each filter a unique annotation key
key = 'avg_pts_' + str(cat)
return self.filter(ratings__category=cat) \
.annotate(**{key: Avg('ratings__points')}) \
.filter(**{key + '__gte': avg})
# Expects a dict of `cat: avg` pairs
def filter_by_averages(self, avg_dict):
qs = self.get_query_set()
for key, val in avg_dict.items():
qs &= self.with_category_average(key, val)
return qs

Related

Django: Understanding QuerySet API

I'm a newbie in Django and I have some questions about making queries by QuerySet API.
For instance, I have User, his Orders, and its Statuses
class User(models.Model):
first_name = models.CharField(max_length=100)
last_name = models.CharField(max_length=100)
is_active = models.BooleanField()
class OrderStatus(models.Model):
name = models.CharField(max_length=100)
class Order(models.Model):
number = models.CharField(max_length=10)
amount = models.DecimalField(max_digits=19, decimal_places=2)
user = models.ForeignKey(User, on_delete=models.PROTECT, related_name="orders")
order_status = models.ForeignKey(OrderStatus, on_delete=models.PROTECT)
creation_datetime = models.DateTimeField(auto_now_add=True)
# Some filtering field
filtering_field = models.IntegerField()
I combined all of my questions to this one query:
Get active users with some additional data for each user:
'Amount' of the Orders filtered by 'filtering_field' and aggregated by Min and Max
'Number' and 'Amount' of the first Order filtered by 'filtering_field'
Count of the Orders filtered by 'filtering_field', aggregated by Count and grouped by 'Order Status'. This grouping means that data from query #1 and #2 can be duplicated and it's ok.
I could make this query in T-SQL by 3 separated subquery with own grouping, filtering, ordering:
SELECT
u.id,
u.first_name,
u.last_name,
ts.min_amount,
ts.max_amount,
first_order.number as first_order_number,
first_order.amount as first_order_amount,
cnt.order_status_id,
cnt.cnt
FROM
[User] u
-- 1. 'Amount' of the Orders filtered by 'filtering_field' and aggregated by Min and Max
LEFT OUTER JOIN (
SELECT
[user_id],
MIN(amount) min_amount,
MAX(amount) max_amount
FROM
[Order]
WHERE
filtering_field = 1
GROUP BY
[user_id]
) ts ON u.id = ts.[user_id]
-- 2. 'Number' and 'Amount' of the first Order filtered by 'filtering_field'
OUTER APPLY (
SELECT TOP 1
o.number,
o.amount
FROM
[Order] o
WHERE
u.id = o.[user_id] AND
o.filtering_field = 2
ORDER BY
o.creation_datetime
) first_order
-- 3. Count of the Orders filtered by 'filtering_field', aggregated by Count and grouped by 'Order Status'.
LEFT OUTER JOIN (
SELECT
[user_id],
order_status_id,
COUNT(*) cnt
FROM
[Order]
WHERE
filtering_field = 3
GROUP BY
[user_id],
order_status_id
) cnt ON u.id = cnt.[user_id]
WHERE
u.is_active = 1
How I can do the same by QuerySet API?
Query #1 I can do Min and Max in Annotate.
data = User.objects.filter(
Q(is_active=True)
).values(
'id',
'first_name',
'last_name',
).annotate(
min_amount=Min(
'orders__amount',
filter=Q(orders__filtering_field=1)
),
max_amount=Max(
'orders__amount',
filter=Q(orders__filtering_field=1)
)
)
But what about query #2 & #3?
I've considered Subquery(), but It supports the only one output value.
I mean if you wanna get 5 fields from 1 queryset, sql server runs 5 queries. I think it's not good for performance.
How I can join the first order once to use its fields and How can I use Count() with grouping by filtered rows of child model?
I'd like to use .prefetch_related() as a substitution of Subquery in T-SQL for each query like this:
Prefetch(
'orders',
queryset=Order.objects.filter(filtering_field=1)..., #staff with .values(), annotate(Min(), Max()) and etc.
to_attr='pf_query_1'
)
And then use 'pf_query_1' like 'orders__pf_query_1__amount' in User.objects...values()...annotate().
But I can't use .values() in Prefetch as well as 'pf_query_1' as a model field.
So what is the best practice to make this one query by QuerySet API?
I'd like to see the whole QuerySet API query just like T-SQL query
Have you considered the Django Subquery as described in the docs?
Regarding your 3rd question the only approach coming to my mind is dynamically creating the annotations.
Here a (tested) code sample using your models:
def test_query(self):
# 1st question
min_order = Order.objects.filter(user=OuterRef('pk'), filtering_field=1)\
.order_by().values('user').annotate(min=Min('amount')).values('min')
max_order = Order.objects.filter(user=OuterRef('pk'), filtering_field=1)\
.order_by().values('user').annotate(max=Max('amount')).values('max')
# 2nd question
first_number = Order.objects.filter(user=OuterRef('pk'), filtering_field=1)\
.order_by().values('user').annotate(fnumber=F('number')).values('fnumber')
first_amount = Order.objects.filter(user=OuterRef('pk'), filtering_field=1)\
.order_by().values('user').annotate(fnumber=F('amount')).values('amount')
kwargs = {
'min': Subquery(min_order, output_field=DecimalField()),
'max': Subquery(max_order, output_field=DecimalField()),
'first_n': Subquery(first_number, output_field=CharField()),
'first_a': Subquery(first_amount, output_field=DecimalField())
}
# 3rd question
for o in OrderStatus.objects.all():
kwargs['%s_count' % o.name] = \
Subquery(Order.objects.filter(user=OuterRef('pk'), filtering_field=1, order_status=o)\
.order_by().values('user').annotate(c=Count('pk')).values('c'), output_field=IntegerField())
# Putting it all together
qs2 = User.objects.annotate(**kwargs)
# Testing the results
for user in qs2:
v = Order.objects.filter(user=user, filtering_field=1).aggregate(Min('amount'), Max('amount'))
self.assertEqual(v['amount__min'], user.min)
self.assertEqual(v['amount__max'], user.max)
v = Order.objects.filter(user=user, filtering_field=1).first()
self.assertEqual(v.number, user.first_n)
self.assertEqual(v.amount, user.first_a)
for o in OrderStatus.objects.all():
v = Order.objects.filter(user=user, filtering_field=1, order_status=o).count()
if v == 0:
v = None
k = '%s_count' % o.name
v1 = getattr(user, k)
self.assertEqual(v, v1)
# The sql
print(qs2.query)
Please note:
The code is part of a TestCase where I put it to check if it worked
as expected
I know some parts of the query can be generated without
Subquery using the filter attribute of the aggregation functions. As
this filter attribute was only introduced in Django 2.0 and not
supported in the LTS version 1.11 I did not use it.
EDIT: Here is another approach I came up with starting with a "base queryset" and annotating that one:
def test_query2(self):
qs = Order.objects.filter(filtering_field=1).values('user', 'order_status').distinct()
# 1st question
min_order = Order.objects.filter(user=OuterRef('user'), filtering_field=1)\
.order_by().values('user').annotate(min=Min('amount')).values('min')
max_order = Order.objects.filter(user=OuterRef('user'), filtering_field=1)\
.order_by().values('user').annotate(max=Max('amount')).values('max')
# 2nd question
first_number = Order.objects.filter(user=OuterRef('user'), filtering_field=1)\
.order_by().values('user').annotate(fnumber=F('number')).values('fnumber')
first_amount = Order.objects.filter(user=OuterRef('user'), filtering_field=1)\
.order_by().values('user').annotate(fnumber=F('amount')).values('amount')
# 3rd question
total_count = Order.objects.filter(user=OuterRef('user'), filtering_field=1, order_status=OuterRef('order_status'))\
.order_by().values('user').annotate(c=Count('pk')).values('c')
qs2 = qs.annotate(
min = Subquery(min_order, output_field=DecimalField()),
max = Subquery(max_order, output_field=DecimalField()),
first_n = Subquery(first_number, output_field=CharField()),
first_a = Subquery(first_amount, output_field=CharField()),
c = Subquery(total_count, output_field=IntegerField())
)
# Testing the results
for d in qs2:
v = Order.objects.filter(user=d['user'], filtering_field=1).aggregate(Min('amount'), Max('amount'))
self.assertEqual(v['amount__min'], d['min'])
self.assertEqual(v['amount__max'], d['max'])
v = Order.objects.filter(user=d['user'], filtering_field=1).first()
self.assertEqual(v.number, d['first_n'])
self.assertEqual(v.amount, d['first_a'])
v = Order.objects.filter(user=d['user'], filtering_field=1, order_status=d['order_status']).count()
self.assertEqual(v, d['c'])
print(qs2.query)

Django; how to find all bugs that have certain flags set

I am writing a Django app that queries a Bugzilla database for reporting. I am trying to build a query that can get all of the bugs that have specific flags set.
The model representing the flags table.
class Bugzilla_flags(models.Model):
class Meta:
db_table = 'flags'
type_id = models.IntegerField()
status = models.CharField(max_length=50)
bug_id = models.IntegerField()
creation_date = models.DateTimeField()
modification_date = models.DateTimeField()
setter_id = models.IntegerField()
requestee_id = models.IntegerField()
def __unicode__(self):
return str(self.bug_id)
I have a dictionary that represents the flags I want to look for (type_id : status).
flags = {'36':'?','12':'+'}
I tried using the reduce function but I don't think it will work because I is checking that all of the flags to be present in the same row. If I run the query with a dictionary with just a single k,v pair, it works fine, but not with more than 1.
query = reduce(operator.and_, (Q(type_id=flag,status=val) for (flag,val) in flags.items()))
I will then take the results of that query, and use it as the search for the actual bugs database.
inner = Bugzilla_flags.objects.using('bugzilla').filter(query)
bugs = Bugzilla_bugs.objects.using('bugzilla').filter(bug_id__in=inner)
For some history, I am currently using a series of steps to generate some sql which I send as a raw query, but I am trying to see if I can do it in Django. The resulting sql is like this:
select b.bug_id, b.priority, b.bug_severity, b.bug_status, b.resolution, b.cf_verified_in, b.assigned_to, b.qa_contact, b.short_desc, b.cf_customercase,
MAX(CASE WHEN f.type_id = 31 THEN f.status ELSE NULL END) as Unlocksbranch1,
MAX(CASE WHEN f.type_id = 31 THEN f.status ELSE NULL END) as Unlocksbranch2,
MAX(CASE WHEN f.type_id = 33 THEN f.status ELSE NULL END) as Unlocksbranch3,
MAX(CASE WHEN f.type_id = 34 THEN f.status ELSE NULL END) as Unlocksbranch4,
MAX(CA5E WHEN f.type_id = 36 THEN f.status ELSE NULL END) as Unlocksbranch5,
MAX(CASE WHEN f.type_id = 41 THEN f.status ELSE NULL END) as Unlocksbranch6,
MAX(CASE WHEN f.type_id = 12 THEN f.status ELSE NULL END) as CodeReviewed
from bugs b
inner join flags f on f.bug_id = b.bug_id
where ( b.bug_status = 'RESOLVED' or b.bug_status = 'VERIFIED' or b.bug_status = 'CLOSED' )
and b.resolution = 'FIXED'
group by b.bug_id
having CodeReviewed = '+' and Unlocksbranch1 = '?' and Unlocksbranch2 = '+'
The result of this gives me a single queryset that has all of the flags I care about as columns, which I can then do my analysis on. The last "having" section is what I am actually querying on, and is what I am trying to get with the above Django queries.
EDIT
Basically what I need to do is like this:
flags1 = {'36':'?'}
flags2 = {'12':'+'}
query1 = reduce(operator.and_, (Q(type_id=flag,status=val) for (flag,val) in flags1.items()))
query2 = reduce(operator.and_, (Q(type_id=flag,status=val) for (flag,val) in flags2.items()))
inner1 = Bugzilla_flags.objects.using('bugzilla').filter(query1)
inner2 = Bugzilla_flags.objects.using('bugzilla').filter(query2)
inner1_bugs = [row.bug_id for row in inner1] # list of just the bug_ids
inner2_bugs = [row.bug_id for row in inner2] # list of just the bug_ids
intersect = set(inner1_bugs) & set(inner2_bugs)
The intersect is a set that has all of the bug_ids that I can then use in the Bugzilla_bugs query to get the actual bug data.
How can I do the 3 operations (query, inner, inner_bugs) and then the intersect using a variable length dictionary input such as:
flags = {'36':'?','12':'+','15','?',etc}
Your inner query looks right to me. To find bugs that have all those flags, not just any one, you can either use reduce again to and together a bunch of flag= Q objects, or iterate and build up multiple filter clauses.
inner = Bugzilla_flags.objects.using('bugzilla').filter(query)
flag_filter = reduce(operator.and_, (Q(flag=flag) for flag in inner))
bugs = Bugzilla_bugs.objects.using('bugzilla').filter(flag_filter)
Or:
inner = Bugzilla_flags.objects.using('bugzilla').filter(query)
bugs = Bugzilla_bugs.objects.using('bugzilla').all()
for flag in inner:
bugs = bugs.filter(flag=flag)
Or, for that matter, take advantage of the fact that multiple Q objects are anded together:
inner = Bugzilla_flags.objects.using('bugzilla').filter(query)
flag_filters = [Q(flag=flag) for flag in inner]
bugs = Bugzilla_bugs.objects.using('bugzilla').filter(*flag_filters)

Django advanced query

I'm using Django ORM to handle my database queries. I have the following db tables:
resource
resource_pool
resource_pool_elem
reservation
and the following models:
class Resource(models.Model):
name = models.CharField(max_length=200)
class Reservation(models.Model):
pass
class ResourcePool(models.Model):
reservation = models.ForeignKey(Reservation, related_name="pools", db_column="reservation")
resources = models.ManyToManyField(Resource, through="ResourcePoolElem")
mode = models.IntegerField()
class ResourcePoolElem(models.Model):
resPool = models.ForeignKey(ResourcePool)
resource = models.ForeignKey(Resource)
Currently, I need to query the resources used in a set of reservations. I use the following query:
resourcesNames = []
reservations = []
resources = models.Resource.objects.filter(
name__in=resourcesNames, resPool__reservation__in=reservations).all()
which I think matches to a sql query similar to this one:
select *
from resource r join resource_pool rp join resource_pool_elem rpe join reservation reserv
where r.id = rpe.resource and
rpe.pool = rp.id and
reserv.id = rp.reservation and
r.name in (resourcesNames[0], ..., resourcesNames[n-1])
reserv.id in (reservations[0], ..., reservations[n-1])
Now, I want to add a restriction to this query. Each pool may have a exclusive mode boolean flag. There will be an extra input list with the requested exclusive flags of each pool and I only want to query the resources of pools which exclusive flag match the requested exclusive flag if exclusive = true OR resources of pools which exclusive flag is false. I could build the SQL query using Python with a code similar to this:
query = "select *
from resource r join resource_pool rp join resource_pool_elem rep
join reservation reserv
where r.id = rpe.resource and
rpe.pool = rp.id and
reserv.id = rp.reservation and
reserv.id in (reservations[0], ..., reservations[n-1]) and ("
for i in resourcesNames[0:len(resourcesNames)]
if i > 0:
query += " or "
query += "r.name = " + resourcesNames[i]
if (exclusive[i])
query += " and p.mode == 0"
query += ")"
Is there a way to express this sql query in a Django query?
Perhaps you can do this with Q objects. I have some issues wrapping my head around your example, but lets look at it with a simpler model.
class Garage(models.Model):
name = models.CharField()
class Vehicle(models.Model):
wheels = models.IntegerField()
gears = models.IntegerField()
garage = models.ForeignKey(Garage)
Say you want to get all "multiple-wheeled" vehicles in the garage (e.g. all motorcycles and cars, but no unicycles), but for cars, you only want those with a CVT transmission, meaning they only have a single gear. (How this came up, no clue, but bear with me... ;) The following should give you that:
from django.db.models import Q
garage = Garage.objects.all()[0]
query = Vehicle.objects.filter(Q(garage=garage))
query = query.filter(Q(wheels=2) | (Q(wheels=4) & Q(gears=1)))
Given the following available data:
for v in Vehicle.objects.filter(garage=garage):
print 'Wheels: {}, Gears: {}'.format(v.wheels, v.gears)
Wheels: 1, Gears: 1
Wheels: 2, Gears: 4
Wheels: 2, Gears: 5
Wheels: 4, Gears: 1
Wheels: 4, Gears: 5
Running the query will give us:
for v in query:
print 'Wheels: {}, Gears: {}'.format(v.wheels, v.gears)
Wheels: 2, Gears: 4
Wheels: 2, Gears: 5
Wheels: 4, Gears: 1
Finally, to adapt it to your case, you might be able to use something along the following lines:
query = models.Resource.objects.filter(Q(resPool__reservation__in=reservations))
query = query.filter(Q(name__in(resourcesNames))
query = query.filter(Q(resPool__exclusive=True) & Q(resPool__mode=0))
You could use a django cursor to make queries , for instance
see documentation :
https://docs.djangoproject.com/en/dev/topics/db/sql/
from django.db import connection
def my_custom_sql(self):
cursor = connection.cursor()
cursor.execute("UPDATE bar SET foo = 1 WHERE baz = %s", [self.baz])
cursor.execute("SELECT foo FROM bar WHERE baz = %s", [self.baz])
row = cursor.fetchone()
return row

SQLAlchemy session query with INSERT IGNORE

I'm trying to do a bulk insert/update with SQLAlchemy. Here's a snippet:
for od in clist:
where = and_(Offer.network_id==od['network_id'],
Offer.external_id==od['external_id'])
o = session.query(Offer).filter(where).first()
if not o:
o = Offer()
o.network_id = od['network_id']
o.external_id = od['external_id']
o.title = od['title']
o.updated = datetime.datetime.now()
payout = od['payout']
countrylist = od['countries']
session.add(o)
session.flush()
for country in countrylist:
c = session.query(Country).filter(Country.name==country).first()
where = and_(OfferPayout.offer_id==o.id,
OfferPayout.country_name==country)
opayout = session.query(OfferPayout).filter(where).first()
if not opayout:
opayout = OfferPayout()
opayout.offer_id = o.id
opayout.payout = od['payout']
if c:
opayout.country_id = c.id
opayout.country_name = country
else:
opayout.country_id = 0
opayout.country_name = country
session.add(opayout)
session.flush()
It looks like my issue was touched on here, http://www.mail-archive.com/sqlalchemy#googlegroups.com/msg05983.html, but I don't know how to use "textual clauses" with session query objects and couldn't find much (though admittedly I haven't had as much time as I'd like to search).
I'm new to SQLAlchemy and I'd imagine there's some issues in the code besides the fact that it throws an exception on a duplicate key. For example, doing a flush after every iteration of clist (but I don't know how else to get an the o.id value that is used in the subsequent OfferPayout inserts).
Guidance on any of these issues is very appreciated.
The way you should be doing these things is with session.merge().
You should also be using your objects relation properties. So the o above should have o.offerpayout and this a list (of objects) and your offerpayout has offerpayout.country property which is the related countries object.
So the above would look something like
for od in clist:
o = Offer()
o.network_id = od['network_id']
o.external_id = od['external_id']
o.title = od['title']
o.updated = datetime.datetime.now()
payout = od['payout']
countrylist = od['countries']
for country in countrylist:
opayout = OfferPayout()
opayout.payout = od['payout']
country_obj = Country()
country_obj.name = country
opayout.country = country_obj
o.offerpayout.append(opayout)
session.merge(o)
session.flush()
This should work as long as all the primary keys are correct (i.e the country table has a primary key of name). Merge essentially checks the primary keys and if they are there merges your object with one in the database (it will also cascade down the joins).

Union and Intersect in Django

class Tag(models.Model):
name = models.CharField(maxlength=100)
class Blog(models.Model):
name = models.CharField(maxlength=100)
tags = models.ManyToManyField(Tag)
Simple models just to ask my question.
I wonder how can i query blogs using tags in two different ways.
Blog entries that are tagged with "tag1" or "tag2":
Blog.objects.filter(tags_in=[1,2]).distinct()
Blog objects that are tagged with "tag1" and "tag2" : ?
Blog objects that are tagged with exactly "tag1" and "tag2" and nothing else : ??
Tag and Blog is just used for an example.
You could use Q objects for #1:
# Blogs who have either hockey or django tags.
from django.db.models import Q
Blog.objects.filter(
Q(tags__name__iexact='hockey') | Q(tags__name__iexact='django')
)
Unions and intersections, I believe, are a bit outside the scope of the Django ORM, but its possible to to these. The following examples are from a Django application called called django-tagging that provides the functionality. Line 346 of models.py:
For part two, you're looking for a union of two queries, basically
def get_union_by_model(self, queryset_or_model, tags):
"""
Create a ``QuerySet`` containing instances of the specified
model associated with *any* of the given list of tags.
"""
tags = get_tag_list(tags)
tag_count = len(tags)
queryset, model = get_queryset_and_model(queryset_or_model)
if not tag_count:
return model._default_manager.none()
model_table = qn(model._meta.db_table)
# This query selects the ids of all objects which have any of
# the given tags.
query = """
SELECT %(model_pk)s
FROM %(model)s, %(tagged_item)s
WHERE %(tagged_item)s.content_type_id = %(content_type_id)s
AND %(tagged_item)s.tag_id IN (%(tag_id_placeholders)s)
AND %(model_pk)s = %(tagged_item)s.object_id
GROUP BY %(model_pk)s""" % {
'model_pk': '%s.%s' % (model_table, qn(model._meta.pk.column)),
'model': model_table,
'tagged_item': qn(self.model._meta.db_table),
'content_type_id': ContentType.objects.get_for_model(model).pk,
'tag_id_placeholders': ','.join(['%s'] * tag_count),
}
cursor = connection.cursor()
cursor.execute(query, [tag.pk for tag in tags])
object_ids = [row[0] for row in cursor.fetchall()]
if len(object_ids) > 0:
return queryset.filter(pk__in=object_ids)
else:
return model._default_manager.none()
For part #3 I believe you're looking for an intersection. See line 307 of models.py
def get_intersection_by_model(self, queryset_or_model, tags):
"""
Create a ``QuerySet`` containing instances of the specified
model associated with *all* of the given list of tags.
"""
tags = get_tag_list(tags)
tag_count = len(tags)
queryset, model = get_queryset_and_model(queryset_or_model)
if not tag_count:
return model._default_manager.none()
model_table = qn(model._meta.db_table)
# This query selects the ids of all objects which have all the
# given tags.
query = """
SELECT %(model_pk)s
FROM %(model)s, %(tagged_item)s
WHERE %(tagged_item)s.content_type_id = %(content_type_id)s
AND %(tagged_item)s.tag_id IN (%(tag_id_placeholders)s)
AND %(model_pk)s = %(tagged_item)s.object_id
GROUP BY %(model_pk)s
HAVING COUNT(%(model_pk)s) = %(tag_count)s""" % {
'model_pk': '%s.%s' % (model_table, qn(model._meta.pk.column)),
'model': model_table,
'tagged_item': qn(self.model._meta.db_table),
'content_type_id': ContentType.objects.get_for_model(model).pk,
'tag_id_placeholders': ','.join(['%s'] * tag_count),
'tag_count': tag_count,
}
cursor = connection.cursor()
cursor.execute(query, [tag.pk for tag in tags])
object_ids = [row[0] for row in cursor.fetchall()]
if len(object_ids) > 0:
return queryset.filter(pk__in=object_ids)
else:
return model._default_manager.none()
I've tested these out with Django 1.0:
The "or" queries:
Blog.objects.filter(tags__name__in=['tag1', 'tag2']).distinct()
or you could use the Q class:
Blog.objects.filter(Q(tags__name='tag1') | Q(tags__name='tag2')).distinct()
The "and" query:
Blog.objects.filter(tags__name='tag1').filter(tags__name='tag2')
I'm not sure about the third one, you'll probably need to drop to SQL to do it.
Please don't reinvent the wheel and use django-tagging application which was made exactly for your use case. It can do all queries you describe, and much more.
If you need to add custom fields to your Tag model, you can also take a look at my branch of django-tagging.
This will do the trick for you
Blog.objects.filter(tags__name__in=['tag1', 'tag2']).annotate(tag_matches=models.Count(tags)).filter(tag_matches=2)

Categories