How to concat two columns of table django model - python

I am implementing search in my project. What I want is to concatenate two columns in the WHERE clause to get results from the table.
Here is what I am doing:
from django.db.models import Q
# Only search once the user has typed at least three characters.
if 'search[value]' in request.POST and len(request.POST['search[value]']) >= 3:
    # Strip the term once and use the same value in every lookup, so that
    # surrounding whitespace cannot make the exact-match lookups miss.
    search_value = request.POST['search[value]'].strip()
    # A single OR-ed Q expression. The created_by lookups only apply when
    # the ticket has no created_for user.
    q.extend([
        Q(id__icontains=search_value) |
        (Q(created_by__first_name=search_value) & Q(created_for=None)) |
        Q(created_for__first_name=search_value) |
        (Q(created_by__last_name=search_value) & Q(created_for=None)) |
        Q(created_for__last_name=search_value) |
        (Q(created_by__email__icontains=search_value) & Q(created_for=None)) |
        Q(created_for__email__icontains=search_value) |
        Q(ticket_category=search_value) |
        Q(status__icontains=search_value) |
        Q(issue_type__icontains=search_value) |
        Q(title__icontains=search_value) |
        # No trailing "|" after the last operand: it is a syntax error.
        Q(assigned_to__first_name__icontains=search_value)
    ])
Now I want to add another OR condition like:
CONCAT(' ', created_by__first_name, created_by__last_name) like '%'search_value'%'
But when I add this condition to the queryset it becomes AND
# The search term comes straight from the request, so it is passed through
# ``params`` and bound by the database driver instead of being concatenated
# into the SQL text (SQL injection).
# Note: clauses given to extra(where=...) are AND-ed with the other filters.
where = ["CONCAT_WS(' ', profiles_userprofile.first_name, profiles_userprofile.last_name) LIKE %s"]
tickets = Ticket.objects.get_active(u, page_type).filter(*q).extra(where=where, params=[request.POST['search[value]']]).exclude(*exq).order_by(*order_dash)[cur:cur_length]
How do I convert this into an OR condition?

Advanced filters can be solved by
Q() object and
Query expressions like Func(), Value() and F().
The only used trick is a
Custom Lookup "rhs_only" that uses the right-hand-side of the lookup and ignores the left side, because it is easier to use all concatenated fields directly on the right side. A memorable function concat_like encapsulates that all to be easily used in queries.
from django.db.models import F, Func, Lookup, Q, Value
from django.db.models.fields import Field
def concat_like(columns, pattern):
    """Return a Q filter for CONCAT_WS(' ', column_0, column_1...) LIKE pattern.

    The filter is keyed on the first column with the ``rhs_only`` lookup;
    the whole comparison is carried on the right-hand side as a ``Like``
    expression.
    """
    joined = Func(
        *[F(name) for name in columns],
        template="CONCAT_WS(' ', %(expressions)s)"
    )
    lookup_key = '{}__rhs_only'.format(columns[0])
    return Q(**{lookup_key: Like(joined, Value(pattern))})
class Like(Func):
    """Boolean SQL expression ``<first expression> LIKE <second expression>``."""

    def as_sql(self, compiler, connection):
        """Compile both source expressions and join them with ``LIKE``.

        Returns a ``(sql, params)`` pair; the params of the first expression
        come before those of the second.
        """
        arg_sql, arg_params = zip(*[compiler.compile(x) for x in self.source_expressions])
        # The right-hand placeholder must stay unquoted: the database driver
        # quotes and escapes the bound value itself. Wrapping it in quotes
        # ('%s') turns it into a literal or a doubly quoted value.
        sql = "%s LIKE %s" % tuple(arg_sql)
        # Normalise to lists so list and tuple params can be concatenated.
        return sql, list(arg_params[0]) + list(arg_params[1])
# The decorator registers the lookup on every field type; without it the
# "<column>__rhs_only" key built by concat_like() cannot be resolved.
@Field.register_lookup
class RhsOnly(Lookup):
    """Skip the LHS and evaluate the boolean RHS only"""
    lookup_name = 'rhs_only'

    def as_sql(self, compiler, connection):
        """Return the compiled right-hand side as the whole condition."""
        return self.process_rhs(compiler, connection)
All boolean expression and related objects are supported by this code. All arguments are correctly escaped.
Example usage:
>>> qs = MyModel.objects.filter(Q(id=1) | concat_like(('first_name', 'surname'), 'searched'))
>>> str(qs.query) # sql output simplified here
"SELECT .. WHERE id=1 OR (CONCAT_WS(' ', first_name, surname) LIKE 'searched')"

Relevant documentation:
https://docs.djangoproject.com/en/1.11/ref/models/expressions/#func-expressions
See also... Value() and F()
https://docs.djangoproject.com/en/1.11/topics/db/aggregation/#aggregations-and-other-queryset-clauses
You can reference annotated fields from inside the filter method. As such, you can filter against two concatenated fields and add it as another OR condition like this:
from django.db.models import F, Func, Value
# Because we added user_full_name as an annotation below,
# we can refer to it in the filters
# Bind the raw (unstripped) term once; the e-mail lookups keep using the
# stripped ``search_value`` exactly as before.
raw_term = request.POST['search[value]']
q.extend([
    Q(id__icontains=raw_term)
    | (Q(created_by__first_name=raw_term) & Q(created_for=None))
    | Q(created_for__first_name=raw_term)
    | (Q(created_by__last_name=raw_term) & Q(created_for=None))
    | Q(created_for__last_name=raw_term)
    | (Q(created_by__email__icontains=search_value) & Q(created_for=None))
    | Q(created_for__email__icontains=search_value)
    | Q(ticket_category=raw_term)
    | Q(status__icontains=raw_term)
    | Q(issue_type__icontains=raw_term)
    | Q(title__icontains=raw_term)
    | Q(assigned_to__first_name__icontains=raw_term)
    | Q(user_full_name__icontains=raw_term)  # <------ the annotated full name
])
# Add the annotation to your queryset
# I'm not actually sure what the related_name or field_name for your user
# profiles are, so I'm pretending that tickets have a profile foreignkey field
# to where the first_name and last_name fields are
# CONCAT_WS(' ', first_name, last_name), exposed as "user_full_name".
user_full_name_expr = Func(Value(' '), F('profile__first_name'), F('profile__last_name'), function='CONCAT_WS')
# get_active() is a method of the manager (it is called on Ticket.objects in
# the question), so call it there and annotate its result. The annotation
# only has to come before the filter() that references it.
tickets = Ticket.objects.get_active(u, page_type).annotate(user_full_name=user_full_name_expr)
tickets = tickets.filter(*q).exclude(*exq).order_by(*order_dash)[cur:cur_length]
For fun, here's a working example based on the User model.
from django.contrib.auth.models import User
from django.db.models import F, Func, Value
# Two sample users to search through.
User.objects.create(username='john', first_name='John', last_name='Jingleheimer-Schmidt')
User.objects.create(username='mike', first_name='Michael', last_name='Finnigan')
# Annotate every user with CONCAT_WS(' ', first_name, last_name) as "full_name".
foo = User.objects.annotate(full_name=Func(Value(' '), F('first_name'), F('last_name'), function='CONCAT_WS'))
# The annotation can be filtered on like a regular field.
print(foo.filter(full_name__icontains='john'))
# outputs: [<User: john>]

What you need is full-text search. I recommend using Haystack (http://haystacksearch.org/).
See documentation of Django (https://docs.djangoproject.com/en/1.11/ref/contrib/postgres/search/)

Related

Union over fields having different names using peewee

I'm using peewee as ORM and have two classes like this:
class A(Model):
    """Model whose searchable text column is called ``name``."""
    name = CharField()
    body = TextField()
class B(Model):
    """Model whose searchable text column is called ``title``."""
    title = CharField()
    body = TextField()
I would like to get all entries from A and B whose title/name start with some characters like 'abc'. According to the documentation the | operator should help, but I'm not even able to execute the resulting Expression. Obviously I would like to have a UNION and AS expression behind the scenes. How do I get this via peewee?
You should be able to get the result you want with something like
# Both selects expose their differently named column under the common alias
# 'name_title', then the two queries are combined with the | operator.
result = (
A().select(A.name.alias('name_title'), A.body).where(A.name == 'abc') |
B().select(B.title.alias('name_title'), B.body).where(B.title == 'abc')
).select().execute()
or
search_text = 'abc'
# Rows from A, with ``name`` exposed under the common alias 'name_title'.
table_a_results = A().select(
    A.name.alias('name_title'),
    A.body
).where(A.name == search_text)
# Rows from B. B has no ``name`` field -- its column is ``title`` -- so that
# is the one aliased to 'name_title'.
table_b_results = B().select(
    B.title.alias('name_title'),
    B.body
).where(B.title == search_text)
# Combine both queries with the | operator.
result = (table_a_results | table_b_results).select().execute()
The .alias() method gets you the AS functionality, as per the docs.

Django -- Generate form based on queryset

I've got a question which I'll ask two ways: short & generic, so future generations of StackOverflow readers will benefit, and Long & Detailed, so I can get my work done without screwing anything up.
Short & Generic Version:
How do I make Django generate a table-like form where some info in the table is from the database, and the user fills in the rest? On form submission, each row in the table should become a record in the database (after it's validated, of course).
What's the cleanest way to do this? What's this mode of interaction canonically called?
Example Form
|=============================================================|
| QueriedValue | CalculatedValue | User_data | More_User_data |
|_____________________________________________________________|
| Foo 1 | Bar 1 | | |
| Foo 2 | Bar 2 | | |
| Foo 3 | Bar 3 | | |
... ... ... ... |
| Foo n | Bar n | | |
|=============================================================|
++++++++++
| Submit |
++++++++++
Resulting Database Records
TimeStamp + fk_Foo = natural primary key for this table
________________
/ \
|===========================================================|
| TimeStamp | fk_Foo | User_data | More_User_data |
|___________________________________________________________|
| submit_time | Foo 1 | Datum 1 | AnotherDatum 1 |
| submit_time | Foo 2 | Datum 2 | AnotherDatum 2 |
| submit_time | Foo 3 | Datum 3 | AnotherDatum 3 |
|... ... ... ... |
| submit_time | Foo n | Datum n | AnotherDatum n |
|===========================================================|
Long Version
I'm writing a web app to track gas cylinder usage at my company. We have a bunch of gas plumbing in our building, and we need to know which gas cylinder was hooked up to which gas line at what time.
I'd like two forms for the technicians to fill out:
Daily Inventory: Every morning, the stockroom guy needs to look at each gas line and record the line's pressure, and the reference number of the bottle. This generates bunch of 4-tuple records (time, line, bottle, psi); one for each line, every morning.
As-Needed Bottle Change: After doing the daily inventory, if a bottle is almost out it needs to be changed, and that change needs to be logged. This should add another entry to the table of bottles for the new bottle, and another 4-tuple with the new (time, line, bottle, psi) info for the new connection. This happens to a random line a few times a week, but not every day.
So to keep track of this I'm writing a Django application. I've got the following models:
# models.py
class GasFarm(models.Model):
    """
    Represents a gas farm -- a collection of lines that are grouped together and managed as a unit.
    """
    # Unique display name of the farm (at most 30 characters).
    name = models.CharField(max_length=30, unique=True)

    def __unicode__(self):
        # A farm is displayed by its name.
        return self.name
class Bottle(models.Model):
    """
    Represents a gas bottle -- the physical cylinder -- that contains a mixture of gases.
    """
    # Fields
    BACKGROUND_TYPES = (
        ('h2/n2', "H2/N2"),
        ('h2/air', "H2/Air"),
        ('h2', "H2"),
        ('n2', "N2"),
        ('other', "Other"),
    )
    ppm = models.FloatField()
    mix = models.CharField(max_length=50, choices=BACKGROUND_TYPES, default='n2')
    ref = models.CharField(max_length=50, unique=True)  # Every bottle has a unique ref, or somebody made a mistake.
    cert_date = models.DateTimeField()
    # Pass the callable, not its result: ``timezone.now()`` would be evaluated
    # once when the module is imported and every bottle would get that same
    # timestamp as its default.
    date_added = models.DateTimeField(default=timezone.now)

    class Meta:
        # Model options must live in Meta; as a plain class attribute
        # ``get_latest_by`` is ignored by Django.
        get_latest_by = 'date_added'

    def pct(self):
        """Return the concentration as a percentage (ppm / 10**4)."""
        return float(self.ppm) / 10**4

    def __unicode__(self):
        return "{} ({}% {})".format(self.ref, self.pct(), self.mix,)
class Line(models.Model):
    """
    A gas line: the physical plumbing that carries gas from a bottle to the
    test stations. A line is assumed to have at most one bottle attached at
    any given time; bottles and their time-stamped readings are tied to the
    line through Reading objects.
    """
    # Fields
    gasfarm = models.ForeignKey(GasFarm)
    number = models.CharField(max_length=10, unique=True)
    bottles = models.ManyToManyField(Bottle, through='Reading')

    # Calculated fields. "current" is a very common query; the others are
    # conveniences built on top of it.
    def current(self):
        """Return the Reading with the latest ``time`` recorded for this line."""
        readings = self.reading_set
        return readings.latest(field_name='time')
    current.short_description = "latest reading"

    def last_checked(self):
        """Return the date & time of this line's most recent Reading."""
        newest = self.current()
        return newest.time
    last_checked.short_description = "last updated"

    def has_recent_reading(self):
        """Return True when the most recent Reading is less than three days old."""
        stamp = self.current().time
        age = timezone.now() - stamp
        return age < datetime.timedelta(days=3)
    has_recent_reading.boolean = True
    has_recent_reading.short_description = "Is data current?"

    def __unicode__(self):
        return self.number
class Reading(models.Model):
    """
    A Reading links a Bottle to a Line at a given time, and provides a snapshot of the pressure at that time.
    """
    # Fields
    line = models.ForeignKey(Line)
    bottle = models.ForeignKey(Bottle)
    time = models.DateTimeField()
    psi = models.IntegerField(validators=[MaxValueValidator(2500)])

    class Meta:
        # Model options must live in Meta; as a plain class attribute
        # ``get_latest_by`` is ignored by Django.
        get_latest_by = 'time'

    def ref(self):
        """
        The reference number of the bottle listed in the reading
        """
        return self.bottle.ref

    def ppm(self):
        """
        The PPM concentration of the bottle listed in the reading
        """
        return self.bottle.ppm

    def pct(self):
        """
        The % concentration of the bottle listed in the reading
        """
        return self.bottle.pct()

    def mix(self):
        """
        The gas mix (e.g. H2/N2) of the associated bottle
        """
        return self.bottle.mix

    def __unicode__(self):
        # Example:
        # A0: 327.3 PPM H2/N2 2300 psi
        return "{}, {}: {} PPM {} {} psi".format(self.line, self.time, self.ppm(), self.mix(), self.psi)
I've populated the database with our backlog of data using some scripts, and I've written a few views to pull data out of the database; I'm happy with them so far, and the results look very promising -- at least for displaying stored data.
But I'm not sure how to cleanly populate the database using HTML forms. I'd like the forms to be basically two separate "worksheets" -- like the kind the DMV gives you, with nice clear instructions #justkidding.
Form 1: Daily Inventory
The form would list all the lines in a given farm, display what bottle should be on each line (based on previous readings/updates), and then prompt the user to enter a value. This would require that the technician update the pressure of every bottle on every line each time they submit the form -- we want a global snapshot of the whole gas system. In a perfect world, the form would pre-fill the current time and each line's most recent pressure reading into the Reading Time and Pressure fields to ease data entry.
# Cells with brackets [] are system-supplied, non-editable data displayed in the table.
# Cells without brackets are pre-filled with sensible defaults, but are user editable.
| [Line] | [Current Bottle] | Reading Time | Pressure (psi) |
===============================================================
| [A0] | [15-1478334] | 2014-7-14 9:34 | 2400 |
| [A1] | [15-1458661] | 2014-7-14 9:34 | 500 |
| [A2] | [15-4851148] | 2014-7-14 9:34 | 1850 |
| [A3] | [15-1365195] | 2014-7-14 9:34 | 700 |
...
...
| [C18] | [15-9555813] | 2014-7-14 9:34 | 2350 |
|=====================================================================|
After reading through the Django docs on Forms, ModelForms, and Formsets, I've written some code that does almost everything I want -- but the Line and Bottle information are editable form fields, and I need them to be static guideposts for filling in the rest of the form. They do need to be present in the generated database records, though.
I am dimly aware of the readonly and disabled attributes, and of what appear to be kludgy solutions to clean data from the POST variable in the response when you want to have read-only stuff in forms, but I'm still not clear on how those work or why they're necessary. I'm wondering if there's a cleaner way to get what I'm after? Perhaps forms with programmatically generated headings, or annotations? That's all I really want: an auto-generated guide to filling out the form.
# Forms.py
class PressureReadingUpdate(forms.ModelForm):
    """ModelForm for a single Reading row."""

    class Meta:
        model = models.Reading
        # Declare the field set explicitly. A ModelForm without ``fields`` or
        # ``exclude`` is deprecated and rejected by newer Django versions;
        # '__all__' keeps the previous "every model field" behaviour.
        fields = '__all__'


# No blank extra forms: the rows come only from the ``initial`` data.
PsiReadingFormset = formset_factory(PressureReadingUpdate, extra=0)
# views.py
def update_pressure(request):
    """Show the pressure-reading formset, or bind it to submitted POST data.

    On GET the formset is pre-filled with one row per Line, taken from that
    line's latest reading. On POST the formset is bound and validated;
    saving the cleaned data is not implemented yet.
    """
    if request.method == 'POST':
        formset = forms.PsiReadingFormset(request.POST)
        if formset.is_valid():
            cd = formset.cleaned_data
            # do something? I'm not here yet...
    else:
        now = datetime.datetime.now()
        initial = []
        for line in models.Line.objects.all():
            # Fetch the latest reading once per line; calling current()
            # separately for psi and bottle ran the same query twice.
            latest = line.current()
            initial.append({'line': line,
                            'psi': latest.psi,
                            "bottle": latest.bottle,
                            'time': now})
        formset = forms.PsiReadingFormset(initial=initial,)
    return render(request, 'gas_tracking/gasfarm_form_psi_reading.html', {'formset': formset})
Form 2: Change a Gas Bottle
I'd like a list of all the gas lines, with the current bottle & pressure (easy-- this is done elsewhere), and then a button that makes a pop-up window where you can submit a new bottle, much like you find in the admin interface. How do I make pop-up windows? How do I make buttons? I'm not even sure where to start with this one yet
I'm still very new to Django, and I've searched high and low, but haven't found anything that answers my question -- maybe I'm just not using the right keywords?
Thanks for your help.
-Matt
Dynamically generating Forms using FormSets
So I figured this out (after much googling and swearing and gnashing of teeth). Malcolm Tredinnick made a blog post about exactly what I wanted to do, which a kind soul preserved on Django Snippets
Using Malcom's code as a model, I solved my problem and it works like a charm
class PressureReadingUpdate(forms.Form):
    """
    Form that asks for the pressure of a line given some attributes of that line.
    """
    psi = forms.IntegerField(widget=forms.NumberInput)

    def __init__(self, *args, **kwargs):
        # A 'line' keyword argument is required (KeyError if it is missing).
        # It is removed here so the base Form constructor never sees it.
        self.line = kwargs.pop('line')
        # The previous '"%s".format(self.line.number)' always evaluated to
        # "%s": str.format() ignores %-style placeholders, so the line number
        # never reached the id. The effective value is kept, without the
        # misleading call.
        # NOTE(review): if the line number was meant to appear in the HTML
        # ids, use something like "line_{}_%s".format(self.line.number).
        kwargs['auto_id'] = "%s"
        super(PressureReadingUpdate, self).__init__(*args, **kwargs)
class BasePsiReadingFormset(BaseFormSet):
    """
    Formset that builds one PressureReadingUpdate form per Line: it receives
    a collection of Line objects and hands each one to the form created at
    the matching index.
    """

    def __init__(self, *args, **kwargs):
        # 'lines' is this formset's own keyword argument; take it out before
        # delegating to BaseFormSet.
        self.lines = kwargs.pop('lines')
        super(BasePsiReadingFormset, self).__init__(*args, **kwargs)
        # Exactly one form per line.
        line_count = len(self.lines)
        self.extra = line_count
        self.max_num = line_count

    def _construct_form(self, i, **kwargs):
        # Pass the i-th line down to the form constructor.
        kwargs.update(line=self.lines[i])
        return super(BasePsiReadingFormset, self)._construct_form(i, **kwargs)


PsiReadingFormset = formset_factory(form=PressureReadingUpdate, formset=BasePsiReadingFormset)
This gives you a Formset with an extra kwarg you can pass down the chain of constructors. You can use it in a view (along with a more typical initial= kwarg) with:
# ``lines`` is consumed by BasePsiReadingFormset.__init__; ``initial``
# pre-fills each form with the latest recorded pressure of its line.
formset = PsiReadingFormset(lines=lines,
                            initial=[{'psi': l.current().psi} for l in lines])
So here's the best explanation I can think of:
Any kwargs passed to a FormSet like (which gets made by the formset_factory function using a non-default 'BaseFormSet' as a blueprint) get passed along -- mostly unaltered -- to the __init__ method of whatever BaseFormSet the FormSet is based on.
This means you can define custom behavior in BaseFormSet.__init__, and you can relay runtime data to the BaseFormSet.__init__ method by passing it as a keyword argument to FormSet (that's the lines= kwarg in my example above). I use it to set an attribute on the formset (an instance of a FormSet based on 'BasePsiReadingFormset').
Confused yet? I was too at first.
But the real magic is understanding how _construct_form works: A FormSet calls this function each time it wants to make a new Form in the set. It will relay any unrecognized kwargs to the constructor of the Form it is supposed to manage a set of.
So you just need to override your custom BaseFormSet's ._construct_form to wrap the original _construct_form of the superclass and inject a new kwarg. This kwarg will then be passed through to the constructor of your custom Form class, and shape the new Form instance according to the Form's initialization function.
And that, ladies and gentlemen, is how you can have a FormSet that has a bunch of similarly-defined but dynamically-generated-and-slightly different forms in it.
After understanding this, I can see the elegance of it. However, it uses some intermediate-to-advanced python, and is neither immediately obvious nor well documented. If you are struggling with this like I did, feel free to message me.

Django aggregation across multiple tables in ModelAdmin queryset

Django Code & Reference to Django Bug Report
Given three models as follows (simplified excessively for demonstration...not actually identical related models)
class derp(models.Model):
    # Fields omitted in the question.
    ...


class derp_related_1(models.Model):
    # First related table: each row carries an amount for one derp.
    fk = models.ForeignKey(derp)
    amount = models.DecimalField(max_digits=15, decimal_places=2)


class derp_related_2(models.Model):
    # Second related table: each row carries an amount for one derp.
    fk = models.ForeignKey(derp)
    amount = models.DecimalField(max_digits=15, decimal_places=2)
And overriding a queryset in the model admin as follows. (It isn't working because of this django bug.)
class DerpAdmin(admin.ModelAdmin):
    """Admin that lists the summed amounts of both related tables."""
    ...
    list_display = ['derp_r1_sum', 'derp_r2_sum']
    ...

    def queryset(self, request):
        """Return the admin queryset annotated with both sums.

        Joining both relations in one query multiplies the rows, so the two
        sums are inflated -- this is the problem the question describes.
        """
        qs = super(DerpAdmin, self).queryset(request)
        qs = qs.annotate(derp_r1_sum=models.Sum('derp_r1__amount', distinct=True))
        qs = qs.annotate(derp_r2_sum=models.Sum('derp_r2__amount', distinct=True))
        # The annotated queryset has to be returned; without this the method
        # returns None and the admin has nothing to list.
        return qs

    def derp_r1_sum(self, obj):
        """Render the first annotated sum for the list view."""
        return u'%s' % obj.derp_r1_sum

    def derp_r2_sum(self, obj):
        """Render the second annotated sum for the list view."""
        return u'%s' % obj.derp_r2_sum
Example of Unexpected Database Result
Running annotations individually would render something like (with grouping & sums removed)
+---------+--------+
| derp.id | r1_sum |
+---------+--------+
| 2 | 500.00 |
| 2 | 100.00 |
+---------+--------+
r1_sum would be 600.00
and
+---------+--------+
| derp.id | r1_sum |
+---------+--------+
| 2 | 100.00 |
| 2 | 250.00 |
+---------+--------+
r2_sum would be 350.00
If you take qs.query with both annotations included and remove the sums and the grouping it is obvious what the problem is. In this case we're counting everything twice. Get more relations and we have an increasingly ugly increase in both sum columns.
+---------+--------+--------+
| derp.id | r1_sum | r2_sum |
+---------+--------+--------+
| 2 | 500.00 | 100.00 |
| 2 | 500.00 | 250.00 |
| 2 | 100.00 | 100.00 |
| 2 | 100.00 | 250.00 |
+---------+--------+--------+
r1_sum would incorrectly be 1200.00
r2_sum would incorrectly be 700.00
Question, is there a route other than custom SQL?
I can write the query myself easy enough, but if anyone has a suggestion which would avoid the writing of custom SQL that would be awesome.
Thanks for the help.
Edit: Here is a link to the annotations section of the Django documentation. One commenter mentioned the distinct option. This does not work, and I believe it is what is warned about at the bottom of the annotation section in the django documentation on annotation.
Edit2: The raw SQL idea likely is more difficult than I thought as derp.objects.raw('sql here') does not return the queryset object necessary for the admin to use it. Is there a way to use two queries (the real queryset plus a custom one doing the sums) and populate the listview from both? One suggestion I found (which I cannot find again now :S) suggested creating a view that maps to a Model definition which is then set to unmanaged by django (for syncdb). I could then write my custom code, and reference it for inclusion in the original query. This sounds messy. Thoughts?
If you want to stay within Django's queryset, I would consider creating a model superclass that shares the related and common fields and sub-classing for further distinctions. Otherwise, you need to either write custom SQL or get out of the database ORM entirely and manipulate your data in python with Queryset.values or Queryset.values_list
The best way I found to return the correct results was by using queryset.extra().
# Correlated subquery: total amount of derp_related_1 rows for the current derp.
derp_r1_sum_select = """
select sum(`derp_related_1`.`amount`)
from `derp_related_1`
where `derp_related_1`.`fk` = `derp`.`pk`
"""
# Same for derp_related_2. The stray double quote that used to end the WHERE
# line made the statement invalid SQL and has been removed.
derp_r2_sum_select = """
select sum(`derp_related_2`.`amount`)
from `derp_related_2`
where `derp_related_2`.`fk` = `derp`.`pk`
"""
def queryset(self, request):
    """Return the admin queryset with both sums added as extra select columns."""
    extra_columns = {
        'derp_r1_sum': derp_r1_sum_select,
        'derp_r2_sum': derp_r2_sum_select,
    }
    base = super(DerpAdmin, self).queryset(request)
    return base.extra(select=extra_columns)

How to create two mutually dependent objects in SQLAlchemy?

I have two Python classes, Note and Link, mapping to PostgreSQL tables. Note has a foreign-key reference to Link, while Link points back to the note through a piece of JSON text. Links point to other things besides Notes, but that doesn't matter here.
Note
+------+------------------+---------+
| ID | NAME | NOTE_ID |
+------+------------------+---------+
| 1 | Alice | 5 |
| 2 | Bob | 20 |
| 3 | Carla | 6 |
+------+------------------+---------+
Link
+------+--------------+
| ID | CONTENT |
+------+--------------+
| ... | ... |
| 5 | {"t":1} |
| 6 | {"t":3} |
| ... | ... |
| 20 | {"t":2} |
+------+--------------+
Now what I would like is that whenever I create a new Note
note = Note('Danielle')
it would automatically enter the row
(4, 'Danielle', 21)
into Note, AND enter
(21, '{"t":4}')
into Link. Here's what I have tried so far: I create the Note object and THEN try to create the Link in the @events.after_insert event:
class Note(Entity):
    name = Field(Unicode)
    link = ManyToOne('Link')
    ...

    # Decorator restored: as a plain comment the hook is never registered.
    @events.after_insert
    def create_link(self):
        """
        Create and persist the short link for this note. Must be done
        in this after_insert event because the link table has a foreign
        key that points back to the note. We need the note to be
        already inserted so we can use its id.
        """
        self.link = Link.build_link_for_note(self)
        # NOTE: this flush runs while the session is already flushing and is
        # what raises "InvalidRequestError: Session is already flushing".
        elixir.session.flush()
        print("NOTE %s GOT LINK %s" % (self, self.link))
In the Link class I have
class Link(Entity):
    ...

    @classmethod
    def build_link_for_note(cls, note):
        """Return a new, unsaved link whose JSON content points at ``note``.

        The content uses the "t" key, matching the rows of the Link table
        and the printed output shown in the question.
        """
        return cls(content='{"t": %d}' % note.id)
Both tables have autoincremented primary keys, so no worries there. The error that I get with this code is:
File ".../sqlalchemy/orm/session.py", line 1469, in flush
raise sa_exc.InvalidRequestError("Session is already flushing")
InvalidRequestError: Session is already flushing
I'll buy that. The @after_insert event gets called (I think) after the Note got stored to the database, which happened during the current session flush. And if I remove the elixir.session.flush() call, then of course it prints
NOTE <Note id:4 name:Danielle> GOT LINK <id:None content:{"t": 4}>
which again makes sense since I haven't been able to persist the link!
So my question is, how can I, create both a Note and a Link in a single request, so that the mutually dependent ids are available and properly recorded?
P.S. I understand that the schema here is a little unusual, and that I can solve this issue by either (1) spawning a task to create the Link asynchronously or (2) making the Link.content method create the link lazily. These solutions require some concurrency attention, so I am really hoping that a simple, direct SQLAlchemy solution with one session can work.
I'd advise against using Elixir's methods such as "save()" which mis-uses SQLAlchemy's API. Here is the aforementioned approach using standard SQLAlchemy events. Everything is achieved in one flush as well.
from sqlalchemy import *
from sqlalchemy.orm import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import event
import json
Base = declarative_base()
class Note(Base):
    """A note that owns exactly one Link, created together with the note."""
    __tablename__ = "note"
    id = Column(Integer, primary_key=True)
    name = Column(String)
    # Foreign key to the note's link row.
    note_id = Column(Integer, ForeignKey('link.id'))
    link = relationship("Link")

    # if using __init__ here is distasteful to you,
    # feel free to use the "init" event illustrated
    # below instead
    def __init__(self, name):
        self.name = name
        # Attach a fresh, still empty Link to the new note.
        self.link = Link()
class Link(Base):
    """A link row; ``content`` holds JSON text."""
    __tablename__ = "link"
    id = Column(Integer, primary_key=True)
    content = Column(String)
# using an event instead of Note.__init__
#@event.listens_for(Note, "init")
#def init(target, args, kwargs):
#    target.link = Link()
# Decorator restored: as a plain comment the listener is never registered and
# the link content is never written.
@event.listens_for(Note, "after_insert")
def after_insert(mapper, connection, target):
    """Write the new note's id into the content of its own link row.

    Uses the flush's connection directly, so no nested session flush is
    needed.
    """
    link_table = Link.__table__
    connection.execute(
        link_table.update()
        # Restrict the UPDATE to this note's link; without a WHERE clause
        # every row of the link table would be overwritten.
        .where(link_table.c.id == target.note_id)
        .values(content=json.dumps({"t": target.id}))
    )
# In-memory SQLite database; echo=True logs the emitted SQL.
e = create_engine("sqlite://", echo=True)
Base.metadata.create_all(e)
s = Session(e)
# Creating the note also creates its link (see Note.__init__).
note = Note('Danielle')
s.add(note)
s.commit()
# Check that the stored link content refers to the note's id.
note = s.query(Note).first()
assert s.query(Link.content).scalar() == ('{"t": %d}' % note.id)
Since both objects have autogenerated IDs that come from the DB and want to store each other's IDs, you need to save both objects first, then save one of the objects once more, with the updated ID of other object.
So I'd go with removing the flush call and maybe calling save explicitly for each of the objects involved.

Django - Complex query

Assuming I have two models:
class Profile(models.Model):
    # some fields here
    pass


class Ratings(models.Model):
    """One rating: a number of points given to a profile in a category."""
    # The model class is ``Profile``; lowercase ``profile`` is an undefined
    # name and raises NameError when the module is imported.
    profile = models.ForeignKey(Profile)
    category = models.IntegerField()
    points = models.IntegerField()
Assuming the following example of the MySQL table "ratings":
profile | category | points
1 1 10
1 1 4
1 2 10
1 3 0
1 4 10
1 4 10
1 4 10
1 5 0
I have following values in my POST data and also other fields values:
category_1_avg_val = 7
category_2_avg_val = 5
category_3_avg_val = 5
category_4_avg_val = 7
category_5_avg_val = 9
I want to filter profiles that have the average ratings calculated for categories higher or equal to required values.
Some filters are applied initially as:
# Filters that must all match (AND-ed); pairs with an empty value are skipped.
q1 = [('associated_with', search_for),
      ('profile_type__slug__exact', profile_type),
      ('gender__in', gender),
      ('rank__in', rank),
      ('styles__style__in', styles),
      ('age__gte', age_from),
      ('age__lte', age_to)]
q1_list = [Q(x) for x in q1 if x[1]]
# Free-text search: any one of these may match (OR-ed).
q2 = [('user__first_name__icontains', search_term),
      ('user__last_name__icontains', search_term),
      ('profile_type__name__icontains', search_term),
      ('styles__style__icontains', search_term),
      ('rank__icontains', search_term)]
q2_list = [Q(x) for x in q2 if x[1]]

# Start unbound-safe: previously ``objects`` was only assigned inside the
# ``if q1_list`` branch, so an empty q1_list raised NameError further down.
objects = None
if q1_list:
    objects = Profile.objects.filter(reduce(operator.and_, q1_list))
if q2_list:
    # Same rule as before: narrow the q1 result when it has rows, otherwise
    # search across all profiles.
    base = objects if objects else Profile.objects
    objects = base.filter(reduce(operator.or_, q2_list))
if objects is None:
    # No filter was supplied at all: fall back to every profile.
    objects = Profile.objects.all()

ordering = '-ranking_level' if order_by_ranking_level == 'desc' else 'ranking_level'
objects = objects.order_by(ordering).distinct()
Now i want to filter profiles whose (average of points) (group by category) >= (avg values of category coming in post)
I tried to do this one by one as
# Attempt from the question: keep profiles whose average points in category 1
# reach the required value ...
objects = objects.filter(
ratings__category=1) \
.annotate(avg_points=Avg('ratings__points'))\
.filter(avg_points__gte=category_1_avg_val)
# ... then apply the same steps for category 2 on the already filtered and
# annotated queryset, reusing the annotation name ``avg_points``.
objects = objects.filter(
ratings__category=2) \
.annotate(avg_points=Avg('ratings__points'))\
.filter(avg_points__gte=category_2_avg_val)
But this is wrong I think. Please help me out. If return is a queryset that would be great.
Edited
Using the answer posted by hynekcer I came up with slightly different solution as I have already queryset of profiles which needs to be filtered more based on rating.
def check_ratings_avg(pr, rtd):
    """Return True when profile ``pr`` meets every required category average.

    ``rtd`` maps a category to the minimum average of points required in
    that category. One aggregate query is run per call.
    """
    rows = Ratings.objects.filter(profile__id=pr.id) \
        .values('category') \
        .annotate(points_avg=Avg('points'))
    averages = {row['category']: row['points_avg'] for row in rows}
    for cat, required in rtd.items():
        avg = averages.get(cat)
        # A category without any rating has no average and cannot satisfy
        # its threshold; indexing the dict directly raised KeyError here.
        if avg is None or avg < required:
            return False
    return True
# Required minimum average per category.
rtd = {1: category_1_avg_val, 2: category_2_avg_val, 3: category_3_avg_val,
4: category_4_avg_val, 5: category_5_avg_val}
# Evaluates the queryset and keeps the profiles that pass the check; the
# result is a plain list, no longer a QuerySet.
objects = [i for i in objects if check_ratings_avg(i, rtd)]
Your complex query require a subquery in the principle. Possible solutions are:
A subquery written with the 'extra' queryset method or as a raw SQL query. It is not DRY, and it was unsupported by some database backends (e.g. some versions of MySQL); however, subqueries have been used by Django in a limited way since Django 1.1.
Saving intermediate results into a temporary table in the database. It is not nice in Django.
Emulation of the outer query by loop in Python. The best universal solution. A loop in Python over database data aggregated by the first query can aggregate and filter the data fast enough.
A) Subquery emulated by Python
from django.db.models import Q, Avg
from itertools import groupby
from myapp.models import Profile, Ratings
def iterator_filtered_by_average(dictionary):
    """Yield one item per profile that meets every required category average.

    ``dictionary`` maps a category to the minimum required average of points.
    Each yielded item is the groupby key, i.e. a row produced by
    ``values('profile')``.
    """
    # Average of points, ordered by profile and category.
    # NOTE(review): 'points' is also listed in values() before annotate(),
    # which presumably adds it to the grouping -- confirm the averages are
    # really computed per (profile, category).
    qr = Ratings.objects.values('profile', 'category', 'points').order_by(
        'profile', 'category').annotate(points_avg=Avg('points'))
    # One OR-ed condition per requested category.
    f = Q()
    for k, v in dictionary.iteritems():
        f |= Q(category=k, points_avg__gte=v)
    # groupby() merges consecutive equal rows, so each group holds the rows
    # of one profile; a profile qualifies when it has as many matching rows
    # as there are requested categories.
    for profile, grp in groupby(qr.filter(f).values('profile')):
        if len(list(grp)) == len(dictionary):
            yield profile
# Example: required minimum average per category.
FILTER_DATA = {1:category_1_avg_val, 2:category_2_avg_val, 3:category_3_avg_val,
4:category_4_avg_val, 5:category_5_avg_val}
# Print every profile that satisfies all thresholds.
for row in iterator_filtered_by_average(FILTER_DATA):
    print row
This is a simple solution for the original question without later additional requirements.
B) Solution with subqueries:
This is necessary for the more detailed version of the question, because the initial filters are based on a field of type ManyToManyField and the query also contains a distinct clause:
# objects: QuerySet that you get from your initial filters. Not yet executed.
# Every "subquery*" / "mainquery" value below is a (sql, params) pair; the
# parameters stay separate from the SQL text all the way to raw().
if rtd:
    # Method `as_nested_sql` removes the `order_by` clause, unlike `as_sql`
    subquery3 = objects.values('id').query \
        .get_compiler(connection=connection).as_nested_sql()
    # Average points per (profile, category), limited to the profiles
    # selected by the initial filters.
    subquery2 = ("""SELECT profile_id, category, avg(points) AS points_avg
FROM myapp_ratings
WHERE profile_id in
( %s
) GROUP BY profile_id, category
""" % subquery3[0], subquery3[1]
    )
    # One "category = N AND points_avg >= %s" condition per category; the
    # category is formatted with %d, the threshold stays a bound parameter.
    where_sql = ' OR '.join(
        'category = %d AND points_avg >= %%s' % cat for cat in rtd.keys()
    )
    # Keep the profiles for which every requested category matched
    # (number of matching rows == number of categories).
    subquery = (
        """SELECT profile_id
FROM
( %s
) subquery2
WHERE %s
GROUP BY profile_id
HAVING count(*) = %s
""" % (subquery2[0], where_sql, len(rtd)),
        subquery2[1] + tuple(rtd.values())
    )
    # The sort direction is interpolated into the SQL text below, so only
    # the two known values are accepted.
    assert order_by_ranking_level in ('asc', 'desc')
    mainquery = ("""SELECT myapp_profile.* FROM myapp_profile
INNER JOIN
( %s
) subquery ON subquery.profile_id=myapp_profile.id
ORDER BY ranking_level %s"""
        % (subquery[0], order_by_ranking_level), subquery[1]
    )
    objects = Profile.objects.raw(mainquery[0], params=mainquery[1])
return objects
Replace please all strings myapp by name_of_your_application.
Example of SQL generated by this code
SELECT myapp_profile.* FROM myapp_profile
INNER JOIN
( SELECT profile_id
FROM
( SELECT profile_id, category, avg(points) AS points_avg
FROM myapp_ratings
WHERE profile_id IN
( SELECT U0.`id` FROM `myapp_profile` U0 WHERE U0.`ranking_level` >= 4
) GROUP BY profile_id, category
) subquery2
WHERE category = 1 AND points_avg >= 7 OR category = 2 AND points_avg >= 5
OR category = 3 AND points_avg >= 5 OR category = 4 AND points_avg >= 7
OR category = 5 AND points_avg >= 9
GROUP BY profile_id
HAVING count(*) = 5
) subquery ON subquery.profile_id=myapp_profile.id
ORDER BY ranking_level asc
(This SQL is for better readability parsed manually with strings %s replaced by parameters, however the database engine receive parameters unparsed for security reasons.)
Your problem is due to little support of subqueries generated by Django. Only examples from documentation of more complicated queries create a subquery. (e.g. aggregate after annotate or count after annotate or aggregate after distinct, but no annotate after distinct or after annotate) Complicated nested aggregations are simplified to one query which is unexpected.
All other solutions that execute a new individual SQL query for every object filtered by the first query are discouraged for production although they can be very useful for testing results of any better solution.
You could add methods to a manager
# Untested code
class ProfileManager(models.Manager):
    """Manager with helpers for filtering profiles by per-category averages."""

    def with_category_average(self, cat, avg):
        """Return profiles whose average points in category ``cat`` are >= ``avg``."""
        # Give each filter a unique annotation key
        key = 'avg_pts_' + str(cat)
        return self.filter(ratings__category=cat) \
            .annotate(**{key: Avg('ratings__points')}) \
            .filter(**{key + '__gte': avg})

    # Expects a dict of `cat: avg` pairs
    def filter_by_averages(self, avg_dict):
        """Combine the per-category querysets with ``&``.

        NOTE(review): the answer marks this code as untested -- confirm that
        combining the annotated querysets with ``&`` gives the intended result.
        """
        qs = self.get_query_set()
        for key, val in avg_dict.items():
            qs &= self.with_category_average(key, val)
        return qs

Categories