DRF dynamic filtering - python

I need dynamic filtering in DRF that allows using parentheses to define operator precedence and supports any combination of the fields available in the model.
Operations are: and, or, eq (equal), ne (not equal), gt (greater than), lt (less than)
example: "(date eq '2016-05-01') AND ((number_of_calories gt 20) OR (number_of_calories lt 10))"
How can I achieve this? What is the best way?
Currently I have the solution below, but it is not a good approach because it is vulnerable to SQL injection:
utils.py
# Textual comparison operators (padded with spaces) and their SQL equivalents.
mappings = {
    ' eq ': ' = ',
    ' ne ': ' != ',
    ' gt ': ' > ',
    ' lt ': ' < ',
    ' gte ': ' >= ',
    ' lte ': ' <= ',
}


def convert_string(query: str) -> Optional[str]:
    """Translate the textual comparison operators in *query* into SQL operators.

    Returns the converted string, or ``None`` when *query* is empty, is not a
    string, or contains an SQL comment marker or a blacklisted statement.

    WARNING: a keyword blacklist is not a sound defence against SQL injection.
    Prefer parsing the expression and building ORM filters (or a parameterized
    query) over concatenating the result into raw SQL.
    """
    if not query or not isinstance(query, str):
        return None

    forbidden_patterns = (
        r"drop\s+table\s*\w*",
        r"alter\s+table\s+\w+",
        r"update\s+\w+\s+set\s+\w+",
        r"insert\s+into\s+\w+",
        r"delete\s+from\s+\w+",
        r"select\s+\w+\s+from\s+",
    )
    query_lower = query.lower()
    if '--' in query_lower or '/*' in query_lower:
        return None
    # search(), not match(): match() only tests the very start of the string,
    # so a statement placed after any other text would slip through.
    if any(re.search(pattern, query_lower) for pattern in forbidden_patterns):
        return None

    for expression, operation in mappings.items():
        query = query.replace(expression, operation)
    return query
views.py
def get_queryset(self):
    """Return the ``Users`` rows matching the filter expression in the request body.

    A missing expression, or one rejected by ``convert_string`` (which then
    returns ``None``), yields an empty queryset instead of failing with
    ``KeyError`` / ``TypeError``.
    """
    q_string = self.request.data.get('query')
    # q_string = "(date eq '2016-05-01') AND ((number_of_calories gt 20) OR (number_of_calories lt 10))"
    query = convert_string(q_string)
    # just replace 'eq' with '=', 'ne' with '!=', and so on ...
    # query = "(date = '2016-05-01') AND ((number_of_calories > 20) OR (number_of_calories < 10))"
    if query is None:
        return Users.objects.none()
    # NOTE(review): this is still string-built SQL from client input and
    # remains injectable; it is kept only to illustrate the question.
    users = Users.objects.raw('SELECT * FROM Users WHERE ' + query)
    return users

For parsing a query string like:
string = "((num_of_pages gt 20) OR (num_of_pages lt 10)) AND (date gt '2016-05-01')"
you can use the pyparsing package (I am not an expert, but it is a very powerful library) together with Django Q objects:
parsing code:
import pyparsing as pp
import operator as op
from django.db.models import Q
word = pp.Word(pp.alphas, pp.alphanums + "_-*'")
operator = pp.oneOf('lt gt eq').setResultsName('operator')
number = pp.pyparsing_common.number()
quoted = pp.quotedString().setParseAction(pp.removeQuotes)
term = (word | number | quoted)
key = term.setResultsName('key')
value = term.setResultsName('value')
group = pp.Group(key + operator + value)
def q_item(item):
    """Build a Django ``Q`` object from one parsed ``key operator value`` group.

    The grammar keyword ``eq`` has no Django lookup of the same name, so it is
    translated to ``exact``; ``gt`` and ``lt`` are used as lookup names as-is.
    """
    lookup = 'exact' if item.operator == 'eq' else item.operator
    return Q(**{f'{item.key}__{lookup}': item.value})
class BaseBinary:
    """Base node for a boolean operator matched by ``infixNotation``.

    Subclasses provide ``symbol`` (shown by ``__repr__``) and ``op``, a
    two-argument callable used to combine the evaluated operands.
    """

    def __init__(self, tokens):
        # tokens[0] alternates operand, operator, operand, ...; keep operands.
        self.args = tokens[0][0::2]

    def __repr__(self):
        return f'{self.__class__.__name__}({self.symbol}):{self.args}'

    def evaluate(self):
        """Evaluate every operand and fold them together with ``self.op``.

        A chain such as ``a AND b AND c`` arrives as a single group with three
        operands, so all of them are combined, not just the first two.
        """
        from functools import reduce

        operands = (
            arg.evaluate() if isinstance(arg, BaseBinary) else q_item(arg)
            for arg in self.args
        )
        return reduce(self.op, operands)
class BoolNotOp(BaseBinary):
    """Unary NOT node: negates its single operand."""

    symbol = 'NOT'
    op = op.not_

    def __init__(self, tokens):
        super().__init__(tokens)
        # For a prefix operator the group is [operator, operand].
        self.args = tokens[0][1]

    def evaluate(self):
        """Return the negation (``~``) of the evaluated operand."""
        operand = self.args
        if isinstance(operand, BaseBinary):
            inner = operand.evaluate()
        else:
            inner = q_item(operand)
        return ~inner
class BoolAndOp(BaseBinary):
symbol = 'AND'
op = op.and_
class BoolOrOp(BaseBinary):
symbol = 'OR'
op = op.or_
expr = pp.infixNotation(group,
[('NOT', 1, pp.opAssoc.RIGHT, BoolNotOp),
('AND', 2, pp.opAssoc.LEFT, BoolAndOp),
('OR', 2, pp.opAssoc.LEFT, BoolOrOp)])
Now given a string like:
string = "(date gt '2016-05-01') AND ((num_of_pages gt 20) OR (num_of_pages lt 10))"
to the parser:
parser = expr.parseString(string)[0]
print(parser.evaluate())
give us our Q objects:
(AND: ('date__gt', '2016-05-01'), (OR: ('num_of_pages__gt', 20), ('num_of_pages__lt', 10)))
ready to be filtered
class Book(models.Model):
title = models.CharField(max_length=200)
counter = models.PositiveIntegerField(default=0)
date = models.DateField(auto_now=True)
num_of_pages = models.PositiveIntegerField(default=0)
qs = Book.objects.filter(parser.evaluate())
print(qs.query)
SELECT "core_book"."id", "core_book"."title", "core_book"."counter", "core_book"."date", "core_book"."num_of_pages" FROM "core_book" WHERE ("core_book"."date" > 2016-05-01 AND ("core_book"."num_of_pages" > 20 OR "core_book"."num_of_pages" < 10))
P.S not fully tested.

I currently use Q objects extensively in a project that uses the user's GET parameters to filter a search result.
Here is a snippet
some_initial_query_object = Model.objects.all()
qs_result_dates = []
qs_result_dates.append(
    Q(
        event_date__start_date_time__gte='2021-08-01',
        # A str cannot be added to a timedelta, so do the arithmetic on a date.
        event_date__start_date_time__lt=datetime.date(2021, 9, 1) + datetime.timedelta(days=1),
    )
)
# filter() takes Q objects as positional arguments, so unpack the list.
some_initial_query_object = some_initial_query_object.filter(*qs_result_dates)
In your scenario you can use | for OR and & for AND
# (date eq '2016-05-01') AND ((number_of_calories gt 20) OR (number_of_calories lt 10))
# Keyword arguments inside a single Q() are ANDed, so the OR needs two Q objects.
Q(date='2016-05-01') & (
    Q(number_of_calories__gt=20) | Q(number_of_calories__lt=10)
)

Here is an example of dynamic filtering using DRF by overriding the get_queryset method on the ModelViewSet that I use in all of my projects. Using this method I can leverage the full power of Django-ORM framework from the frontend.
views.py
class BaseAPIView(...):  # replace ... with the DRF view class you inherit from
    ''' base view for other views to inherit '''

    def get_queryset(self):
        """Return ``self.queryset``, narrowed by the optional ``filter`` query parameter.

        ``filter`` is expected to be a JSON object whose keys are passed to
        ``QuerySet.filter`` as keyword arguments.
        """
        queryset = self.queryset
        # get filter request from client:
        filter_string = self.request.query_params.get('filter')
        # apply filters if they are passed in:
        if filter_string:
            # NOTE(review): the client controls every lookup here, including
            # lookups that traverse relations; restrict the allowed keys if
            # some fields must not be filterable.
            filter_dictionary = json.loads(filter_string)
            queryset = queryset.filter(**filter_dictionary)
        return queryset
The request url will now look like, for example: my_website.com/api/users?filter={"first_name":"John"}
Which can be built like:
script.js
// using ajax as an example:
var filter = JSON.stringify({
"first_name" : "John"
});
$.ajax({
"url" : "my_website.com/api/users?filter=" + filter,
"type" : "GET",
...
});
Some advantages:
no need to specify which fields can be filtered on each view class
write it once, use it everywhere
front end filtering looks exactly like django filtering
can do the same with exclude
Some disadvantages:
potential security risks if you want some fields to be non-filterable
less intuitive front-end code to query a table
Overall, this approach has been far more useful for me than any packages out there.

Related

pyparsing: how to parse nested function which start with particular function name?

I want to use pyparsing to parse a nested function which start with particular function name.
Just like this:
tag("tag_name_1", value_equal("proxy.province", "value", "return_value", test(1,2)))
The string waited to be parsed starts with the function named 'tag'.
The problem is: why doesn't exprStack contain the "tag" function?
import pyparsing as pp
from typing import Any, List, Dict
def debug(*args, **kwargs):
print("debug"+"---"*10)
print(*args, **kwargs)
print("debug"+"---"*10)
# return "debug"
return "debug"
def insert_fn_argcount_tuple(t):
    """Replace the leading function name in *t* with a ``(name, arg_count)`` tuple.

    *t* is modified in place; the tuple is also printed.
    """
    name = t.pop(0)
    # Everything left after removing the name is an argument.
    entry = (name, len(t))
    print(entry)
    t.insert(0, entry)
def push_first(toks):
exprStack.append(toks[0])
def to_string(toks):
pass
LPAREN, RPAREN, COMMA = map(pp.Suppress, '(),')
ident = pp.Word(pp.alphas, pp.alphanums+"_")
integer = pp.Word(pp.nums)
string = (pp.QuotedString("'") | pp.QuotedString('"')).setParseAction()
expr = pp.Forward()
expr_list = pp.delimitedList(pp.Group(expr))
tag_fn = ("tag" + LPAREN + expr_list + RPAREN).setParseAction(insert_fn_argcount_tuple)
fn_call = (ident + LPAREN + expr_list + RPAREN).setParseAction(insert_fn_argcount_tuple)
atom = ( (fn_call | string | integer) | pp.Group(LPAREN+expr+RPAREN)).addParseAction(push_first)
# atom = ( fn_call | pp.Group(LPAREN+expr+RPAREN)).addParseAction(push_first)
expr <<= atom
bnf = pp.Forward()
bnf <<= tag_fn
funcs = """tag
value_equal
value_contain
value_match
value
"""
# functions
def tag(tag_name: str, value: Any) -> Dict:
    """Return a ``tag_name``/``tag_value`` dict, or ``{}`` if either argument is falsy."""
    if tag_name and value:
        return {"tag_name": tag_name, "tag_value": value}
    return {}
def test(*args, **kwargs):
return ""
def value_equal(key: str, value, default=None, test=None):
print(f"---{value_equal}---")
print(f"key: {key}, value: {value}, defaul: {default}, test:{test}")
return "value-1"
fns = {
"tag": tag,
"value_equal": value_equal,
"test": test
}
exprStack = []
def evaluate_stack(s:List): # List param will be changed after invoke evaluate_stack function
    """Pop the top entry of *s* and evaluate it, consuming its arguments from *s*.

    A ``(name, arg_count)`` tuple whose name is a key of ``fns`` is called with
    ``arg_count`` recursively evaluated arguments; anything else is returned
    unchanged.
    """
    top = s.pop()
    if isinstance(top, tuple):
        name, argc = top
    else:
        name, argc = top, 0
    if name not in fns:
        return name
    # Arguments come off the stack last-first, so flip them back before calling.
    popped = [evaluate_stack(s) for _ in range(argc)]
    return fns[name](*reversed(popped))
test_str = """tag("tag_name_1", value_equal("proxy.province", "value", "return_value", test(1,2)))"""
# test_str = "123"
p = bnf.parse_string(test_str)
print(f"\nexprStack:{exprStack}\n")
t = evaluate_stack(exprStack)
print(f"tag:{t}")
The output of above code is:
('test', 2)
('value_equal', 4)
('tag', 2)
exprStack:['tag_name_1', 'proxy.province', 'value', 'return_value', '1', '2', ('test', 2), ('value_equal', 4)]
I expected exprStack to contain the tag function as well, like this:
exprStack:['tag_name_1', 'proxy.province', 'value', 'return_value', '1', '2', ('test', 2), ('value_equal', 4), ('tag', 2)]
You are really pretty close. The thing is, the push_first parse action is attached to atoms, but tag_fn is not an atom. So it won't get its data pushed to expr_stack.
To fix this:
Change atom to include tag_fn, something like this:
atom = ((tag_fn | fn_call | string | integer) | pp.Group(LPAREN+expr+RPAREN)).addParseAction(push_first)
Change bnf to expr instead of tag_fn:
bnf <<= expr
With these two changes, I get this for expr_stack:
exprStack:['tag_name_1', 'proxy.province', 'value', 'return_value', '1', '2', ('test', 2), ('value_equal', 4), ('tag', 2)]

How to reduce the parameters passed to methods?

I have a class, and in that class I have a method that calls multiple other methods.
The problem I am facing is that this combined method and one of the methods it calls both take the same parameter (file_name).
As a result, when I call the combined method, it returns an empty list: [].
So this is the method with the multiple methods in it:
def show_extracted_data_from_file(self, file_name):
self.extractingText.extract_text_from_image(file_name)
total_fruit = self.filter_verdi_total_number_fruit()
fruit_name = self.filter_verdi_fruit_name()
fruit_total_cost = self.filter_verdi_total_fruit_cost(file_name)
return "\n".join("{} \t {} \t {}".format(a, b, c) for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost))
and this is the method: filter_verdi_total_fruit_cost:
def filter_verdi_total_fruit_cost(self, file_name):
locale.setlocale(locale.LC_ALL, locale='Dutch')
self.extractingText.extract_text_from_image(file_name)
return [
locale.atof(items[-1]) for items in (
token.split() for token in file_name.split('\n')
) if len(items) > 2 and items[1] in self.extractingText.list_fruit
]
this method returns the following data:
[123.2, 2772.0, 46.2, 577.5, 69.3, 3488.16, 137.5, 500.0, 1000.0, 2000.0, 1000.0, 381.25]
You see that I am calling two times file_name.
and so when I calling the method show_extracted_data_from_file in the views.py:
if uploadfile.image.path.endswith('.pdf'):
content = filter_text.show_extracted_data_from_file(uploadfile.image.path)
print(content)
it produces a empty list: []
Question: how can I reduce the parameter file_name so that it will return the correct results?
this are my two other methods that I am calling in the combined method:
def filter_verdi_total_number_fruit(self):
regex = r"(\d*(?:\.\d+)*)\s*\W+(?:" + '|'.join(re.escape(word)
for word in self.extractingText.list_fruit) + ')'
return re.findall(regex, self.extractingText.text_factuur_verdi[0])
def filter_verdi_fruit_name(self):
regex = r"(?:\d*(?:\.\d+)*)\s*\W+(" + '|'.join(re.escape(word)
for word in self.extractingText.list_fruit) + ')'
return re.findall(regex, self.extractingText.text_factuur_verdi[0])
So this is the other class:
class ExtractingTextFromFile:
    """Runs OCR over a file and keeps the recognised text, one entry per page image."""

    def extract_text_from_image(self, filename):
        """OCR every image rendered from *filename*.

        Resets ``self.text_factuur_verdi``, appends the recognised text of each
        image to it and returns the list.
        """
        self.text_factuur_verdi = []
        # NOTE(review): `wi` is presumably wand.image.Image - confirm the import.
        pdf_file = wi(filename=filename, resolution=300)
        all_images = pdf_file.convert('jpeg')

        for image in all_images.sequence:
            image = wi(image=image)
            image = image.make_blob('jpeg')
            image = Image.open(io.BytesIO(image))

            text = pytesseract.image_to_string(image, lang='eng')
            self.text_factuur_verdi.append(text)

        return self.text_factuur_verdi

    def __init__(self):
        # Was misspelled `tex_factuur_verdi`, which left `text_factuur_verdi`
        # undefined until extract_text_from_image() had been called.
        self.text_factuur_verdi = []
        # Product names looked up in the invoice text (duplicate 'Rettich' removed).
        self.list_fruit = ['Appels', 'Ananas', 'Peen Waspeen',
                           'Tomaten Cherry', 'Sinaasappels',
                           'Watermeloenen', 'Rettich', 'Peren', 'Peen',
                           'Mandarijnen', 'Meloenen', 'Grapefruit']
@AndrewRyan has the right idea.
I presume calling extract_text_from_image just adds the attribute list_fruit
Two routes you can go, from what you are commenting you'll probably just go with #1.. but I gave #2 as another option in case you'd ever want to call filter_verdi_total_fruit_cost by itself.
Path 1, Just remove it.
Note: filter_verdi_total_fruit_cost is only called from show_extracted_data_from_file.
def show_extracted_data_from_file(self, file_name):
# extract text
# Note: stores data in `self.extractingText.list_fruit`
self.extractingText.extract_text_from_image(file_name)
total_fruit = self.filter_verdi_total_number_fruit()
fruit_name = self.filter_verdi_fruit_name()
fruit_total_cost = self.filter_verdi_total_fruit_cost()
return "\n".join("{} \t {} \t {}".format(a, b, c) for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost))
def filter_verdi_total_fruit_cost(self):
    """Return the cost (last column, parsed as a float) of each line naming a known fruit.

    Expects ``self.extractingText.extract_text_from_image`` to have been
    called already, so that ``text_factuur_verdi`` holds the OCR text.
    """
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    # Parse the extracted text, not the file path: `file_name` is no longer a
    # parameter here, and splitting the path never yielded invoice lines.
    invoice_text = self.extractingText.text_factuur_verdi[0]
    return [
        locale.atof(items[-1]) for items in (
            token.split() for token in invoice_text.split('\n')
        ) if len(items) > 2 and items[1] in self.extractingText.list_fruit
    ]
Path 2, Check if it's already extracted- if not, extract; if so, continue
Note: if you wanted to just call filter_verdi_total_fruit_cost
def show_extracted_data_from_file(self, file_name):
# extract text
# Note: stores data in `self.extractingText.list_fruit`
self.extractingText.extract_text_from_image(file_name)
total_fruit = self.filter_verdi_total_number_fruit()
fruit_name = self.filter_verdi_fruit_name()
fruit_total_cost = self.filter_verdi_total_fruit_cost(file_name)
return "\n".join("{} \t {} \t {}".format(a, b, c) for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost))
def filter_verdi_total_fruit_cost(self, file_name):
    """Return the cost (last column, parsed as a float) of each line naming a known fruit.

    Runs the extraction for *file_name* first when no text has been
    extracted yet, so the method can also be called on its own.
    """
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    # The extractor stores its result in `text_factuur_verdi`; `list_fruit`
    # is always set in __init__, so it cannot signal "already extracted".
    if not getattr(self.extractingText, 'text_factuur_verdi', None):
        # file hasn't been extracted yet.. extract it
        self.extractingText.extract_text_from_image(file_name)
    # Parse the extracted text, not the file path.
    invoice_text = self.extractingText.text_factuur_verdi[0]
    return [
        locale.atof(items[-1]) for items in (
            token.split() for token in invoice_text.split('\n')
        ) if len(items) > 2 and items[1] in self.extractingText.list_fruit
    ]

Casting result of an F function to an int

I have a simple Django application where I try to aggregate multiple values into an annotation for easier processing on the client side.
Basically, I need to sum the values of multiple columns into one.
For this I'm trying to use annotate with F functions:
qs = TimeReport.objects \
.filter(year=year, term=term) \
.annotate(
created_by_first_name=F('created_by__first_name'),
created_by_last_name=F('created_by__last_name'),
total_hours = F('master_thesis_supervision_hours')
+ F('semester_project_supervision_hours')
+ F('other_job_hours')
+ F('MAN_hours')
+ F('exam_proctoring_and_grading_hours')
+ F('class_teaching_exam_hours')
+ F('class_teaching_practical_work_hours')
+ F('class_teaching_preparation_hours')
+ F('class_teaching_teaching_hours'),
) \
.all().values()
Surprisingly, when I inspect the content of the calculated field, it does not contain anything:
list(qs)[0]['total_hours']
None
Trying to cast the result of the F function does not work either:
...
total_hours = int(F('master_thesis_supervision_hours'))
+int(F('semester_project_supervision_hours'))
+ ...
I also tried to update the models.py to add a property:
@property
def total_hours(self):
    """Sum of the hour fields of this report, computed in Python on the instance."""
    return (
        self.master_thesis_supervision_hours
        + self.class_teaching_total_hours
        + self.semester_project_supervision_hours
        + self.other_job_hours
        + self.MAN_hours
        + self.exam_proctoring_and_grading_hours
    )
and update the views.py accordingly:
qs = TimeReport.objects \
.filter(year=year, term=term) \
.annotate(
created_by_first_name=F('created_by__first_name'),
created_by_last_name=F('created_by__last_name'),
total_hours = F('total_hours'),
) \
.all().values()
But I get the following error:
django.core.exceptions.FieldError: Cannot resolve keyword 'total_hours' into field.
What would be the correct way to do this?

How do I improve my 'for' loop performance in terms of speed?

I have a for loop that takes almost 20 seconds to iterate over 6907 rows. The loop builds a list of the unique region names in the given queryset.
I have placed timestamps at various places in the code to record the timings. The 'for' loop which is taking more time is between variables 't3' and 't4'.
timestamps
t = 12:27:22:169533
t2 = 12:27:22:173535
t3 = 12:27:22:793567
6907
t4 = 12:27:42:907362
t5 = 12:27:43:242596
t6 = 12:27:43:242596
6907 is the length of my queryset sales_data
views.py
class MSZoneProduct(generic.TemplateView):
template_name = 'sales/MSZoneProduct.html'
form_class = MSZoneProductForm
def get(self, request, *args, **kwargs):
if request.user.is_authenticated:
form = self.form_class(request.GET)
context = {'form': form}
if form.is_valid():
zone_code_ = form.cleaned_data['zone_name']
product_code_ = form.cleaned_data['product_name']
t = datetime.now().strftime('%H:%M:%S:%f')
print("t = " + t)
product = Product.objects.get(product_code=product_code_)
t2 = datetime.now().strftime('%H:%M:%S:%f')
print("t2 = " + t2)
sales_data = Sales.objects.filter(zone_code=zone_code_, product_code=product).select_related()
t3 = datetime.now().strftime('%H:%M:%S:%f')
print("t3 = " + t3)
print(len(sales_data))
regions = []
message = ""
regions_dict = {}
for x in sales_data:
if x.region_name not in regions:
regions.append(x.region_name)
else:
continue
t4 = datetime.now().strftime('%H:%M:%S:%f')
print("t4 = " + t4)
for x in regions:
sum_ = 0
for y in sales_data:
if y.region_name == x:
sum_ = sum_ + y.quantity
regions_dict[x] = sum_
t5 = datetime.now().strftime('%H:%M:%S:%f')
print("t5 = " + t5)
if len(regions) == 0:
message = "There is no data available for this product in this particular region."
context = {'form': form, 'message': message, 'data': regions_dict}
t6 = datetime.now().strftime('%H:%M:%S:%f')
print("t6 = " + t6)
return render(request, 'sales/MSZoneProduct.html', context)
return render(request, 'sales/MSZoneProduct.html', context)
else:
return redirect('/sales/')
Based on your view, you want to sum up the quantities per region. We can move all this logic into a database query. This will not only make it more efficient, but more elegant as well:
from django.db.models import Sum
sales_data = Sales.objects.filter(
zone_code=zone_code_, product_code=product
).values('region_name').annotate(
total_quantity=Sum('quantity')
).order_by('region_name')
This will result in a QuerySet that contains dictionaries with two elements: 'region_name' that maps to the name of the region, and total_quantity that sums up the quantity for that region.
Next we can convert it to a dictionary regions_dict with:
regions_dict = { r['region_name']: r['total_quantity'] for r in sales_data }
The entire code from sales_data = ... to t5 = ... can be much more efficiently done with dict comprehension and itertools.groupby:
from itertools import groupby
from operator import itemgetter
regions_dict = {k: sum(map(itemgetter('quantity'), g)) for k, g in groupby(Sales.objects.filter(zone_code=zone_code_, product_code=product).order_by('region_name').values('region_name', 'quantity'), itemgetter('region_name'))}
Use a set constructed with list comprehensions?
regions = set(x.region_name for x in sales_data)
The only problem is if you need the items in order. They could be sorted after the fact, or you could append them to a list only when they are newly added to the set.
Edit: I agree that doing it in the database is the better approach (@Willem Van Onsem's answer), but this answer will be relevant for anyone who isn't using Django.
Improving the for
It is possible to directly store the sums at the first go in the dictionary with complexity O(N)
regions_dict_sums = {x.region_name:0 for x in sales_data}
for x in sales_data:
regions_dict_sums[x.region_name]+=x.quantity
So the total regions where sales are greater than zero will be hashed in the dictionary as keys and their values are the total sales for that region, accessible at constant time.

Order by a join alias in sqlalchemy

I have used a manual join (Query.join, not joinedload) in sqlalchemy for some reason. I have used aliases since I have multiple joins to a single table. Now I want to sort the result by one of the relations' fields. How can I use Query.order_by with an aliased name? When I do this, I get an ambiguous %(####) instead of the field name in the query.
if self.order_by:
entity = self.cls
for field, order in self.order_by:
if '.' in field:
m = re.match(r'(.+)\.(.+)', field)
if m.group(1) not in self.aliases:
for item in m.group(1).split('.'):
cls = inspect(entity)
attr = cls.attrs[item]
entity = get_type(attr)
if attr.innerjoin:
aliased_entity = aliased(entity)
self.aliases[m.group(1)] = aliased_entity
_query = _query.join(aliased_entity, item).options(contains_eager(item,
alias=aliased_entity))
else:
aliased_entity = aliased(entity)
self.aliases[m.group(1)] = aliased_entity
_query = _query.outerjoin(aliased_entity, item).options(contains_eager(item,
alias=aliased_entity))
if order == "desc":
_query = _query.order_by(self.get_order_by_field(field).desc())
else:
_query = _query.order_by(self.get_order_by_field(field).asc())
And then:
def get_order_by_field(self, field: str) -> Column:
if '.' in field:
m = re.match(r'(.+)\.(.+)', field)
if m.group(1) in self.aliases:
return getattr(self.aliases[m.group(1)], m.group(2))
else:
return Column(self.column_map[field])
else:
return Column(field)
See example:
# aliased() is provided by sqlalchemy.orm; the alias gives the second join to
# ModelUser its own name so it can be referenced in order_by().
entity = sqlalchemy.orm.aliased(ModelUser)
session.query(ModelLog.id, ModelLog.date, ModelUser.id.label('current_user_id'),
              entity.id.label('prev_user_id')) \
    .join(ModelUser, ModelUser.id == ModelLog.id_model_user) \
    .join(entity, entity.id == ModelLog.id_prev_model_user) \
    .filter(...) \
    .order_by(entity.id.asc(), ModelUser.id.desc())

Categories