Add count next to each aggregated value - python

I currently aggregate a query to get a unique list of strings used in my endpoint. As of now, the fetched data looks like this:
{
"data": [
"Beige",
"Grey",
...
]
}
However, I'm attempting to do something like this:
{
"data": [
{
name: "Beige",
count: 7
},
{
name: "Grey",
count: 3
},
...
]
}
Where count is the number of times the value occurs in the database.
Currently, my viewset is structured like this:
class ProductFiltersByCategory(APIView):
    """
    This viewset takes the category parameter from the url and returns related product filters
    """

    def get(self, request, *args, **kwargs):
        """
        Gets parameter in urls and aggregated filtered result
        """
        # URL kwarg, e.g. /filters/<category>/
        category = self.kwargs['category']
        # Collect the distinct color names of available products whose
        # category's parent matches the URL parameter (case-insensitive).
        aggregated_result = Product.objects.filter(
            category__parent__name__iexact=category,
            status='available'
        ).distinct().aggregate(data=ArrayAgg('colors__name', distinct=True))
        return Response(aggregated_result)
How do i go about making the key-value pair structure i want, as well as do the count for each "color"?

You can annotate the Count of each category and group by that category using .values(). Since your relations involve a many-to-many field, you probably just want to do the query on the Color model rather than the Product model.
# Query from the Color side of the many-to-many relation: restrict to colors of
# available products in the category, then count distinct products per color.
# NOTE(review): .values('name', 'product') groups by the (name, product) pair,
# which looks like it would yield a row per product rather than per color --
# confirm the intended grouping is .values('name') alone.
Color.objects.filter(
product__category__parent__name__iexact=category,
product__status='available'
).values('name', 'product').annotate(
colors_count=Count('product', distinct=True)
).values_list('name', 'colors_count')

You can do this with collections.Counter:
from random import choices
from collections import Counter
from pprint import pprint

# Candidate colour names to sample from.
colours = ['White', 'Yellow', 'Blue', 'Red', 'Green', 'Black', 'Brown', 'Azure',
           'Ivory', 'Teal', 'Silver', 'Purple', 'Navy blue', 'Pea green', 'Gray',
           'Orange', 'Maroon', 'Charcoal', 'Aquamarine', 'Coral', 'Fuchsia',
           'Wheat', 'Lime', 'Crimson', 'Khaki', 'Hot pink', 'Magenta', 'Olden',
           'Plum', 'Olive', 'Cyan']

# Simulate the endpoint payload: ten colours drawn at random, with replacement.
data = {"data": choices(colours, k=10)}
pprint(data)

# Tally occurrences, then reshape each (colour, occurrences) pair into a dict.
tally = Counter(data["data"])
newData = {"data": [{"name": colour, "count": seen} for colour, seen in tally.items()]}
pprint(newData)
Sample Output:
{'data': ['Coral',
'Ivory',
'Red',
'Crimson',
'Azure',
'Ivory',
'Red',
'Red',
'Khaki',
'Hot pink']}
{'data': [{'count': 1, 'name': 'Coral'},
{'count': 2, 'name': 'Ivory'},
{'count': 3, 'name': 'Red'},
{'count': 1, 'name': 'Crimson'},
{'count': 1, 'name': 'Azure'},
{'count': 1, 'name': 'Khaki'},
{'count': 1, 'name': 'Hot pink'}]}

You can use Counter from the collections library to count the values.
You can try something like this:
from collections import Counter

# Pull the list of colour names out of the endpoint payload.
# FIX: the original indexed with the bare name `data` (a NameError);
# the payload key is the string 'data'.
data = get_data_from_endpoint(request)['data']
c = Counter(data)
# One {'name': ..., 'count': ...} entry per distinct value.
result = {'data': [{'name': element, 'count': c[element]} for element in c]}

Related

How can I collect key-value pairs of dictionaries into one large dictionary in Python?

I have a dictionary in the following format:
data = {
'Bob': {
'age': 12,
'weight': 150,
'eye_color': 'blue'
},
'Jim': {
'favorite_food': 'cherries',
'sport': 'baseball',
'hobby': 'running'
},
'Tom': {
'strength': 'average',
'endurance': 'high',
'heart_rate': 'low'
}
}
What is the most Pythonic way to concatenate all of the dictionaries within dict into a new dictionary so that I would end up with something like the following:
new_dict = {
'age': 12,
'weight': 150,
'eye_color': 'blue',
'favorite_food': 'cherries',
'sport': 'baseball',
'hobby': 'running',
'strength': 'average',
'endurance': 'high',
'heart_rate': 'low'
}
You can use functools.reduce() to build up the result, unioning one dictionary at a time:
from functools import reduce

data = {
    'Bob': {'age': 12, 'weight': 150, 'eye_color': 'blue'},
    'Jim': {'favorite_food': 'cherries', 'sport': 'baseball', 'hobby': 'running'},
    # FIX: the question's data uses 'heart_rate'; the original answer
    # introduced a 'hear_rate' typo here.
    'Tom': {'strength': 'average', 'endurance': 'high', 'heart_rate': 'low'},
}

# Union the inner dicts one at a time. dict(**x, **y) raises TypeError on a
# duplicate key, which is fine here because the inner key sets are disjoint.
result = reduce(lambda x, y: dict(**x, **y), data.values(), {})
print(result)
This outputs:
{'age': 12, 'weight': 150, 'eye_color': 'blue', 'favorite_food': 'cherries',
'sport': 'baseball', 'hobby': 'running', 'strength': 'average',
'endurance': 'high', 'hear_rate': 'low'}
On Python 3.9 or higher, you can use `lambda x, y: x | y`, `operator.or_`, or `dict.__or__` instead of `lambda x, y: dict(**x, **y)`. The latter two are from a suggestion by Mad Physicist.
One option is to use a dictionary comprehension with a nested generator expression:
new_dict = {k: v for d in data.values() for k, v in d.items()}
Another way that's subtly different is to use collections.ChainMap:
new_dict = collections.ChainMap(*data.values())
In this case, new_dict will not be a dict, but will quack like one just fine. Lookup will be a bit slower, but construction will be faster.

Python List data to Django ORM query

Let say, I have a list data for example:
data = [
{'id': 1, 'name': 'brad', 'color': 'red', 'tags': [], 'author': {'name': 'admin'}},
{'id': 2, 'name': 'sylvia', 'color': 'blue', 'tags': [], 'author': {'name': 'user'}},
{'id': 3, 'name': 'sylwia', 'color': 'green', 'tags': [], 'author': {'name': 'admin'}},
{'id': 4, 'name': 'shane', 'color': 'red', 'tags': [], 'author': {'name': 'admin'}},
{'id': 5, 'name': 'shane', 'color': 'red', 'tags': ['python', 'django'], 'author': {'name': 'user'}}
]
and I want to make it ORM'able, such as what Django has doing:
ModelName.objects.filter(color__icontains="gree")
And this what I have do;
import operator
from collections import namedtuple
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
class DataQuerySet:
    """
    Minimal Django-ORM-style query interface over a list of dicts.

    https://stackoverflow.com/a/58351973/6396981
    """

    # Supported "__op" lookup suffixes mapped to binary predicates.
    # NOTE(review): 'icontains' maps to operator.contains, which is a
    # case-sensitive membership test, not the case-insensitive match the
    # Django lookup name suggests -- confirm intent.
    allowed_operations = {
        'gt': operator.gt,
        'lt': operator.lt,
        'eq': operator.eq,
        'icontains': operator.contains
    }

    def __init__(self, data):
        self.data = data

    def all(self):
        """Return every record, unfiltered."""
        return self.data

    def filter(self, **kwargs):
        """
        Return the records matching every keyword lookup.

        >>> kwargs = {'name': 'sylwia', 'id__gt': 1}
        >>> DataQuerySet(data).filter(**kwargs)
        [{'id': 3, 'name': 'sylwia', 'color': 'green'}]
        """
        operation = namedtuple('Q', 'op key value')

        def parse_filter(item):
            """item is expected to be a tuple with exactly two elements

            >>> parse_filter(('id__gt', 2))
            Q(op=<built-in function gt>, key='id', value=2)
            >>> parse_filter(('id__ ', 2))
            Q(op=<built-in function eq>, key='id', value=2)
            >>> parse_filter(('color__bad', 'red'))
            Traceback (most recent call last):
            ...
            AssertionError: 'bad' operation is not allowed
            """
            key, *op = item[0].split('__')
            # no value after __ means exact value query, e.g. name='sylvia'
            op = ''.join(op).strip() or 'eq'
            assert op in self.allowed_operations, f'{repr(op)} operation is not allowed'
            return operation(self.allowed_operations[op], key, item[1])

        filtered_data = self.data.copy()
        # Apply each parsed predicate as a successive narrowing pass.
        for item in map(parse_filter, kwargs.items()):
            filtered_data = [
                entry for entry in filtered_data
                if item.op(entry[item.key], item.value)
            ]
        return filtered_data

    def get(self, **kwargs):
        """
        Return exactly one matching record (a dict, not a list).

        Raises MultipleObjectsReturned if several records match, and
        ObjectDoesNotExist if none do.

        >>> DataQuerySet(data).get(id=3)
        {'id': 3, 'name': 'sylwia', 'color': 'green'}
        """
        operation = namedtuple('Q', 'op key value')

        def parse_get(item):
            # get() only supports exact-match lookups; any __suffix is ignored.
            key, *op = item[0].split('__')
            return operation(self.allowed_operations['eq'], key, item[1])

        filtered_data = self.data.copy()
        for item in map(parse_get, kwargs.items()):
            filtered_data = [
                entry for entry in filtered_data
                if item.op(entry[item.key], item.value)
            ]
        if len(filtered_data) > 1:
            raise MultipleObjectsReturned(filtered_data)
        elif len(filtered_data) < 1:
            raise ObjectDoesNotExist(kwargs)
        return filtered_data[0]
And to use it:
class DataModel:
    """Wrap a list of dicts behind a Django-style ``objects`` manager."""

    def __init__(self, data):
        self._data = DataQuerySet(data)

    # FIX: the original paste had '#property', which turns the decorator into
    # a comment; it must be the @property decorator for d.objects to work.
    @property
    def objects(self):
        return self._data
# Demo dataset: nested 'author' dicts and list-valued 'tags' are included to
# probe the lookups DataQuerySet does not yet support.
data = [
{'id': 1, 'name': 'brad', 'color': 'red', 'tags': [], 'author': {'name': 'admin'}},
{'id': 2, 'name': 'sylvia', 'color': 'blue', 'tags': [], 'author': {'name': 'user'}},
{'id': 3, 'name': 'sylwia', 'color': 'green', 'tags': [], 'author': {'name': 'admin'}},
{'id': 4, 'name': 'shane', 'color': 'red', 'tags': [], 'author': {'name': 'admin'}},
{'id': 5, 'name': 'shane', 'color': 'red', 'tags': ['python', 'django'], 'author': {'name': 'user'}}
]
d = DataModel(data)
# Supported lookups: __gt / __lt suffixes, exact match, and (case-sensitive) __icontains.
print(d.objects.filter(id__gt=2))
print(d.objects.filter(color='green'))
print(d.objects.filter(color__icontains='gree'))
print(d.objects.get(id=1))
The above tests work properly, but it seems we have a problem when we want to do more:
print(d.objects.filter(tags__in=['python']))
print(d.objects.filter(author__name='admin'))
print(d.objects.filter(author__name__icontains='use'))
Finally, I found a nice module to handle that case, it called with reobject, and here is the test:
from reobject.models import Model, Field
from reobject.query.parser import Q as Query

data = [
    {'name': 'brad', 'color': 'red', 'tags': [], 'author': {'name': 'admin'}},
    {'name': 'sylvia', 'color': 'blue', 'tags': [], 'author': {'name': 'user'}},
    {'name': 'sylwia', 'color': 'green', 'tags': [], 'author': {'name': 'admin'}},
    {'name': 'shane', 'color': 'red', 'tags': [], 'author': {'name': 'admin'}},
    {'name': 'shane', 'color': 'red', 'tags': ['python', 'django'], 'author': {'name': 'user'}}
]


# FIX: class body and loop indentation were lost in the paste; restored here.
class Book(Model):
    name = Field()
    color = Field()
    tags = Field()
    author = Field()


# Instantiating a Model subclass registers the record in reobject's store.
for item in data:
    Book(**item)

# Django-style queries, including nested-attribute and Q-object lookups.
Book.objects.all()
Book.objects.get(name='brad')
Book.objects.filter(name='brad')
Book.objects.filter(author__name='admin')
Book.objects.filter(tags__contains='python')
Book.objects.filter(Query(author__name='admin') | Query(author__name='user'))
Meanwhile, it still doesn't support id or pk fields —
maybe because those names are already taken.

Mapping JSON key-value pairs from source to destination using Python

Using Python requests I want to grab a piece of JSON from one source and post it to a destination. The structure of the JSON received and the one required by the destination, however, differs a bit so my question is, how do I best map the items from the source structure onto the destination structure?
To illustrate, imagine we get a list of all purchases made by John and Mary. And now we want to post the individual items purchased linking these to the individuals who purchased them (NOTE: The actual use case involves thousands of entries so I am looking for an approach that would scale accordingly):
Source JSON:
{
'Total Results': 2,
'Results': [
{
'Name': 'John',
'Age': 25,
'Purchases': [
{
'Fruits': {
'Type': 'Apple',
'Quantity': 3,
'Color': 'Red'}
},
{
'Veggie': {
'Type': 'Salad',
'Quantity': 2,
'Color': 'Green'
}
}
]
},
{
'Name': 'Mary',
'Age': 20,
'Purchases': [
{
'Fruits': {
'Type': 'Orange',
'Quantity': 2,
'Color': 'Orange'
}
}
]
}
]
}
Destination JSON:
{
[
{
'Purchase': 'Apple',
'Purchased by': 'John',
'Quantity': 3,
'Type': 'Red',
},
{
'Purchase': 'Salad',
'Purchased by': 'John',
'Quantity': 2,
'Type': 'Green',
},
{
'Purchase': 'Orange',
'Purchased by': 'Mary',
'Quantity': 2,
'Type': 'Orange',
}
]
}
Any help on this would be greatly appreciated! Cheers!
Just consider loop through the dict.
res = []
for result in d['Results']:
    for purchase in result['Purchases']:
        # Each purchase is a one-key dict ({'Fruits': {...}}); grab the inner record.
        item = list(purchase.values())[0]
        # FIX: build a fresh dict per purchase. The original created one dict
        # per person before the inner loop, so the same object was mutated and
        # appended repeatedly -- every purchase by a person collapsed into
        # duplicates of the last one, contradicting the sample output shown.
        value = {
            'Purchase': item['Type'],
            'Purchased by': result['Name'],
            'Quantity': item['Quantity'],
            'Type': item['Color'],
        }
        res.append(value)
pprint(res)
[{'Purchase': 'Apple', 'Purchased by': 'John', 'Quantity': 3, 'Type': 'Red'},
{'Purchase': 'Salad', 'Purchased by': 'John', 'Quantity': 2, 'Type': 'Green'},
{'Purchase': 'Orange', 'Purchased by': 'Mary', 'Quantity': 2, 'Type': 'Orange'}]

Join dictionaries so that previous dictionary names are now 2nd layer keys?

I am attempting to organize new dictionaries based on the results of a df.groupby('fruits'). I now have dictionaries setup like the following,
print(type(flavors))
<class 'dict'>
print(flavors)
{'apples':['crisp', 'tart'],
'oranges':['citrusy','juicy', 'sour'],
'bananas':['sweet']}
print(colors)
{'apples':['red', 'green'],
'oranges':['orange','orange', 'rotten'],
'bananas':['yellow']}
print(farms)
{'apples':['upstate orchard', 'dreamy estates'],
'oranges':['crop culture','sandy bay', 'heartland'],
'bananas':['horticulture heros']}
How could I join them in a new dictionary like the following?
print(fruits_dict)
{'apples': {'flavors': ['crisp', 'tart'],
'colors': ['red', 'green'],
'farms': ['upstate orchard', 'dreamy estates'] },
'oranges': {'flavors': ['citrusy','juicy', 'sour'],
'colors': ['orange','orange', 'rotten'],
'farms': ['crop culture','sandy bay', 'heartland'] },
'bananas': {'flavors': ['sweet'],
'colors': ['yellow'],
'farms': ['horticulture heros'] } }
# Group the three source mappings under their target sub-keys, then emit one
# nested entry per fruit. Assumes colors and farms have every key flavors has,
# otherwise the inner lookup raises KeyError -- TODO confirm with caller.
dicts = {'flavors' : flavors, 'colors': colors, 'farms' : farms}
result = {key: { k : dicts[k][key] for k in dicts} for key in flavors}
You can use this example with dict comprehension how to get your output:
flavors = {
    "apples": ["crisp", "tart"],
    "oranges": ["citrusy", "juicy", "sour"],
    "bananas": ["sweet"],
}
colors = {
    "apples": ["red", "green"],
    "oranges": ["orange", "orange", "rotten"],
    "bananas": ["yellow"],
}
farms = {
    "apples": ["upstate orchard", "dreamy estates"],
    "oranges": ["crop culture", "sandy bay", "heartland"],
    "bananas": ["horticulture heros"],
}

# Group the per-attribute dicts under their target labels, then assemble one
# nested record per fruit that appears in all three sources. Missing fruits
# fall back to an empty list via .get().
sources = {"flavors": flavors, "colors": colors, "farms": farms}
shared_fruits = flavors.keys() & colors.keys() & farms.keys()

out = {}
for fruit in shared_fruits:
    out[fruit] = {label: table.get(fruit, []) for label, table in sources.items()}

from pprint import pprint

pprint(out)
Prints:
{'apples': {'colors': ['red', 'green'],
'farms': ['upstate orchard', 'dreamy estates'],
'flavors': ['crisp', 'tart']},
'bananas': {'colors': ['yellow'],
'farms': ['horticulture heros'],
'flavors': ['sweet']},
'oranges': {'colors': ['orange', 'orange', 'rotten'],
'farms': ['crop culture', 'sandy bay', 'heartland'],
'flavors': ['citrusy', 'juicy', 'sour']}}

Reading collection from UnQLite?

I'm using nobonobo's python bindings to unqlite and am running into an issue when attempting to work with a JSON document collection.
In the README, there is this JX9 script:
# Jx9 script assembled via implicit string concatenation: creates the 'users'
# collection, stores two records, prints the whole collection, then iterates
# and prints record by record.
sample = (
"db_create('users'); /* Create the collection users */"
"db_store('users',{ 'name' : 'dean' , 'age' : 32 });"
"db_store('users',{ 'name' : 'chems' , 'age' : 27 });"
"print db_fetch_all('users')..'\n';"
"while( ($rec = db_fetch('users')) != NULL ){"
" print $rec; print '\n';"
"}"
)
This correctly prints each record:
[{"name":"dean","age":32,"__id":0},{"name":"chems","age":27,"__id":1}]
{"name":"dean","age":32,"__id":0}
{"name":"chems","age":27,"__id":1}
However when I try to read the collection in Python using a callback, I get garbage back:
#unqlitepy.OutputCallback
def f(output, outlen, udata):
output = (c_char*outlen).from_address(output).raw
print locals()
return unqlitepy.UNQLITE_OK
db.fetch_cb('users', f)
This is the output:
{'udata': None, 'output': 'a\x1e\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02D\xa7\x83\x0b', 'outlen': 22L}
Similarly if I grab a cursor and print the first user in the users collection, I get this:
'users_0' '\x01\x08\x00\x00\x00\x04name\x05\x08\x00\x00\x00\x04dean\x06\x08\x00\x00\x00\x03age\x05\n\x00\x00\x00\x00\x00\x00\x00 \x06\x08\x00\x00\x00\x04__id\x05\n\x00\x00\x00\x00\x00\x00\x00\x00\x06\x02'
Does anybody know what might be happening? Is there some way to decode the data returned to python?
I wrote some new bindings which make all of this way easier: https://github.com/coleifer/unqlite-python
>>> users.store([
... {'name': 'Charlie', 'color': 'green'},
... {'name': 'Huey', 'color': 'white'},
... {'name': 'Mickey', 'color': 'black'}])
True
>>> users.store({'name': 'Leslie', 'color': 'also green'})
True
>>> users.fetch(0) # Fetch the first record.
{'__id': 0, 'color': 'green', 'name': 'Charlie'}
>>> users.delete(0) # Delete the first record.
True
>>> users.delete(users.last_record_id()) # Delete the last record.
True
>>> users.all()
[{'__id': 1, 'color': 'white', 'name': 'Huey'},
{'__id': 2, 'color': 'black', 'name': 'Mickey'}]
>>> users.filter(lambda obj: obj['name'].startswith('H'))
[{'__id': 1, 'color': 'white', 'name': 'Huey'}]

Categories