Create one 'list' by userID

Create one 'list' by userID - python

I want to create a list per user so i got this jsonfile:
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price":8,
},
]
I'm on python and I want to have a result like
for the user with 'id':1 [1,10,10]
and for the user with "id": "2": [3,8]
so two lists corresponding to the prices according to the ids
is it possible to do that in python ?
note, in fact user id are UUID type and randomly generated.
edit: quantity was a mistake all data are price and id, sorry

collections.defaultdict to the rescue.
Assuming you really do have mixed quantities and prices and you don't care about mixing them into the same list,
from collections import defaultdict
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"quantity": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 8,
},
]
# Group the single non-id value of every record under that record's id.
by_id = defaultdict(list)
for item in data:
    remaining = item.copy()  # work on a copy so the input records stay untouched
    # Named user_id rather than id so the builtin id() is not shadowed.
    user_id = remaining.pop("id")
    # Whatever the other key is called ("price" or "quantity"), grab its value.
    other_value = remaining.popitem()[1]
    by_id[user_id].append(other_value)
print(dict(by_id))
The output is
{'1': [1, 10, 10], '2': [3, 8]}
If you actually only do have prices, the loop is simpler:
by_id = defaultdict(list)
for item in data:
by_id[item["id"]].append(item.get("price"))
or
by_id = defaultdict(list)
for item in data:
by_id[item["id"]].append(item["price"])
to fail fast when the price is missing.

First:
your data structure, {[]}, is not supported in Python.
Assume your data is:
my_json = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"quantity": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price":8,
},
]
Then you can achieve it with this:
# Group each record's price (or its quantity when it has no price) by id.
results = {}
for data in my_json:
    value = data.get('price')
    # Fall back to the quantity only when the price is really absent; using
    # `or` here would also throw away a legitimate price of 0.
    if value is None:
        value = data.get('quantity')
    results.setdefault(data.get('id'), []).append(value)
print(results)
output:
{'1': [1, 10, 10], '2': [3, 8]}

Maybe like this:
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"quantity": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 8,
}
]
# Collect the price of every entry under its id; entries without a "price"
# key contribute None.
result = {}
for item in data:
    result.setdefault(item['id'], []).append(item.get('price'))
print(result)
Here None is stored in place of the missing price for that entry; the quantity key is ignored.
Result:
{'1': [1, 10, 10], '2': [None, 8]}

A simple loop that enumerates your list (it's not JSON) in conjunction with setdefault() is all you need:
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 8,
}
]
# Map every id to the list of prices seen for it, in input order.
dict_ = {}
for d in data:
    key = d['id']
    if key not in dict_:
        dict_[key] = []
    dict_[key].append(d['price'])
print(dict_)
Output:
{'1': [1, 10, 10], '2': [3, 8]}
Note:
This will fail (KeyError) if either 'id' or 'price' is missing from the dictionaries in the list

Related

How to eliminate duplicate items while adding them to their own structure

I have a list of dictionary items, with each dictionary containing a list of presentation items. The sample dictionaries below are a small prototype of my real data set.
I need to remove duplicate presentations based on day (one presentation per day) and store them in a new dictionary with the same structure within the existing list.
So starting with:
[
{
"time": "04:00-20:59",
"category": 1,
"presentations": [
{
"presentation": "ABC",
"day": 7,
},
{
"presentation": "DEF",
"day": 7,
},
{
"presentation": "GHI",
"day": 8,
},
{
"presentation": "JKL",
"day": 8,
},
{
"presentation": "MNO",
"day": 9,
},
{
"presentation": "PQR",
"day": 9,
},
{
"presentation": "STU",
"day": 9,
}
]
} #only one dictionary item in the list for simplicity
]
The end result should be three dictionaries containing lists of presentations where there is one presentation for a given day:
[
{
"time": "04:00-20:59",
"category": 1,
"presentations": [
{
"presentation": "ABC",
"day": 7
},
{
"presentation": "DEF",
"day": 8
},
{
"presentation": "GHI",
"day": 9
}
]
},
{
"time": "04:00-20:59",
"category": 1,
"presentations": [
{
"presentation": "JKL",
"day": 7
},
{
"presentation": "MNO",
"day": 8
},
{
"presentation": "PQR",
"day": 9
}
]
},
{
"time": "04:00-20:59",
"category": 1,
"presentations": [
{
"presentation": "STU",
"day": 9
}
]
}
]
I don't know how to go about removing these duplicates (based on day) while adding them to their own dictionary.

Get different values from repeating item JSON

I have this json derived dict:
{
"stats": [
{
"name": "Jengas",
"time": 166,
"uid": "177098244407558145",
"id": 1
},
{
"name": "- k",
"time": 20,
"uid": "199295228664872961",
"id": 2
},
{
"name": "MAD MARX",
"time": "0",
"uid": "336539711785009153",
"id": 3
},
{
"name": "loli",
"time": 20,
"uid": "366299640976375818",
"id": 4
},
{
"name": "Woona",
"time": 20,
"uid": "246996981178695686",
"id": 5
}
]
}
I want to get the "time" from everybody in the list and use it with sort.
So the result I get has this:
TOP 10:
Jengas: 166
Loli: 20
My first try is to list different values from repeating item.
Right now the code is:
with open('db.json') as json_data:
topvjson = json.load(json_data)
print(topvjson)
d = topvjson['stats'][0]['time']
print(d)

Extract the stats list, apply sort to it with the appropriate key:
from json import loads
data = loads("""{
"stats": [{
"name": "Jengas",
"time": 166,
"uid": "177098244407558145",
"id": 1
}, {
"name": "- k",
"time": 20,
"uid": "199295228664872961",
"id": 2
}, {
"name": "MAD MARX",
"time": "0",
"uid": "336539711785009153",
"id": 3
}, {
"name": "loli",
"time": 20,
"uid": "366299640976375818",
"id": 4
}, {
"name": "Woona",
"time": 20,
"uid": "246996981178695686",
"id": 5
}]
}""")
def _time_of(entry):
    """Return the entry's "time" as an int (the data mixes ints and numeric strings)."""
    return int(entry['time'])


# Sort the stats list in place, largest time first, then print the top ten.
stats = data['stats']
stats.sort(key=_time_of, reverse=True)
print("TOP 10:")
for top_entry in stats[:10]:
    line = "%s: %d" % (top_entry['name'], _time_of(top_entry))
    print(line)
This prints:
TOP 10:
Jengas: 166
- k: 20
loli: 20
Woona: 20
MAD MARX: 0
Note that your time values are not consistently typed: most are integers (e.g. 20), but one of them is the string "0". That's why you need the conversion int(...).

You can sort the list of dict values like:
Code:
top_three = [(x[1], -x[0]) for x in sorted(
(-int(user['time']), user['name']) for user in stats['stats'])][:3]
This works by taking the time and the name and building a tuple (the time is negated so that the largest time sorts first). The tuples can then be sorted, and the name (x[1]) and the time (-x[0]) can be extracted after the sort.
Test Code:
stats = {
    "stats": [{
        "name": "Jengas",
        "time": 166,
        "uid": "177098244407558145",
        "id": 1
    }, {
        "name": "- k",
        "time": 20,
        "uid": "199295228664872961",
        "id": 2
    }, {
        "name": "MAD MARX",
        "time": "0",
        "uid": "336539711785009153",
        "id": 3
    }, {
        "name": "loli",
        "time": 20,
        "uid": "366299640976375818",
        "id": 4
    }, {
        "name": "Woona",
        "time": 20,
        "uid": "246996981178695686",
        "id": 5
    }]
}
# Sort (-time, name) tuples so the largest time comes first, then turn each
# tuple back into (name, time). Building (x[1], -x[0]) rather than just x[1]
# keeps the time in the result, matching the printed results.
top_three = [(x[1], -x[0]) for x in sorted(
    (-int(user['time']), user['name']) for user in stats['stats'])][:3]
print(top_three)
Results:
[('Jengas', 166), ('- k', 20), ('Woona', 20)]

Here's a way to do it using the built-in sorted() function:
data = {
"stats": [
{
"name": "Jengas",
"time": 166,
"uid": "177098244407558145",
"id": 1
},
{
etc ...
}
]
}
def _int_time(entry):
    """Return the entry's "time" converted to int, used as the sort key."""
    return int(entry['time'])


print('TOP 3')
sorted_by_time = sorted(data['stats'], key=_int_time, reverse=True)
# Only the first three entries of the descending order are printed.
for entry in sorted_by_time[:3]:
    print('{name}: {time}'.format(**entry))
Output:
TOP 3
Jengas: 166
- k: 20
loli: 20

Transformation of JSON file per field

I have a JSON file in this format :
[
{"itemId": "1", "score": 0.2, "userId": "1", "rank": 1},
{"itemId": "3", "score": 0.1, "userId": "1", "rank": 2},
{"itemId": "12", "score": 0.6, "userId": "2", "rank": 1},
{"itemId": "21", "score": 0.2, "userId": "2", "rank": 2},
...
]
and I would like to sort it by userId like this :
{
{
"userId": "1",
"items": [
{"itemId": "1", "score": 0.2, "rank": 1},
{"itemId": "3", "score": 0.1, "rank": 2},
...
]
},
{
"userId": "2",
"items": [
{"itemId": "12", "score": 0.6, "rank": 1},
{"itemId": "21", "score": 0.2, "rank": 2}
]
},
...
}
I tried to do it by myself with Python but I get an error that says : "TypeError: unhashable type: 'dict'".
Do you have an idea how to do it ?
Thank you !

Accumulate the items in a defaultdict(list), re-keying off the user ids:
from collections import defaultdict
data = [
    {"itemId": "1", "score": 0.2, "userId": "1", "rank": 1},
    {"itemId": "3", "score": 0.1, "userId": "1", "rank": 2},
    {"itemId": "12", "score": 0.6, "userId": "2", "rank": 1},
    {"itemId": "21", "score": 0.2, "userId": "2", "rank": 2},
]
# Accumulate the items of each user, keyed by the numeric user id so that the
# final sort is numeric ("10" after "2") rather than lexicographic.
output = defaultdict(list)
for record in data:
    # Build a new dict without "userId" instead of popping it, so the input
    # records are left unmodified.
    item = {key: value for key, value in record.items() if key != 'userId'}
    output[int(record['userId'])].append(item)
new_data = [{'userId': str(k), 'items': output[k]} for k in sorted(output)]

You could use pandas to load the data, then use groupby and sort, and finally write the result back to JSON in the format you want. See below:
data = '[{"itemId": "1", "score": 0.2, "userId": "1", "rank": 1}, {"itemId": "12", "score": 0.6, "userId": "2", "rank": 1}, {"itemId": "3", "score": 0.1, "userId": "1", "rank": 2}, {"itemId": "21", "score": 0.2, "userId": "2", "rank": 2}]'
import pandas as pd
import collections
import json
from io import StringIO
# Read the JSON text into pandas; wrapping it in StringIO avoids passing a
# literal JSON string to read_json, which newer pandas versions deprecate.
df = pd.read_json(StringIO(data), dtype={"itemId": object, "score": object, "userId": object, "rank": int})
# Group by user id; sort=True yields the groups ordered by key.
g = df.groupby('userId', sort=True)
mylist = []
for k, group in g:
    # create a temp dict holder
    temp_dict = collections.OrderedDict()
    # populate the temp dict
    temp_dict['userId'] = k
    # Select the columns with a list; tuple-style g['a', 'b'] indexing is no
    # longer accepted by current pandas.
    temp_dict['items'] = group[['itemId', 'rank', 'score']].to_dict(orient='records')
    # add the temp dict to the list
    mylist.append(temp_dict)
# print as json
print(json.dumps(mylist, indent=4))
this will result in
[
{
"userId": "1",
"items": [
{
"itemId": "1",
"score": 0.2,
"rank": 1
},
{
"itemId": "3",
"score": 0.1,
"rank": 2
}
]
},
{
"userId": "2",
"items": [
{
"itemId": "12",
"score": 0.6000000000000001,
"rank": 1
},
{
"itemId": "21",
"score": 0.2,
"rank": 2
}
]
}
]

converting list of dictionary to dictionary tree based on parent id

I want to restructure a list of dictionaries so that every element that has a parent id becomes a child of its parent element.
Let's say we have a python list, which contains multiple dictionaries.
[{
"id": 1,
"title": "node1",
"parent": null
},
{
"id": 2,
"title": "node2",
"parent": 1
},
{
"id": 3,
"title": "node3",
"parent": 1
},
{
"id": 4,
"title": "node4",
"parent": 2
},
{
"id": 5,
"title": "node5",
"parent": 2
}]
And I want to convert this list to tree based on parent key. like,
[{
'id':1,
'title':'node1',
'childs':[
{
'id':2,
'title':'node2'
'childs':[
{
'id':4,
'title':'node4',
'childs': []
},
{
'id':5,
'title':'node5',
'childs': []
}
]
},
{
'id':3,
'title':'node3'
'childs':[]
}
]
}]

data = [{
    "id": 1,
    "title": "node1",
    "parent": "null"
},
{
    "id": 2,
    "title": "node2",
    "parent": 1
},
{
    "id": 3,
    "title": "node3",
    "parent": 1
},
{
    "id": 4,
    "title": "node4",
    "parent": 2
},
{
    "id": 5,
    "title": "node5",
    "parent": 2
}]
# Give every node its own child list and index the nodes by id. With the
# index, a child can be attached to its parent at any depth, so node2 no
# longer has to be duplicated as an extra root for node4/node5 to find it.
nodes_by_id = {}
for keys in data:
    keys['childs'] = []
    nodes_by_id[keys['id']] = keys
# Roots (no parent) go into parent_data; every other node is appended to the
# child list of the node its "parent" value refers to.
parent_data = []
for keys in data:
    if keys['parent'] == "null" or keys['parent'] is None:
        parent_data.append(keys)
    else:
        nodes_by_id[keys['parent']]['childs'].append(keys)
print(parent_data)

k = [{
    "id": 1,
    "title": "node1",
    "parent": "null"
},
{
    "id": 2,
    "title": "node2",
    "parent": 1
},
{
    "id": 3,
    "title": "node3",
    "parent": 1
},
{
    "id": 4,
    "title": "node4",
    "parent": 2
},
{
    "id": 5,
    "title": "node5",
    "parent": 2
}]
# First pass: give every node a child list and index it by its own id.
# Indexing up front means a root may have any id (not just 1) and a child may
# appear in the list before its parent.
result, t = [], {}
for i in k:
    i['childs'] = []
    t[i['id']] = i
# Second pass: drop the "parent" key and hang each node either at the top
# level (no parent) or under the node its parent id refers to.
for i in k:
    parent_id = i.pop('parent')
    if parent_id == 'null' or parent_id is None:
        result.append(i)
    else:
        t[parent_id]['childs'].append(i)
print(result)

Comparing lists of dictionaries in Python

I've have read various questions but nothing I have found quite matches this scenario and I can't get it round my head.
I want to compare 2 lists of dictionaries. I don't want to check the individual key value pairs, I want to check the whole dictionary against the other but the gotcha is that one of the dictionaries in one list has an extra item 'id' which the other list doesn't so I don't need to compare that.
status_code and desc are not unique
just desc could change but as far as I'm concerned the whole thing has then changed.
Sample data:
data_db = [
{ "id": 1, "status_code": 2, "desc": "Description sample1" },
{ "id": 2, "status_code": 4, "desc": "Description sample2" },
{ "id": 3, "status_code": 5, "desc": "Description sample3" },
{ "id": 4, "status_code": 5, "desc": "Description sample4" }
]
data_api = [
{ "status_code": 1, "desc": "Description sample5" },
{ "status_code": 4, "desc": "Description sample6" },
{ "status_code": 5, "desc": "Description sample3" }
]
Expected output:
missing_from_db = [
    { "status_code": 1, "desc": "Description sample5" },
    { "status_code": 4, "desc": "Description sample6" } # because its desc differs from the one in data_db
]
missing_from_api = [1,2,4] # This can just be the ids from data_db
I hope this makes sense (as it's confusing enough to me!).
Code wise I've not come up with anything remotely close or useful. Nearest thought I've had is reformatting data_db to this:
data_db = [
{
"id": 1,
"data": { "status_code": 2, "desc": "Description sample1" }
},
{
"id": 2,
"data": { "status_code": 4, "desc": "Description sample2" }
},
{
"id": 3,
"data": { "status_code": 5, "desc": "Description sample3" }
},
{
"id": 4,
"data": { "status_code": 5, "desc": "Description sample4" }
}
]
Thank you!

Reformatting your data_db should work:
data_db = [
{
"id": 1,
"data": { "status_code": 2, "desc": "Description sample1" }
},
{
"id": 2,
"data": { "status_code": 4, "desc": "Description sample2" }
},
{
"id": 3,
"data": { "status_code": 5, "desc": "Description sample3" }
},
{
"id": 4,
"data": { "status_code": 5, "desc": "Description sample4" }
}
]
data_api = [
{ "status_code": 1, "desc": "Description sample5" },
{ "status_code": 4, "desc": "Description sample6" },
{ "status_code": 5, "desc": "Description sample3" }
]
# Extract the 'data' sub-dicts once, rather than rebuilding the list for
# every entry of data_api inside the comprehension.
db_payloads = [x['data'] for x in data_db]
# checking the dicts in data_api against the 'data' sub-dicts in data_db
missing_from_db = [d for d in data_api if d not in db_payloads]
# using similar comprehension to extract the 'id' vals of the 'data' in data_db which aren't in data_api
missing_from_api = [d['id'] for d in data_db if d['data'] not in data_api]
Results:
print missing_from_db
[{'status_code': 1, 'desc': 'Description sample5'},
{'status_code': 4, 'desc': 'Description sample6'}]
print missing_from_api
[1, 2, 4]

This isn't a nice solution and it relies on the particular structure you have, but it works:
data_db = [
{ "id": 1, "status_code": 2, "desc": "Description sample1" },
{ "id": 2, "status_code": 4, "desc": "Description sample2" },
{ "id": 3, "status_code": 5, "desc": "Description sample3" },
{ "id": 4, "status_code": 5, "desc": "Description sample4" }
]
data_api = [
{ "status_code": 1, "desc": "Description sample5" },
{ "status_code": 4, "desc": "Description sample6" },
{ "status_code": 5, "desc": "Description sample3" }
]
def _matches(api_entry, db_entry):
    """Return True when db_entry agrees with api_entry on every key of api_entry."""
    return all(api_entry[key] == db_entry[key] for key in api_entry)


# API entries for which no database row agrees on all of the entry's keys.
lst = [
    dct for dct in data_api
    if not any(_matches(dct, dct2) for dct2 in data_db)
]
# Ids of database rows that no API entry matches.
lst2 = [
    dct2["id"] for dct2 in data_db
    if not any(_matches(dct, dct2) for dct in data_api)
]
print(lst)
print(lst2)

will this help
def find_missing(data1, data2, keys=("status_code", "desc")):
    """Return the entries of ``data2`` that have no counterpart in ``data1``.

    Two entries are counterparts when they hold equal values for every key
    in ``keys``; any other key (such as ``"id"``) is ignored.

    Args:
        data1: List of dicts to look the entries up in.
        data2: List of dicts whose entries are checked.
        keys: Keys that must all be equal for two entries to match.

    Returns:
        A list with the entries of ``data2`` (same objects, original order)
        that match no entry of ``data1``.

    Raises:
        KeyError: If a compared entry lacks one of ``keys``.
    """
    def same_entry(left, right):
        # All keys must agree on the SAME pair of entries. Tracking one flag
        # per key across all rows would wrongly accept an entry whose
        # status_code matches one row and whose desc matches another.
        return all(left[key] == right[key] for key in keys)

    return [
        candidate
        for candidate in data2
        if not any(same_entry(candidate, known) for known in data1)
    ]
data_db = [
{ "id": 1, "status_code": 2, "desc": "Description sample1" },
{ "id": 2, "status_code": 4, "desc": "Description sample2" },
{ "id": 3, "status_code": 5, "desc": "Description sample3" },
{ "id": 4, "status_code": 5, "desc": "Description sample4" }
]
data_api = [
{ "status_code": 1, "desc": "Description sample5" },
{ "status_code": 4, "desc": "Description sample6" },
{ "status_code": 5, "desc": "Description sample3" }
]
# API entries that are absent from the database, and database rows that are
# absent from the API.
missig_from_data_db = find_missing(data_db, data_api)
missing_from_api = find_missing(data_api, data_db)
# Only the ids of the database rows are reported for the second list.
missing_from_api_1 = [entry['id'] for entry in missing_from_api]
print(missig_from_data_db)
print(missing_from_api_1)
Output :
[{'status_code': 1, 'desc': 'Description sample5'}, {'status_code': 4, 'desc': 'Description sample6'}]
[1, 2, 4]

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Create one 'list' by userID - python

Related

How to eliminate duplicate items while adding them to their own structure

Get different values from repeating item JSON

Transformation of JSON file per field

converting list of dictionary to dictionary tree based on parent id

Comparing lists of dictionaries in Python

Categories

Resources