Python elegant way to order an array of dictionary - python

I have the data in this format -
[
{'gstin_code': 'A',
'price_effective': 1199.0,
'company_id': 489,
'month': datetime.datetime(2018, 6, 1, 0, 0),
'year': datetime.datetime(2018, 1, 1, 0, 0)
},
{'gstin_code': 'B',
'price_effective': 1199.0,
'company_id': 489,
'month': datetime.datetime(2018, 6, 1, 0, 0),
'year': datetime.datetime(2018, 1, 1, 0, 0)
},
{'gstin_code': 'C',
'price_effective': 1199.0,
'company_id': 489,
'month': datetime.datetime(2018, 6, 1, 0, 0),
'year': datetime.datetime(2018, 1, 1, 0, 0)
}
]
The output expected is this -
{
"2": {
"2018": {
"3": {
"27AA": 1799
},
"4": {
"03AA": 1299,
"04AA": 1499,
"05AA": 699,
"06AA": 599,
"07AA": 199,
"09AA": 499,
"19AA": 599,
"23AA": 770,
"27AA": 420,
"27AA": 499
},
"5": {
"03AA": 1399,
"27AA": 399,
"27AA": 640
}
}
}
}
i.e {company_id:{year:{month:{gst:price_effective}}}}
Any of these values (company_id, year, month, gst) may also be None.
The solution which works is this -
for f_bag in data:
if f_bag['company_id'] in result and f_bag['company_id'] is not None:
if f_bag['year'].year in result[f_bag['company_id']] and f_bag['year'].year is not None:
if f_bag['month'].month in result[f_bag['company_id']][f_bag['year'].year] and f_bag['month'].month is not None:
if f_bag['gstin_code'] in result[f_bag['company_id']][f_bag['year'].year][f_bag['month'].month] and f_bag['gstin_code'] is not None:
pass
else:
result[f_bag['company_id']][f_bag['year'].year][f_bag['month'].month][f_bag['gstin_code']] = f_bag['price_effective']
else:
result[f_bag['company_id']][f_bag['year'].year][f_bag['month'].month] = {}
else:
result[f_bag['company_id']][f_bag['year'].year] = {}
else:
result[f_bag['company_id']] = {}
Any elegant way to avoid these if else statements?

with collections.defaultdict you can remove the checks whether the key exist in the dictionary:
from collections import defaultdict
def nested_dict():
    """Return a defaultdict whose missing keys are created as nested defaultdicts."""
    return defaultdict(nested_dict)


def toNested(data):
    """Group rows as {company_id: {year: {month: {gstin_code: price_effective}}}}.

    Args:
        data: iterable of dicts with the keys 'company_id', 'year', 'month',
            'gstin_code' and 'price_effective'; 'year' and 'month' are
            datetime-like objects (their ``.year`` / ``.month`` are used).

    Returns:
        A nested defaultdict. When two rows share the same four keys, the
        later row's price overwrites the earlier one.

    Rows in which any of the four grouping keys is None are skipped: a None
    'year' or 'month' has no ``.year`` / ``.month`` attribute, and a None
    key would otherwise end up as a bucket in the result.
    """
    nd = nested_dict()
    for row in data:
        company = row['company_id']
        year = row['year']
        month = row['month']
        gstin = row['gstin_code']
        if any(part is None for part in (company, year, month, gstin)):
            continue
        nd[company][year.year][month.month][gstin] = row['price_effective']
    return nd
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint( toNested(data) )

Related

Python - Iterate over a dict with list as values and create new dict based on list values

I need to create a new dict for each value in the list for each key
dict = {'app_clicks': [56, 65, 41, 40, 64],
'billed_charge_local_micro': [219941307,
247274715,
181271175,
164359644,
223745830],
'billed_engagements': [85831, 89976, 60566, 55304, 88839],
'card_engagements': None,
'carousel_swipes': None,
'clicks': [322, 351, 225, 197, 337],
'engagements': [363, 397, 258, 233, 383],
'follows': None,
'impressions': [86236, 90763, 60596, 55689, 88916],
'likes': [4, 2, 3, 1, 8],
'media_engagements': [41, 45, 33, 36, 46],
'media_views': [33533, 35665, 23611, 21957, 35792],
'poll_card_vote': None,
'qualified_impressions': None,
'replies': [0, 1, 0, 0, 0],
'retweets': None,
'tweets_send': None,
'unfollows': None,
'url_clicks': [56, 65, 41, 40, 64],
'video_15s_views': [27859, 29801, 19852, 18974, 30373],
'video_3s100pct_views': [16441, 17699, 11112, 10337, 16993],
'video_6s_views': [17332, 18785, 12126, 11517, 18663],
'video_content_starts': [81824, 85312, 58449, 53392, 84893],
'video_cta_clicks': None,
'video_total_views': [33533, 35665, 23611, 21957, 35792],
'video_views_100': [27861, 29774, 19840, 18982, 30386],
'video_views_25': [72137, 75452, 51049, 46138, 74744],
'video_views_50': [48377, 50961, 34242, 31603, 51323],
'video_views_75': [35233, 37444, 24959, 23430, 38004]}
And basically I need to create a new list with dicts for each value on the list.
So it would be something like this
new_list_with_dicts = [
{ "billed_charge_local_micro" : 219941307,
"clicks": 322,
"impressions" : 86236,
},
{ "billed_charge_local_micro" : 247274715,
"clicks": 351,
"impressions" : 90763,
},
{ "billed_charge_local_micro" : 181271175,
"clicks": 225,
"impressions" : 60596,
},
]
This is one of the ways I tried:
i = 0
list_data = []
dict = {}
for key,value in report.items():
if isinstance(value, list):
while i < len(value):
dict[key] = value[i]
pprint.pprint(dict)
i = i + 1
i = 0
But it is creating the dict with repeated values, apparently for the last item of each array
I hope I could explain clearly
I'm stuck in this so any help would be much appreciated.
Thanks in advance
I believe this solves the problem. It seems like you were almost there but got some of the syntax wrong.
# Only list-valued entries contribute to the output; None-valued ones are skipped.
series = {key: value for key, value in report.items() if isinstance(value, list)}

# Size the output from the longest list rather than from the first value in
# the dict, which may be None (no len) or shorter than a later list.
list_data = [{} for _ in range(max(map(len, series.values()), default=0))]

# Row i collects the i-th element of every list; a list shorter than the
# longest one simply leaves its key out of the trailing rows.
for key, values in series.items():
    for i, value in enumerate(values):
        list_data[i][key] = value
print(list_data)
and this should return the desired output assuming your dictionary is stored in report

OperationFailure: unknown top level operator: $ne (MongoDB)

What is wrong with this code? When I try to run it I get OperationFailure: unknown top level operator: $ne full error: {'ok': 0.0, 'errmsg': 'unknown top level operator: $ne', 'code': 2, 'codeName': 'BadValue'}.
Any ideas what this means? Thank you in advance :)
import pandas as pd
def length_vs_references(articles):
res = {"1-5" : 0, "6-10" : 0, "11-15" : 0, "16-20" : 0, "21-25" : 0, "25-30" : 0, ">30" :0}
n = {"1-5" : 0, "6-10" : 0, "11-15" : 0, "16-20" : 0, "21-25" : 0, "25-30" : 0, ">30" :0}
cursor = articles.aggregate([
{'$match': {'$and' : [{'references': {'$exists': False}
}, {'$ne':['$page_end', '']}, {'$ne':['$page_start', '']} ]}},
{'$project': {'len_refernces': {"$size": '$references'},
'pages': {'$subtract': [{"$toInt": 'page_end'},
{"$toInt" : 'page_start'}]}}},
{'$bucket' :{
'$groupBy': '$pages',
'boundaries': [ 0, 6, 11, 16, 21, 26, 31, 1000000],
'default': 'Other',
'key': {
'output': {"average": {"$avg" : '$len_references'}},
}
}
}
])
return cursor
print(length_vs_references(articles))
Reading between the lines I suspect you want:
cursor = articles.aggregate([
    # $size raises on a missing or non-array field, so keep only documents
    # whose `references` is an array. Blank page bounds are excluded because
    # $toInt cannot convert an empty string.
    {'$match': {'references': {'$type': 'array'},
                'page_end': {'$ne': ''},
                'page_start': {'$ne': ''}}},
    {'$project': {'len_references': {'$size': '$references'},
                  'pages': {'$subtract': [{'$toInt': '$page_end'},
                                          {'$toInt': '$page_start'}]}}},
    {'$bucket': {
        'groupBy': '$pages',
        'boundaries': [0, 6, 11, 16, 21, 26, 31, 1000000],
        # Page counts outside the boundaries (e.g. negative) land here.
        'default': 'Other',
        # Supplying `output` replaces the implicit per-bucket count, so the
        # count is kept explicitly next to the average reference count.
        'output': {'count': {'$sum': 1},
                   'average': {'$avg': '$len_references'}},
    }},
])
You don't need to AND your match filters as they are ANDed by default. I'm guessing you are trying to filter out blank page_end and page_start items. If not, please describe what you are trying to do.

Convert tuples to list of dictionary

I'm having trouble converting two tuples into a list of dictionaries. Here is the structure:
train_detail = (Counter({2: 50, 0: 62, 1: 38}),
{2: 0.3333333333333333, 0: 0.41333333333333333, 1: 0.25333333333333335})
test_detail = (Counter({2: 6, 0: 49, 1: 4}),
{2: 0.1016949152542373, 0: 0.8305084745762712, 1: 0.06779661016949153})
Now i want to turn these two into a structure like the following:
[
{
"label": "0",
"trainPercent": 0.41333333333333333,
"trainNumber": 62,
"testPercent": 0.8305084745762712,
"testNumber": 49,
},
{
"label": "1",
"trainPercent": 0.25333333333333335,
"trainNumber": 38,
"testPercent": 0.06779661016949153,
"testNumber": 4,
},
{
"label": "2",
"trainPercent": 0.3333333333333333,
"trainNumber": 50,
"testPercent": 0.1016949152542373,
"testNumber": 6,
},
]
What's an effective way of doing that with minimal looping? Thank you. Note that Counter is a subclass of dict, so it inherits every method of a regular dict.
from pprint import pprint
from collections import Counter
train_detail = (
    Counter({2: 50, 0: 62, 1: 38}),
    {2: 0.3333333333333333, 0: 0.41333333333333333, 1: 0.25333333333333335},
)
test_detail = (
    Counter({2: 6, 0: 49, 1: 4}),
    {2: 0.1016949152542373, 0: 0.8305084745762712, 1: 0.06779661016949153},
)

# Each *_detail tuple is (counts per label, fraction per label).
train_counts, train_fractions = train_detail
test_counts, test_fractions = test_detail

# One record per label, in the iteration order of the training counter.
out = [
    {
        'label': str(label),
        'trainNumber': train_counts[label],
        'trainPercent': train_fractions[label],
        'testPercent': test_fractions[label],
        'testNumber': test_counts[label],
    }
    for label in train_counts
]

# pretty print to screen:
pprint(out)
Prints:
[{'label': '2',
'testNumber': 6,
'testPercent': 0.1016949152542373,
'trainNumber': 50,
'trainPercent': 0.3333333333333333},
{'label': '0',
'testNumber': 49,
'testPercent': 0.8305084745762712,
'trainNumber': 62,
'trainPercent': 0.41333333333333333},
{'label': '1',
'testNumber': 4,
'testPercent': 0.06779661016949153,
'trainNumber': 38,
'trainPercent': 0.25333333333333335}]
Use lambda function.lambda is used to unpack tuples.
Thanks to @Andrej Kesely and @Björn Marschollek.
To combine their answers:
# One record per label found in the training counter.
labels_detail_list = [
    {
        'label': str(i),
        'trainNumber': train_detail[0][i],
        'trainPercent': train_detail[1][i],
        'testNumber': test_detail[0][i],
        'testPercent': test_detail[1][i],
    }
    for i in train_detail[0]
]
# sorted() returns a new list and leaves its argument untouched, so the
# result must be bound back to the name for the ordering to take effect.
# Labels are compared numerically so that e.g. '10' sorts after '2'.
labels_detail_list = sorted(labels_detail_list, key=lambda x: int(x['label']))
should be more concise version.

Fuzzywuzzy for a list of dictionaries

I have a list of dictionaries (API response) and I use the following function to search for certain nations:
def nation_search(self):
    """Find a nation record by exact, case-insensitive name.

    Every entry's "nation" is compared against ``self`` first; only if no
    nation matches are the "leader" values compared. Returns the first
    matching entry of ``nations_v2``, or False when nothing matches.
    """
    wanted = f"{self}".lower()
    # Nation names take priority: the whole list is scanned for a nation
    # match before any leader is considered.
    for field in ("nation", "leader"):
        for item in nations_v2:
            if item[field].lower() == wanted:
                return item
    return False
2 examples :
nations_v2 = [{'nation_id': 5270, 'nation': 'Indo-Froschtia', 'leader': 'Saxplayer', 'continent': 2, 'war_policy': 4, 'domestic_policy': 2, 'color': 15, 'alliance_id': 790, 'alliance': 'Rose', 'alliance_position': 3, 'cities': 28, 'offensive_wars': 0, 'defensive_wars': 0, 'score': 4945, 'v_mode': False, 'v_mode_turns': 0, 'beige_turns': 0, 'last_active': '2020-08-10 04:04:48', 'founded': '2014-08-05 00:09:31', 'soldiers': 0, 'tanks': 0, 'aircraft': 2100, 'ships': 0, 'missiles': 0, 'nukes': 0},
{'nation_id': 582, 'nation': 'Nightsilver Woods', 'leader': 'Luna', 'continent': 4, 'war_policy': 4, 'domestic_policy': 2, 'color': 10, 'alliance_id': 615, 'alliance': 'Seven Kingdoms', 'alliance_position': 2, 'cities': 23, 'offensive_wars': 0, 'defensive_wars': 0, 'score': 3971.25, 'v_mode': False, 'v_mode_turns': 0, 'beige_turns': 0, 'last_active': '2020-08-10 00:22:16', 'founded': '2014-08-05 00:09:35', 'soldiers': 0, 'tanks': 0, 'aircraft': 1725, 'ships': 115, 'missiles': 0, 'nukes': 0}]
I want to add a fuzzy-search using fuzzywuzzy to get 5 possible matches in case there's a spelling error in the argument passed into the function but I can't seem to figure it out.
I only want to search in values for nation and leader.
If you need 5 possible matches, use process.
from fuzzywuzzy import process
def nation_search(self):
    """Fuzzy-match ``self`` against nation names and leader names.

    Returns a pair ``(matched_nations, matched_leaders)``, each being the
    result of ``process.extract`` limited to five candidates. Matching is
    done on lower-cased text, so the returned names are lower-case too.
    """
    needle = f"{self}".lower()
    # Build both lower-cased candidate lists before running any matching.
    lowered = {
        field: [v2[field].lower() for v2 in nations_v2]
        for field in ("nation", "leader")
    }
    return (
        process.extract(needle, lowered["nation"], limit=5),
        process.extract(needle, lowered["leader"], limit=5),
    )

parse data from Dictionary in python

So i have this dictionary "runs":
[{
'id': 12,
'suite_id': 2,
'name': 'name',
'description': "desc.",
'nice_id': 3,
'joku_id': None,
'onko': False,
'eikai': False,
'tehty': None,
'config': None,
'config_ids': [],
'passed_count': 1,
'blocked_count': 2,
'untested_count': 3,
'retest_count': 4,
'failed_count': 5,
'custom_status1_count': 0,
'custom_status2_count': 0,
'custom_status3_count': 0,
'custom_status4_count': 0,
'custom_status5_count': 0,
'custom_status6_count': 0,
'custom_status7_count': 0,
'projekti_id': 1,
'plan_id': None,
'created_on': 12343214,
'created_by': 11,
'url': 'google.com'
}, {
'id': 16,
'suite_id': 2,
'name': 'namae)',
'description': "desc1",
'nice_id': 5,
'joku_id': None,
'onko': False,
'eikai': False,
'tehty': None,
'config': None,
'config_ids': [],
'passed_count': 100,
'blocked_count': 1,
'untested_count': 3,
'retest_count': 2,
'failed_count': 5,
'custom_status1_count': 0,
'custom_status2_count': 0,
'custom_status3_count': 0,
'custom_status4_count': 0,
'custom_status5_count': 0,
'custom_status6_count': 0,
'custom_status7_count': 0,
'prokti_id': 7,
'plan_id': None,
'created_on': 4321341644,
'created_by': 11,
'url': 'google.com/2' }
there is "id" for about 50 times. that is just a part of it.
I need to find all the "id" values (not joku_id, nice_id, etc. Only "id") and make a string/dict of them
and same for name, and description
i have tried:
j = json.load(run)
ids = (j["id"])
j = json.load(run)
names = (j["name"])
j = json.load(run)
descriptions = (j["description"])
but it returns:
AttributeError: 'list' object has no attribute 'read'
I also need to send a request with specific id and in this case the specific id is marked by o. so id[o]
the request code is below:
test = client.send_get('get_tests/1/ ')
so i need to have the id[o] instead of the 1.
i have tried
test = client.send_get('get_tests/' + id[o] + '/ ')
but it returns:
TypeError: 'int' object is not subscriptable
May be this can help you.
# Collect the top-level "id" of every run, in order; .get() yields None for
# a run that has no "id" key.
# NOTE: the name `id` shadows the builtin; it is kept because the
# surrounding discussion refers to the list by this name.
id = [run.get('id') for run in runs]
[12, 16]
You are trying to pass a list to the json.load function. Please read the docs: load() does not accept lists; it accepts
a .read()-supporting file-like object containing a JSON document.
If you want your result in list of dictionary then:
# One single-entry dict per wanted field of every record, in record order.
result = [
    {x: y}
    for record in data
    for x, y in record.items()
    if x in ('id', 'name', 'description')
]
output:
[{'name': 'name'}, {'id': 12}, {'description': 'desc.'}, {'name': 'namae)'}, {'id': 16}, {'description': 'desc1'}]
Here, data is your list of dictionaries.
I hope this answer is helpful to you.

Categories