Related
given the list of dictionaries below, I want to do the following things:
1: Sort the following data by key (top level)'name'
2: Sort the by the nested key "name" under key "items"
3: Group values under items by aggregation interval for example "1d"
4: Get again the min max and avg result from step number 3\
Atm, I resolve this by iter down to the values and group them with pandas, aggregate again min max and avg from result.
This way feels really tricky, and the performance is not given.
Can someone help me out?
[
{
'_id': 2,
'name': 'b',
'device': 'b',
'items': [
{
'item_id': 'item_id_2', 'name': 'item_2', 'unit': 'b/s',
'values': [
{'time': datetime.datetime(2022, 9, 5, 15, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 16, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 17, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 18, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 19, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 20, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
]
}
]
},
{
'_id': 1,
'name': 'a',
'device': 'a',
'items': [
{
'item_id': 'item_id_1', 'name': 'item_1', 'unit': 'b/s',
'values': [
{'time': datetime.datetime(2022, 9, 5, 15, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 16, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 17, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 18, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 19, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
{'time': datetime.datetime(2022, 9, 5, 20, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
]
}
]
}
]
As for the result, I would expect something like this:
[
{
'_id': 1,
'name': 'a',
'device': 'a',
'items': [
{
'item_id': 'item_id_1', 'name': 'item_1', 'unit': 'b/s',
'values': [
{'time': datetime.datetime(2022, 9, 5, 0, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
]
}
]
},
{
'_id': 1,
'name': 'b',
'device': 'b',
'items': [
{
'item_id': 'item_id_2', 'name': 'item_2', 'unit': 'b/s',
'values': [
{'time': datetime.datetime(2022, 9, 5, 0, 0), 'min': 0.0, 'max': 1.0, 'avg': 0.5},
]
}
]
}
]
With the initial list of dicts that you provided and that I choose to call data, here is one way to do it:
df = pd.DataFrame(data)
# First, sort values
df = df.assign(temp=df["items"].apply(lambda x: x[0]["name"])).pipe(
lambda df_: df_.sort_values(by="temp").drop(columns="temp").reset_index(drop=True)
)
# Get aggregated as new column 'temp'
dfs = df["items"].apply(lambda x: pd.DataFrame(x[0].pop("values", None)))
df["temp"] = pd.Series(
[
{
k: v[0]
for k, v in df.set_index("time")
.resample("D")
.mean()
.reset_index()
.to_dict(orient="list")
.items()
}
for df in dfs
]
)
df["items"] = df["items"].apply(lambda x: x[0])
# Merge intermediate dictionaries
df["items"] = df.apply(lambda x: x["items"] | {"values": [x["temp"]]}, axis=1)
df = df.drop(columns="temp")
And so:
print(df.to_json(orient="records"))
# Output
[
{
"_id": 1,
"name": "a",
"device": "a",
"items": {
"item_id": "item_id_1",
"name": "item_1",
"unit": "b\\/s",
"values": [{"time": 1662336000000, "min": 0.0, "max": 1.0, "avg": 0.5}],
},
},
{
"_id": 2,
"name": "b",
"device": "b",
"items": {
"item_id": "item_id_2",
"name": "item_2",
"unit": "b\\/s",
"values": [{"time": 1662336000000, "min": 0.0, "max": 1.0, "avg": 0.5}],
},
},
]
I have 2 dataframes
df1 = pd.DataFrame(data={'ID': ['0','1'], 'col1': [0.73, 0.58], 'col2': [0.51, 0.93], 'Type': ['mean', 'mean'] })
df2 = pd.DataFrame(data={'ID': ['0','1'], 'col1': [0.44, 0.49], 'col2': [0.50, 0.24], 'Type': ['std', 'std'] })
print(df1)
print(df2)
I need to convert to nested dictionary like
mydict = {0: {'col1': {'mean': 0.73, 'std': 0.44}, 'col2': {'mean': 0.51, 'std': 0.5}},
1: {'col1': {'mean': 0.58, 'std': 0.49}, 'col2': {'mean': 0.93, 'std': 0.24}}}
where 'ID' as key, column names as nested key and 'Type' as nested keys and column values as values
Use concat with DataFrame.pivot for MultiIndex DataFrame and then convert to nested dict:
df = pd.concat([df1, df2]).pivot('Type', 'ID')
d = {level: df.xs(level, axis=1, level=1).to_dict() for level in df.columns.levels[1]}
print (d)
{'0': {'col1': {'mean': 0.73, 'std': 0.44},
'col2': {'mean': 0.51, 'std': 0.5}},
'1': {'col1': {'mean': 0.58, 'std': 0.49},
'col2': {'mean': 0.93, 'std': 0.24}}}
(df1.drop(columns = 'Type').melt('ID', value_name='mean')
.merge(df2.drop(columns='Type').melt('ID', value_name='std'))
.assign(c = lambda x:x[['mean', 'std']].to_dict('records'))
.pivot('variable','ID', 'c').to_dict())
{'0': {'col1': {'mean': 0.73, 'std': 0.44},
'col2': {'mean': 0.51, 'std': 0.5}},
'1': {'col1': {'mean': 0.58, 'std': 0.49},
'col2': {'mean': 0.93, 'std': 0.24}}}
I have created a for loop and what I want is that the end result of each iteration of the loop to be stored as a dictionary(tfdict). Now what I need is all the dicts to be combined in one dict get that final dict.
for i in range(0,len(sep)):
n=len(sep[i])
tfDict = dict.fromkeys(setwords,0)
for word in sep[i]:
tfDict[word]+=1
tfDict[word] = tfDict[word]/n
x=fin.values()
for word,val in tfDict.items():
for w,v in fin.items():
x = v
if(word==w):
tfDict[word]=val*x
print(tfDict)
here on print this inside the loop, I get the needed output
{'and': 0.0, 'document': 0.23783346831109634, 'first': 0.0, 'is': 0.16666666666666666, 'one': 0, 'second': 0.16666666666666666, 'the': 0.16666666666666666, 'third': 0, 'this': 0.16666666666666666}
{'and': 0.3193817886456925, 'document': 0.0, 'first': 0.0, 'is': 0.16666666666666666, 'one': 0.16666666666666666, 'second': 0, 'the': 0.16666666666666666, 'third': 0.16666666666666666, 'this': 0.16666666666666666}
{'and': 0.0, 'document': 0.24462871026284194, 'first': 0.3021651247531982, 'is': 0.2, 'one': 0, 'second': 0, 'the': 0.2, 'third': 0, 'this': 0.2}
Now, I want all of this outside the loop as well, in from of dict of dict or panda. Is there a way i can do this?
I have two list of dictionaries and wanted to create new list of dictionary from existing two list of dictionaries. dict1 have all the details about person (pid, pname , pscore, sid) and dict2 have details about person with city (pid, cid, cscore) wanted to create new list of dictionary where pid from dict1 matches pid of dict2 and add pid, pname, pscore, cscore from both list of dictionaries where match happened into list of new_dict. Any help will be appreciated. Thanks in advance.
dict1 = [{'pid': [7830351800, 8756822045, 7985031822, 8882181833],
'pname': ['ABC', 'XYZ', 'QWE', 'MNQ'],
'pscore': [0.8, 0.8, 0.8, 0.8],
'sid': 8690694}]
dict2 = [{'pid': 7830351800, 'cid': [1, 2, 3, 4], 'cscore': [0.8, 0.78, 0.7, 0.45]},
{'pid': 8756822045, 'cid': [5, 6, 7, 8], 'cscore': [0.9, 0.88, 0.8, 0.75]},
{'pid': 7985031822, 'cid': [9, 10, 11, 12], 'cscore': [0.5, 0.48, 0.3, 0.25]},
{'pid': 8882181833, 'cid': [2, 13, 14, 15], 'cscore': [0.6, 0.58, 0.5, 0.45]}]
new_dict = [{'pid': 7830351800,
'pname': 'ABC',
'pscore': 0.8,
'cid': [1, 2, 3, 4],
'cscore': [0.8, 0.78, 0.7, 0.45]},
{'pid': 8756822045,
'pname': 'XYZ',
'pscore': 0.8,
'cid': [5, 6, 7, 8],
'cscore': [0.9, 0.88, 0.8, 0.75]},
{'pid': 7985031822,
'pname': 'QWE',
'pscore': 0.8,
'cid': [9, 10, 11, 12],
'cscore': [0.5, 0.48, 0.3, 0.25]},
{'pid': 8882181833,
'pname': 'MNQ',
'pscore': 0.8,
'cid': [2, 13, 14, 15],
'cscore': [0.6, 0.58, 0.5, 0.45]}]
I tried below code but ran into error. I am not able to understand how to solve this. Just started learning python:
new_dict = {}
for k, v in dict1[0].items():
if v[0] in dict2[0]['pid']:
new_dict = dict({'pid': v[0], 'pname' :v[0], 'pscore':v[0], 'cid':dict2[0]['cid'], 'cscore':dict2[0]['score']})
print(new_dict)
dict1 = dict1[0]
pname_dict = {key:value for key,value in zip(dict1['pid'], dict1['pname'])}
pscore_dict = {key:value for key,value in zip(dict1['pid'], dict1['pscore'])}
ans = dict2.copy()
for d in ans:
d['pname'] = pname_dict[d['pid']]
d['pscore'] = pscore_dict[d['pid']]
Output :
>> ans
[{'pid': 7830351800,
'cid': [1, 2, 3, 4],
'cscore': [0.8, 0.78, 0.7, 0.45],
'pname': 'ABC',
'pscore': 0.8},
{'pid': 8756822045,
'cid': [5, 6, 7, 8],
'cscore': [0.9, 0.88, 0.8, 0.75],
'pname': 'XYZ',
'pscore': 0.8},
{'pid': 7985031822,
'cid': [9, 10, 11, 12],
'cscore': [0.5, 0.48, 0.3, 0.25],
'pname': 'QWE',
'pscore': 0.8},
{'pid': 8882181833,
'cid': [2, 13, 14, 15],
'cscore': [0.6, 0.58, 0.5, 0.45],
'pname': 'MNQ',
'pscore': 0.8}]
Create 2 dictionaries to match pid ->pname and pid->pscore. These dictionaries are used to add the other 2 key values to the dict2
I have a list of dictionaries that I would like to filter and create a vector that corresponds to the list values. That list files contains several entries, where each one has a field time, item, state: {values1, value2, value3}. The variable item could take the following 11 values [0.0, 0.1, 0.2, 0.3 ... 1.0]. For each of whose values I would like to find the correspondent value3 values and create a vector of 11 elements where each one should correspond to value3 for the associated item variable. For example if my list is:
my_json = [{'time': datetime.datetime(2018, 7, 4, 13, 42, 55, 613000), 'item': 0.3, 'state': {'value1': 0.0, 'value2': 0.167, 'value3': 0.833}}
{'time': datetime.datetime(2018, 7, 6, 9, 40, 54, 44000), 'item': 0.6, 'state': {'value1': 0.0, 'value2': 0.273, 'value3': 0.727}}
{'time': datetime.datetime(2018, 7, 6, 10, 0, 16, 507000), 'item': 0.5, 'state': {'value1': 0.0, 'value2': 0.0, 'value3': 1.0}}
{'time': datetime.datetime(2018, 7, 6, 10, 37, 16, 769000), 'item': 0.5, 'state': {'value1': 0.0, 'value2': 0.0, 'value3': 1.0}}
{'time': datetime.datetime(2018, 7, 6, 10, 38, 28, 948000), 'item': 0.5, 'state': {'value1': 0.0, 'value2': 0.143, 'value3': 0.857}}
{'time': datetime.datetime(2018, 7, 6, 10, 41, 11, 201000), 'item': 0.4, 'state': {'value1': 0.0, 'value2': 0.091, 'value3': 0.909}}
{'time': datetime.datetime(2018, 7, 6, 11, 45, 25, 145000), 'item': 0.1, 'state': {'value1': 0.0, 'value2': 0.083, 'value3': 0.917}}
{'time': datetime.datetime(2018, 7, 6, 11, 46, 31, 508000), 'item': 0.1, 'state': {'value1': 0.0, 'value2': 0.0, 'value3': 1.0}}
{'time': datetime.datetime(2018, 7, 6, 11, 46, 33, 120000), 'item': 0.1, 'state': {'value1': 0.0, 'value2': 0.214, 'value3': 0.786}}
{'time': datetime.datetime(2018, 7, 6, 12, 36, 25, 695000), 'item': 0.0, 'state': {'value1': 0.0, 'value2': 0.0, 'value3': 1.0}}
{'time': datetime.datetime(2018, 7, 6, 12, 37, 35, 721000), 'item': 0.0, 'state': {'value1': 0.0, 'value2': 0.0, 'value3': 1.0}}]
The desired output of the above example is: [1.0, 0.76, 0.0, 0.833, 0.909, 0.857, 0.727, 0.0, 0.0, 0.0, 0.0] that is keeping also the most recent value (when there are multiple item values) by taking into account time. I have tried to solve it using if-else statements, however, I would like a more elegant solution.
Create a dictionary whose keys are the item values. Loop through my_json, assigning value3 to the corresponding element.
d = {}
for i in my_json:
d[i['item']] = i['state']['value3']
I'm assuming the list is already sorted by the timestamp; if not, sort the list first.
I've extended the above solution to include sorting by time in case it is needed
sorted_list = sorted(my_json, key=lambda k: k['time'])
d = {}
for i in sorted_list:
d[i['item']] = i['state']['value3']