I have a list `dd` of pairs of nested dicts and would like to keep that pair structure when reducing it to a list of dictionaries:
# Sample input: a list of pairs. Every record has an 'id' and a 'detail'
# list containing one entry named 'discard' and one entry to keep.
dd = [
    [
        {
            'id': 'bla',
            'detail': [
                {'name': 'discard', 'amount': '123'},
                {'name': 'KEEP_PAIR_1A', 'amount': '2'},
            ],
        },
        {
            'id': 'bla2',
            'detail': [
                {'name': 'discard', 'amount': '123'},
                {'name': 'KEEP_PAIR_1B', 'amount': '1'},
            ],
        },
    ],
    [
        {
            'id': 'bla3',
            'detail': [
                {'name': 'discard', 'amount': '123'},
                {'name': 'KEEP_PAIR_2A', 'amount': '3'},
            ],
        },
        {
            'id': 'bla4',
            'detail': [
                {'name': 'discard', 'amount': '123'},
                {'name': 'KEEP_PAIR_2B', 'amount': '4'},
            ],
        },
    ],
]
I want to reduce this to a list of paired dictionaries while extracting only some detail. For example, an expected output may look like this:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': [2, 1]},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': [3, 4]}]
I have run my code:
# Collect every kept detail entry from all pairs into ONE flat list.
# NOTE: nothing here records which pair an entry came from, so the merge
# below yields a single combined dict rather than one dict per pair.
pair = []
for all_pairs in dd:
    for output_pairs in all_pairs:
        # Default to an empty list so a record without 'detail' is skipped
        # instead of raising TypeError on iterating None.
        for d in output_pairs.get('detail', []):
            if d['name'] != 'discard':
                pair.append(d)

# Merge the collected dicts key by key: each key maps to the list of its
# values across all kept entries (None where an entry lacks that key).
# dict.fromkeys keeps the keys in first-seen order, unlike a set.
output_pair = {
    k: [d.get(k) for d in pair]
    for k in dict.fromkeys(key for d in pair for key in d)
}
But it didn't maintain that structure :
{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B', 'KEEP_PAIR_2A', 'KEEP_PAIR_2B'],
'amount': ['2', '1', '3', '4']}
I assume I would need to use some list comprehension to solve this, but where in the for loop should I do that to maintain the structure?
Since you want to combine dictionaries in lists, one option is to use dict.setdefault:
# Build one merged dict per pair in `dd`, keeping the pair structure.
pair = []
for all_pairs in dd:
    # A fresh accumulator for each pair is what keeps the pairs separate.
    dct = {}
    for output_pairs in all_pairs:
        # Default to an empty list so a record without 'detail' is skipped
        # instead of raising TypeError on iterating None.
        for d in output_pairs.get('detail', []):
            if d['name'] != 'discard':
                # Append each field's value to that field's list,
                # creating the list on first sight of the key.
                for k, v in d.items():
                    dct.setdefault(k, []).append(v)
    # Appended once per pair, after both of its records were processed.
    pair.append(dct)
Output:
[{'name': ['KEEP_PAIR_1A', 'KEEP_PAIR_1B'], 'amount': ['2', '1']},
{'name': ['KEEP_PAIR_2A', 'KEEP_PAIR_2B'], 'amount': ['3', '4']}]
I have the below list of dictionaries -
# Sample check results; the 'kwargs' entry of each record holds the
# arguments the check was run with.
results = [
    {
        'type': 'check_datatype',
        'kwargs': {
            'table': 'cars',
            'columns': ['car_id', 'index'],
            'd_type': 'str',
        },
        'datasource_path': '/cars_dataset_ok/',
        'Result': False,
    },
    {
        'type': 'check_string_consistency',
        'kwargs': {
            'table': 'cars',
            'columns': ['car_id'],
            'string_length': 6,
        },
        'datasource_path': '/cars_dataset_ok/',
        'Result': False,
    },
]
I want an output list like the one below, where the key and value fields come from the `kwargs` entry of each dictionary in the above list -
id|key|value|index
[[1,table,cars,null],[1,columns,car_id,1],[1,columns,index,2],
[1,d_type,str,null],[2,table,cars,null],[2,columns,car_id,null],[2,string_length,6,null]]
Update - Now, I want one more column in the output - uniquehaschode. Here, unique hashcode means that dictionaries with the same keys and values should generate the same id or hash. Hence, if the key-value pairs in two 'kwargs' dictionaries are the same, they should return the same hashcode. The output should look like this -
[[1,table,cars,null,uniquehaschode1],[1,columns,car_id,1,uniquehaschode1],[1,columns,index,2,uniquehaschode1],
[1,d_type,str,null,uniquehaschode1],[2,table,cars,null,uniquehaschode2],[2,columns,car_id,null,uniquehaschode2],[2,string_length,6,null,uniquehaschode2]]
Also, I don't want to insert anything into this table if a particular uniquehaschode already exists.
Update2: I want to create a dataframe with the schema below. args_id will be the same for each unique pair of (kwargs and check_name). I want to process the above list of dictionaries every day, and hence across runs on different dates args_id should be the same if a
unique pair of (kwargs and check_name) comes up again. I want to store this result in a dataframe every day and then put it into my Spark Delta table.
Type|time|args_id
check_datatype|2021-03-29|0
check_string_consistency|2021-03-29|1
check_datatype|2021-03-30|0
Until now, I was using the code below -
# One single-column row per check, holding only its 'type'.
# (The original comprehension had an unbalanced bracket.)
type_results = [[elt['type']] for elt in results]
checkColumns = ['type']
spark = SparkSession.builder.getOrCreate()
# Feed the extracted rows, not the raw dicts: the schema declares a single
# 'type' column, which is exactly what type_results contains.
DF = spark.createDataFrame(data=type_results, schema=checkColumns)
# NOTE(review): F and Window are presumably pyspark.sql.functions and
# pyspark.sql.Window, imported elsewhere - confirm.
DF = DF.withColumn("time", F.current_timestamp())
# row_number() numbers the rows of this DataFrame only, starting at 1.
DF = DF.withColumn("args_id", F.row_number().over(Window.orderBy(F.monotonically_increasing_id())))
Probably you need:
results = [
    {'type': 'check_datatype',
     'kwargs': {'table': 'cars', 'columns': ['car_id', 'index'], 'd_type': 'str'},
     'datasource_path': '/cars_dataset_ok/',
     'Result': False},
    {'type': 'check_string_consistency',
     'kwargs': {'table': 'cars', 'columns': ['car_id'], 'string_length': 6},
     'datasource_path': '/cars_dataset_ok/',
     'Result': False},
]


def flatten_kwargs(checks):
    """Flatten each check's 'kwargs' into [id, key, value, index] rows.

    Args:
        checks: iterable of dicts, each with a 'kwargs' dict.

    Returns:
        A list of 4-item lists. ``id`` is the 1-based position of the check
        as a string. Scalar values and single-item lists produce one row
        with index 'null'; longer lists produce one row per item with a
        1-based index string; empty lists produce no rows.
    """
    rows = []
    for c, check in enumerate(checks, start=1):
        for key, value in check['kwargs'].items():
            if not isinstance(value, list):
                rows.append([str(c), key, value, 'null'])
            elif len(value) == 1:
                rows.append([str(c), key, value[0], 'null'])
            else:
                # enumerate gives the true position; list.index() would
                # return the first match, which is wrong for duplicates
                # and rescans the list for every item.
                for position, item in enumerate(value, start=1):
                    rows.append([str(c), key, item, str(position)])
    return rows


result_list = flatten_kwargs(results)
print(result_list)
Output:
[['1', 'table', 'cars', 'null'], ['1', 'columns', 'car_id', '1'], ['1', 'columns', 'index', '2'], ['1', 'd_type', 'str', 'null'], ['2', 'table', 'cars', 'null'], ['2', 'columns', 'car_id', 'null'], ['2', 'string_length', 6, 'null']]
As for the Update part you can use pip install maps:
import maps
# Standard-library hashing: this snippet no longer needs maps.FrozenMap.
import hashlib
import json

results = [
    {'type': 'check_datatype',
     'kwargs': {'table': 'cars', 'columns': ['car_id', 'index'], 'd_type': 'str'},
     'datasource_path': '/cars_dataset_ok/',
     'Result': False},
    {'type': 'check_string_consistency',
     'kwargs': {'table': 'cars', 'columns': ['car_id'], 'string_length': 6},
     'datasource_path': '/cars_dataset_ok/',
     'Result': False},
    {'type': 'check_string_consistency',
     'kwargs': {'table': 'cars', 'columns': ['car_id'], 'string_length': 6},
     'datasource_path': '/cars_dataset_ok/',
     'Result': False},
]


def kwargs_digest(kwargs):
    """Return a hex SHA-256 digest that is equal for equal kwargs dicts.

    The dict is serialised to JSON with sorted keys, so key order does not
    matter. Unlike the built-in hash(), which is salted per process for
    strings, the digest is the same on every run.

    Raises:
        TypeError: if kwargs contains a value json.dumps cannot serialise.
    """
    canonical = json.dumps(kwargs, sort_keys=True, separators=(',', ':'))
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()


# Rows are [id, key, value, index, digest]. The digest is the fifth column
# and is shared by every row of a check, and by checks with equal kwargs.
result_list = []
for c, check in enumerate(results, start=1):
    kwargs = check['kwargs']
    # Deliberately NOT suffixed with the row id: appending it made identical
    # kwargs produce different codes.
    h = kwargs_digest(kwargs)
    for key, value in kwargs.items():
        if not isinstance(value, list):
            result_list.append([str(c), key, value, 'null', h])
        elif len(value) == 1:
            result_list.append([str(c), key, value[0], 'null', h])
        else:
            # enumerate gives the true position even for duplicate items.
            for position, item in enumerate(value, start=1):
                result_list.append([str(c), key, item, str(position), h])
print(result_list)
Output:
[['1', 'table', 'cars', 'null', '-6654319495930648246-1'], ['1', 'columns', 'car_id', '1', '-6654319495930648246-1'], ['1', 'columns', 'index', '2', '-6654319495930648246-1'], ['1', 'd_type', 'str', 'null', '-6654319495930648246-1'], ['2', 'table', 'cars', 'null', '-3876605863049152209-2'], ['2', 'columns', 'car_id', 'null', '-3876605863049152209-2'], ['2', 'string_length', 6, 'null', '-3876605863049152209-2'], ['3', 'table', 'cars', 'null', '-3876605863049152209-3'], ['3', 'columns', 'car_id', 'null', '-3876605863049152209-3'], ['3', 'string_length', 6, 'null', '-3876605863049152209-3']]
results = [
    {'type': 'check_datatype',
     'kwargs': {'table': 'cars', 'columns': ['car_id', 'index'], 'd_type': 'str'},
     'datasource_path': '/cars_dataset_ok/',
     'Result': False},
    {'type': 'check_string_consistency',
     'kwargs': {'table': 'cars', 'columns': ['car_id'], 'string_length': 6},
     'datasource_path': '/cars_dataset_ok/',
     'Result': False},
]

# Print only the 'kwargs' dict of each check, one per line.
for each in results:
    print(each['kwargs'])
Below is result I got from API query.
[{'type':'book','title': 'example1', 'id': 12456, 'price': '8.20', 'qty': '12', 'status': 'available'},
{'type':'book','title': 'example2', 'id': 12457, 'price': '10.50', 'qty': '5', 'status': 'none'}]
How do I specify in code that I want only the key/value pairs for title, price, and status?
So result will be like:
[{'title': 'example1', 'price': '8.20', 'status': 'available'},
{'title': 'example2', 'price': '10.50', 'status': 'none'}]
You can use a dictionary comprehension within a list comprehension:
# Raw records, shaped like the API response in the question.
L = [
    {'type': 'book', 'title': 'example1', 'id': 12456, 'price': '8.20', 'qty': '12', 'status': 'available'},
    {'type': 'book', 'title': 'example2', 'id': 12457, 'price': '10.50', 'qty': '5', 'status': 'none'},
]

# Only these fields are copied into the result, in this order.
keys = ['title', 'price', 'status']

# Build one trimmed-down dict per record.
res = []
for record in L:
    res.append(dict((field, record[field]) for field in keys))
print(res)
[{'price': '8.20', 'status': 'available', 'title': 'example1'},
{'price': '10.50', 'status': 'none', 'title': 'example2'}]
How do I convert the following defaultdict()?
defaultdict(<class 'dict'>, {
'key1_A': {
'id': 'key1',
'length': '663',
'type': 'A'},
'key1_B': {
'id': 'key1',
'length': '389',
'type': 'B'},
'key2_A': {
'id': 'key2',
'length': '865',
'type': 'A'},
'key2_B': {
'id': 'key2',
'length': '553',
'type': 'B' ........}})
The value of `id` (e.g. key1) becomes the new key, and the key called length is renamed to length_A or length_B according to the record's type, keeping the corresponding value:
defaultdict(<class 'dict'>, {
'key1': {
'length_A': '663',
'length_B': '389'},
'key2': {
'length_A': '865',
'length_B': '553'}})
Thanks,
I think this does what you want:
from collections import defaultdict
import pprint
d = {
    'key1_A': {
        'id': 'key1',
        'length': '663',
        'type': 'A',
    },
    'key1_B': {
        'id': 'key1',
        'length': '389',
        'type': 'B',
    },
    'key2_A': {
        'id': 'key2',
        'length': '865',
        'type': 'A',
    },
    'key2_B': {
        'id': 'key2',
        'length': '553',
        'type': 'B',
    },
}

# Regroup by 'id': every record contributes one 'length_<type>' entry to
# the dict stored under its id. defaultdict(dict) creates that inner dict
# the first time an id is seen.
transformed = defaultdict(dict)
for v in d.values():
    transformed[v["id"]][f"length_{v['type']}"] = v["length"]

pprint.pprint(transformed)
# Output:
# defaultdict(<class 'dict'>,
# {'key1': {'length_A': '663', 'length_B': '389'},
# 'key2': {'length_A': '865', 'length_B': '553'}})
I have the following list and I want to convert it to the output shown below using list comprehensions. Any help is appreciated.
# Sample input: records to be grouped by 'type'. Note that 'abc' appears
# again after other types, so equal types are not all adjacent.
a = [
    {'type': 'abc', 'values': 1},
    {'type': 'abc', 'values': 2},
    {'type': 'abc', 'values': 3},
    {'type': 'xyz', 'values': 4},
    {'type': 'xyz', 'values': 5},
    {'type': 'pqr', 'values': 6},
    {'type': 'pqr', 'values': 8},
    {'type': 'abc', 'values': 9},
    {'type': 'mno', 'values': 10},
    {'type': 'def', 'values': 11},
]
This is the output I am expecting.
output = {'abc': [1,2,3,9], 'xyz': [4,5], 'pqr': [6,8], 'mno': [10], 'def': [11]}
from operator import itemgetter
from itertools import groupby
a = [
    {'type': 'abc', 'values': 1},
    {'type': 'abc', 'values': 2},
    {'type': 'abc', 'values': 3},
    {'type': 'xyz', 'values': 4},
    {'type': 'xyz', 'values': 5},
    {'type': 'pqr', 'values': 6},
    {'type': 'pqr', 'values': 8},
    {'type': 'abc', 'values': 9},
    {'type': 'mno', 'values': 10},
    {'type': 'def', 'values': 11},
]

typegetter = itemgetter('type')
valuesgetter = itemgetter('values')

# groupby only merges ADJACENT items with equal keys, so the list is sorted
# by the same key first. sorted() is stable, so within each type the values
# keep their original relative order.
groups = groupby(sorted(a, key=typegetter), key=typegetter)

# Map each type to the list of its values. Consuming `groups` here
# exhausts the iterator.
output = {k: [valuesgetter(item) for item in v] for k, v in groups}
print(output)
a = [
    {'type': 'abc', 'values': 1},
    {'type': 'abc', 'values': 2},
    {'type': 'abc', 'values': 3},
    {'type': 'xyz', 'values': 4},
    {'type': 'xyz', 'values': 5},
    {'type': 'pqr', 'values': 6},
    {'type': 'pqr', 'values': 8},
    {'type': 'abc', 'values': 9},
    {'type': 'mno', 'values': 10},
    {'type': 'def', 'values': 11},
]

# Group values by type in a single pass. Keys appear in first-seen order.
output = {}
for item in a:
    # setdefault creates the list on first sight of a type; appending in
    # place avoids rebuilding the list with `+` for every item.
    output.setdefault(item['type'], []).append(item['values'])
print(output)