Find and replace part of dictionary values in python - python

I have a python dictionary and I would like to find and replace part of the characters in the values of the dictionary. I'm using python 2.7.
My dictionary is
data1 = {'customer_order': {'id': '20'},
'patient':
{'birthdate': None,
'medical_proc': None,
'medical_ref': 'HG_CTRL12',
'name': 'Patient_96',
'sex': None},
'physician_name': 'John Doe'
}
I would like to change the underscore to backslash underscore only in the values of the dictionary, in this case only for Patient_96 and HG_CTRL12.
I would like to change it to the following:
data1 = {'customer_order': {'id': '20'},
'patient':
{'birthdate': None,
'medical_proc': None,
'medical_ref': 'HG\_CTRL12',
'name': 'Patient\_96',
'sex': None},
'physician_name': 'John Doe'
}
Thank you for your help

This function recursively replaces the underscore in the values of the dictionary with replace_char:
def replace_underscores(a_dict, replace_char):
for k, v in a_dict.items():
if not isinstance(v, dict):
if v and '_' in v:
a_dict[k] = v.replace('_', replace_char)
else:
replace_underscores(v, replace_char)
More on isinstance() here.

>>> for i in data1:
... if type(data1[i]) is str:
... if data1[i]:
... data1[i] = data1[i].replace('_','\_')
... elif type(data1[i]) is dict:
... for j in data1[i]:
... if data1[i][j]:
... data1[i][j] = data1[i][j].replace('_','\_')
...
>>>
>>>
>>> data1
{'physician_name': 'John Doe', 'customer_order': {'id': '20'}, 'patient': {'medical_ref': 'HG\\_CTRL12', 'medical_proc': None, 'name': 'Patient\\_96', 'birthdate': None, 'sex': None}}

Related

Add dictionary if key value is empty using python

I have a dictionary with missing values (the key is there, but the associated value is empty). For example I want the dictionary below:
dct = {'ID': '', 'gender': 'male', 'age': '20', 'weight': '', 'height': '5.7'}
to be changed to this form:
dct = {'ID': {'link': '','value': ''}, 'gender': 'male', 'age': '20', 'weight': {'link': '','value': ''}, 'height': '5.7'}
I want the ID and Weight key should be replaced with nested dictionary if its empty.
How can I write that in the most time-efficient way?
I have tried solutions from below links but didnt work,
def update(orignal, addition):
for k, v in addition.items():
if k not in orignal:
orignal[k] = v
else:
if isinstance(v, dict):
update(orignal[k], v)
elif isinstance(v, list):
for i in range(len(v)):
update(orignal[k][i], v[i])
else:
if not orignal[k]:
orignal[k] = v
Error: TypeError: 'str' object does not support item assignment
Fill missing keys by comparing example json in python
Adding missing keys in dictionary in Python
It seems similar with this issue https://stackoverflow.com/a/3233356/6396981
import collections.abc
def update(d, u):
for k, v in u.items():
if isinstance(v, collections.abc.Mapping):
d[k] = update(d.get(k, {}) or {}, v)
else:
d[k] = v
return d
For example in your case:
>>> dict1 = {'ID':'', 'gender':'male', 'age':'20', 'weight':'', 'height':'5.7'}
>>> dict2 = {'ID': {'link':'','value':''}, 'weight': {'link':'','value':''}}
>>>
>>> update(dict1, dict2)
{'ID': {'link': '', 'value': ''}, 'gender': 'male', 'age': '20', 'weight': {'link': '', 'value': ''}, 'height': '5.7'}
>>>
You can iterate through the list and see if the value is an empty string('') if it is, replace it with the default value. Here's a small snippet which does it -
dct = {'ID':'', 'gender':'male', 'age':'20', 'weight':'', 'height':'5.7'}
def update(d, default):
for k, v in d.items():
if v == '':
d[k] = default.copy()
update(dct, {'link':'','value':''})
print(dct)
Output :
{'ID': {'link': '', 'value': ''}, 'gender': 'male', 'age': '20', 'weight': {'link': '', 'value': ''}, 'height': '5.7'}
Note that the dict is passed by reference to the function, so any updates made there will be reflected in the original dictionary as well as seen in the above example.
If your dict is nested and you want the replacement to be done for nested items as well then you can use this function -
def nested_update(d, default):
for k, v in d.items():
if v == '':
d[k] = default.copy()
if isinstance(v, list):
for item in v:
nested_update(item, default)
if isinstance(v, dict):
nested_update(v, default)
here's a small example with list of dictionaries and nested dictionary -
dct = {'ID':'', 'gender':'male', 'age':'20', 'weight':'', 'height':'5.7', "list_data":[{'empty': ''}, {'non-empty': 'value'}], "nested_dict": {"key1": "val1", "missing_nested": ""}}
nested_update(dct, {'key1': 'val1-added', 'key2': 'val2-added'})
print(dct)
Output :
{'ID': {'key1': 'val1-added', 'key2': 'val2-added'}, 'gender': 'male', 'age': '20', 'weight': {'key1': 'val1-added', 'key2': 'val2-added'}, 'height': '5.7', 'list_data': [{'empty': {'key1': 'val1-added', 'key2': 'val2-added'}}, {'non-empty': 'value'}], 'nested_dict': {'key1': 'val1', 'missing_nested': {'key1': 'val1-added', 'key2': 'val2-added'}}}
For "this default dictionary to only specified keys like ID and Weight and not for other keys", you can update the condition of when we replace the value -
def nested_update(d, default):
for k, v in d.items():
if k in ('ID', 'weight') and v == '':
d[k] = default.copy()
if isinstance(v, list):
for item in v:
nested_update(item, default)
if isinstance(v, dict):
nested_update(v, default)

Python, sort dict based on external list

I have to sort a dict like:
jobs = {'elem_05': {'id': 'fifth'},
'elem_03': {'id': 'third'},
'elem_01': {'id': 'first'},
'elem_00': {'id': 'zeroth'},
'elem_04': {'id': 'fourth'},
'elem_02': {'id': 'second'}}
based on the "id" elements, whose order can be found in a list:
sorting_list = ['zeroth', 'first', 'second', 'third', 'fourth', 'fifth']
The trivial way to solve the problem is to use:
tmp = {}
for x in sorting_list:
for k, v in jobs.items():
if v["id"] == x:
tmp.update({k: v})
but I was trying to figure out a more efficient and pythonic way.
I've been trying sorted and lambda functions as key, but I'm not familiar with that yet, so I was unsuccessful so far.
I would use a dictionary as key for sorted:
order = {k:i for i,k in enumerate(sorting_list)}
# {'zeroth': 0, 'first': 1, 'second': 2, 'third': 3, 'fourth': 4, 'fifth': 5}
out = dict(sorted(jobs.items(), key=lambda x: order.get(x[1].get('id'))))
output:
{'elem_00': {'id': 'zeroth'},
'elem_01': {'id': 'first'},
'elem_02': {'id': 'second'},
'elem_03': {'id': 'third'},
'elem_04': {'id': 'fourth'},
'elem_05': {'id': 'fifth'}}
There is a way to sort the dict using lambda as a sorting key:
jobs = {'elem_05': {'id': 'fifth'},
'elem_03': {'id': 'third'},
'elem_01': {'id': 'first'},
'elem_00': {'id': 'zeroth'},
'elem_04': {'id': 'fourth'},
'elem_02': {'id': 'second'}}
sorting_list = ['zeroth', 'first', 'second', 'third', 'fourth', 'fifth']
sorted_jobs = dict(sorted(jobs.items(), key=lambda x: sorting_list.index(x[1]['id'])))
print(sorted_jobs)
This outputs
{'elem_00': {'id': 'zeroth'}, 'elem_01': {'id': 'first'}, 'elem_02': {'id': 'second'}, 'elem_03': {'id': 'third'}, 'elem_04': {'id': 'fourth'}, 'elem_05': {'id': 'fifth'}}
I have a feeling the sorted expression could be cleaner but I didn't get it to work any other way.
You can use OrderedDict:
from collections import OrderedDict
sorted_jobs = OrderedDict([(el, jobs[key]['id']) for el, key in zip(sorting_list, jobs.keys())])
This creates an OrderedDict object which is pretty similar to dict, and can be converted to dict using dict(sorted_jobs).
Similar to what is already posted, but with error checking in case id doesn't appear in sorting_list
sorting_list = ['zeroth', 'first', 'second', 'third', 'fourth', 'fifth']
jobs = {'elem_05': {'id': 'fifth'},
'elem_03': {'id': 'third'},
'elem_01': {'id': 'first'},
'elem_00': {'id': 'zeroth'},
'elem_04': {'id': 'fourth'},
'elem_02': {'id': 'second'}}
def custom_order(item):
try:
return sorting_list.index(item[1]["id"])
except ValueError:
return len(sorting_list)
jobs_sorted = {k: v for k, v in sorted(jobs.items(), key=custom_order)}
print(jobs_sorted)
The sorted function costs O(n log n) in average time complexity. For a linear time complexity you can instead create a reverse mapping that maps each ID to the corresponding dict entry:
mapping = {d['id']: (k, d) for k, d in jobs.items()}
so that you can then construct a new dict by mapping sorting_list with the ID mapping above:
dict(map(mapping.get, sorting_list))
which, with your sample input, returns:
{'elem_00': {'id': 'zeroth'}, 'elem_01': {'id': 'first'}, 'elem_02': {'id': 'second'}, 'elem_03': {'id': 'third'}, 'elem_04': {'id': 'fourth'}, 'elem_05': {'id': 'fifth'}}
Demo: https://replit.com/#blhsing/WorseChartreuseFonts

Fastest method: for value in DictA, find value in DictB and retrieve other DictB value?

For each item in dictA, I want to search for it in dictB, if dictB has it then I want to pull some other values from dictB and add it to dictA.
An example that is working is here, however it is rather slow as I have 50,000+ items to search through and it will perform this similar function on multiple dicts.
Is there a fast method of performing this search?
dictA = [
{'id': 12345},
{'id': 67890},
{'id': 11111},
{'id': 22222}
]
dictB = [
{'id': 63351, 'name': 'Bob'},
{'id': 12345, 'name': 'Carl'},
{'id': 59933, 'name': 'Amy'},
{'id': 11111, 'name': 'Chris'}
]
for i in dictA:
name = None
for j in dictB:
if i['id'] == j['id']:
name = j['name']
i['name'] = name
The dictA output after this would be:
dictA = [
{'id': 12345, 'name': 'Carl'},
{'id': 67890, 'name': None},
{'id': 11111, 'name': 'Chris'},
{'id': 22222, 'name': None}
]
The given is list of dict. You can create dict from that assuming id is uninque. Converting from list of dict to dict will work for your case.
dictA = [
{'id': 12345},
{'id': 67890},
{'id': 11111},
{'id': 22222}
]
dictB = [
{'id': 63351, 'name': 'Bob'},
{'id': 12345, 'name': 'Carl'},
{'id': 59933, 'name': 'Amy'},
{'id': 11111, 'name': 'Chris'}
]
actual_dictB = dict()
for d in dictB:
actual_dictB[d['id']] = d['name']
for i in dictA:
i['name'] = actual_dictB.pop(i['id'], None) # now search have became O(1) constant. So best time complexity achived O(n) n=length of dictA
print(dictA)
Follow up for additional question:
actual_dictB = dict()
for d in dictB:
id_ = d['id']
d.pop('id')
actual_dictB[id_] = d
tmp = dict([(k,None) for k in dictB[0].keys() if k!='id'])
for i in dictA:
if i['id'] not in actual_dictB:
i.update(tmp)
else:
i.update(actual_dictB[i['id']])
print(dictA)

Remove duplicates in python dictionary

I have a list of dictionaries in python and I would like to override old value with duplicate value. Please let me know how can I do.
{'message': [{'name': 'raghav', 'id': 10}, {'name': 'raghav', 'id': 11}]}
Output should be:
{'message': [ {'name': 'raghav', 'id': 11}]}
I don't know what you mean by "override old value with duplicate value". If you mean just picking the second dict from the list, you could:
print({k: [v[1]] for (k, v) in data.items()})
If the idea is to update the "name" with a newer value of "id" as you move along the list, then maybe:
def merge_records(data):
records = data['message']
users = {}
for record in records:
name = record['name']
id_ = record['id']
users[name] = id_
new_records = []
for name, id_ in users.items():
new_records.append({'name': name, 'id': id_})
return {'message': new_records}
But, if you have any control over how the data is represented, you might reconsider. You probably want a different data structure.
Here you go:
d = {'message': [{'name': 'raghav', 'id': 10}, {'name': 'raghav', 'id': 11}]}
#loop over outer dictionary
for key, value in d.items():
d[key] = [dict([t for k in value for t in k.items()])]
print(d)
Edit:
As per your requirement:
d = {'message': [ {'name': 'raghav', 'id': 11}, {'name': 'krish', 'id': 20}, {'name': 'anu', 'id': 30}]}
for key, value in d.items():
print [dict((k1,v1)) for k1,v1 in dict([tuple(i.items()) for i in value for val in i.items()]).items()]

Convert nested dictionary into a dictionary

I have a list of dictionary like this
[
{'id':1, 'name': 'name1', 'education':{'university':'university1', 'subject': 'abc1'}},
{'id':2, 'name': 'name2', 'education':{'university':'university2', 'subject': 'abc2'}},
{'id':3, 'name': 'name3', 'education':{'university':'university3', 'subject': 'abc3'}},
]
and I want to convert it like
[
{'id':1, 'name': 'name1', 'university':'university1', 'subject': 'abc1'},
{'id':2, 'name': 'name2', 'university':'university2', 'subject': 'abc2'},
{'id':3, 'name': 'name3', 'university':'university3', 'subject': 'abc3'},
]
is there any pythonic way to solve this.
You could simply do the following:
l = [...]
for d in l:
d.update(d.pop('education', {}))
# l
[{'id': 1, 'name': 'name1', 'subject': 'abc1', 'university': 'university1'},
{'id': 2, 'name': 'name2', 'subject': 'abc2', 'university': 'university2'},
{'id': 3, 'name': 'name3', 'subject': 'abc3', 'university': 'university3'}]
Depending if you want to transform the original list or if you want to return a new one you could go for one of these two approaches:
l = [
{'id':1, 'name': 'name1', 'education':{'university':'university1', 'subject': 'abc1'}},
{'id':2, 'name': 'name2', 'education':{'university':'university2', 'subject': 'abc2'}},
{'id':3, 'name': 'name3', 'education':{'university':'university3', 'subject': 'abc3'}},
]
def flattenReturn(input):
output = {key: value for key, value in input.items() if type(value) != dict}
for value in input.values():
if type(value) == dict:
output.update(value)
return output
def flattenTransform(d):
for key, value in list(d.items()):
if isinstance(value, dict):
d.update(d.pop(key))
print(list(map(flattenReturn, l)))
print(l)
print("-"*80)
map(flattenTransform, l)
print(l)
As you can see flattenReturn generates a new dict filtering the values which are dictionaries and then updates it with their key-values to flatten it while the second option modifies the dict in place. If the size of the data is big, a solution including generators should be prefered.

Categories