Python: Change a JSON value - python

Let's say I have the following JSON file named output.
{'fields': [{'name': 2, 'type': 'Int32'},
{'name': 12, 'type': 'string'},
{'name': 9, 'type': 'datetimeoffset'},
}],
'type': 'struct'}
If type key has a value datetimeoffset, I would like to change it to dateTime and if If type key has a value Int32, I would like to change it to integer and like this, I have multiple values to replace.
The expected output is
{'fields': [{ 'name': 2, 'type': 'integer'},
{ 'name': 12, 'type': 'string'},
{ 'name': 9, 'type': 'dateTime'},
,
}],
'type': 'struct'}
Can anyone help with this in Python?

You can try this out:
substitute = {"Int32": "integer", "datetimeoffset": "dateTime"}
x = {'fields': [
{'name': 2, 'type': 'Int32'},
{'name': 12, 'type': 'string'},
{'name': 9, 'type': 'datetimeoffset'}
],'type': 'struct'}
for i in range(len(x['fields'])):
if x['fields'][i]["type"] in substitute:
x['fields'][i]['type'] = substitute[x['fields'][i]['type']]
print(x)

You can use the following code. Include in equivalences dict the values you want to replace:
json = {
'fields': [
{'name': 2, 'type': 'Int32'},
{'name': 12, 'type': 'string'},
{'name': 9, 'type': 'datetimeoffset'},
],
'type': 'struct'
}
equivalences = {"datetimeoffset": "dateTime", "Int32": "integer"}
#Replace values based on equivalences dict
for i, data in enumerate(json["fields"]):
if data["type"] in equivalences.keys():
json["fields"][i]["type"] = equivalences[data["type"]]
print(json)
The output is:
{
"fields": [
{
"name": 2,
"type": "integer"
},
{
"name": 12,
"type": "string"
},
{
"name": 9,
"type": "dateTime"
}
],
"type": "struct"
}

simple but ugly way:
json_ ={'fields': [{'name': 2, 'type': 'Int32'},
{'name': 12, 'type': 'string'},
{'name': 9, 'type': 'datetimeoffset'}], 'type': 'struct'}
result = json.loads(json.dumps(json_ ).replace("datetimeoffset", "dateTime").replace("Int32", "integer"))

Related

How to extract group count from dictionary?

I need to get the count of groups which is same 'id' and 'name'
Input:
myd = {
"Items": [
{
"id": 1,
"name": "ABC",
"value": 666
},
{
"id": 1,
"name": "ABC",
"value": 89
},
{
"id": 2,
"name": "DEF",
"value": 111
},
{
"id": 3,
"name": "GHI",
"value": 111
}
]
}
Expected output:
The count of {'id':1, 'name': 'ABC' } is 2
The count of {'id':2, 'name': 'DEF' } is 1
The count of {'id':3, 'name': 'GHI' } is 1
for total length we can get by len(myd) for single key its len(myd['id'])
How to get the count for the combination of id and name
You can use collections.OrderedDict and set both 'id' and 'name' as tuple keys. In this way, the OrderedDict automatically groups the dictionaries with same 'id' and 'name' values in order:
myd = {'Items': [
{'id':1, 'name': 'ABC', 'value': 666},
{'id':1, 'name': 'ABC', 'value': 89},
{'id':2, 'name': 'DEF', 'value': 111 },
{'id':3, 'name': 'GHI', 'value': 111 }]
}
from collections import OrderedDict
od = OrderedDict()
for d in myd['Items']:
od.setdefault((d['id'], d['name']), set()).add(d['value'])
for ks, v in od.items():
print("The count of {{'id': {}, 'name': {}}} is {}".format(ks[0], ks[1], len(v)))
Output:
The count of {'id': 1, 'name': ABC} is 2
The count of {'id': 2, 'name': DEF} is 1
The count of {'id': 3, 'name': GHI} is 1
This is a good candidate for groupby and itemgetter usage:
from itertools import groupby
from operator import itemgetter
myd = {'Items': [
{'id': 1, 'name': 'ABC', 'value': 666},
{'id': 1, 'name': 'ABC', 'value': 89},
{'id': 2, 'name': 'DEF', 'value': 111},
{'id': 3, 'name': 'GHI', 'value': 111}]
}
grouper = itemgetter('id', 'name')
for i, v in groupby(sorted(myd['Items'], key=grouper), key=grouper):
print(f"the count for {dict(id=i[0], name=i[1])} is {len(list(v))}")

How to get the count for a particular key in the dictionary

My content inside a dictionary is below
I need to now for BusinessArea how many different name key is there, like this need to know Designation also
test=
[ { 'masterid': '1', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Accounting', 'parentname': 'Finance'}, { 'id': '3', 'name': 'Research', 'parentname': 'R & D' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }] },
{ 'masterid': '2', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Research', 'parentname': '' }, { 'id': '3', 'name': 'Accounting', 'parentname': '' } ], 'Role': [ { 'id': '5032', 'name': 'Tester' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }]},
{ 'masterid': '3', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Engineering' }, { 'id': '3', 'name': 'Engineering', 'parentname': '' } ], 'Role': [ { 'id': '5032', 'name': 'Developer' }, { 'id': '5033', 'name': 'Developer', 'parentname': '' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }]}]
I want to get the count of masterid of BusinessArea and Designation which is all the names
Expected out is below
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": "2"
},
{
"name": "Research",
"count": "2"
},
{
"name": "Engineering",
"count": "1"
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": "3"
},
{
"name": "l2",
"count": "3"
}
]
}
]
Try this:
res=[{'name': 'BusinessArea', 'values': []}, {'name': 'Designation', 'values': []}]
listbus=sum([i['BusinessArea'] for i in test], [])
listdes=sum([i['Designation'] for i in test], [])
res[0]['values']=[{'name':i, 'count':0} for i in set(k['name'] for k in listbus)]
res[1]['values']=[{'name':i, 'count':0} for i in set(k['name'] for k in listdes)]
for i in listbus:
for k in range(len(res[0]['values'])):
if i['name']==res[0]['values'][k]['name']:
res[0]['values'][k]['count']+=1
for i in listdes:
for k in range(len(res[1]['values'])):
if i['name']==res[1]['values'][k]['name']:
res[1]['values'][k]['count']+=1
>>> print(res)
[{'name': 'BusinessArea', 'values': [{'name': 'Accounting', 'count': 2}, {'name': 'Research', 'count': 2}, {'name': 'Engineering', 'count': 2}]}, {'name': 'Designation', 'values': [{'name': 'L1', 'count': 3}, {'name': 'L2', 'count': 6}]}]
You could count unique names using a nested collections.defaultdict:
from collections import defaultdict
from json import dumps
keys = ["BusinessArea", "Designation"]
group_counts = defaultdict(lambda: defaultdict(int))
for group in test:
for key in keys:
names = [item["name"] for item in group[key]]
unique_names = list(dict.fromkeys(names))
for name in unique_names:
group_counts[key][name] += 1
print(dumps(group_counts, indent=2))
Which will give you these counts:
{
"BusinessArea": {
"Accounting": 2,
"Research": 2,
"Engineering": 1
},
"Designation": {
"L1": 3,
"L2": 3
}
}
Then you could modify the result to get the list of dicts you expect:
result = [
{
"name": name,
"values": [{"name": value, "count": count} for value, count in counts.items()],
}
for name, counts in group_counts.items()
]
print(dumps(result, indent=2))
Which gives you this:
[
{
"name": "BusinessArea",
"values": [
{
"name": "Accounting",
"count": 2
},
{
"name": "Research",
"count": 2
},
{
"name": "Engineering",
"count": 1
}
]
},
{
"name": "Designation",
"values": [
{
"name": "L1",
"count": 3
},
{
"name": "L2",
"count": 3
}
]
}
]

keep duplicates by key in a list of dictionaries

I have a list of dictionaries, and I would like to obtain those that have the same value in a key:
my_list_of_dicts = [{
'id': 3,
'name': 'John'
},{
'id': 5,
'name': 'Peter'
},{
'id': 2,
'name': 'Peter'
},{
'id': 6,
'name': 'Mariah'
},{
'id': 7,
'name': 'John'
},{
'id': 1,
'name': 'Louis'
}
]
I want to keep those items that have the same 'name', so, I would like to obtain something like:
duplicates: [{
'id': 3,
'name': 'John'
},{
'id': 5,
'name': 'Peter'
},{
'id': 2,
'name': 'Peter'
}, {
'id': 7,
'name': 'John'
}
]
I'm trying (not successfully):
duplicates = [item for item in my_list_of_dicts if len(my_list_of_dicts.get('name', None)) > 1]
I have clear my problem with this code, but not able to do the right sentence
Another concise way using collections.Counter:
from collections import Counter
my_list_of_dicts = [{
'id': 3,
'name': 'John'
},{
'id': 5,
'name': 'Peter'
},{
'id': 2,
'name': 'Peter'
},{
'id': 6,
'name': 'Mariah'
},{
'id': 7,
'name': 'John'
},{
'id': 1,
'name': 'Louis'
}
]
c = Counter(x['name'] for x in my_list_of_dicts)
duplicates = [x for x in my_list_of_dicts if c[x['name']] > 1]
You could use the following list comprehension:
>>> [d for d in my_list_of_dicts if len([e for e in my_list_of_dicts if e['name'] == d['name']]) > 1]
[{'id': 3, 'name': 'John'},
{'id': 5, 'name': 'Peter'},
{'id': 2, 'name': 'Peter'},
{'id': 7, 'name': 'John'}]
my_list_of_dicts = [{
'id': 3,
'name': 'John'
},{
'id': 5,
'name': 'Peter'
},{
'id': 2,
'name': 'Peter'
},{
'id': 6,
'name': 'Mariah'
},{
'id': 7,
'name': 'John'
},{
'id': 1,
'name': 'Louis'
}
]
df = pd.DataFrame(my_list_of_dicts)
df[df.name.isin(df[df.name.duplicated()]['name'])].to_json(orient='records')
Attempt similar to #cucuru
Hopefully Helpful.
Explained in comments what I did differently.
my_list_of_dicts = [{
'id': 3,
'name': 'John'
},{
'id': 5,
'name': 'Peter'
},{
'id': 2,
'name': 'Peter'
},{
'id': 6,
'name': 'Mariah'
},{
'id': 7,
'name': 'John'
},{
'id': 1,
'name': 'Louis'
}
]
# Create a list of names
names = [person.get('name') for person in my_list_of_dicts]
# Add item to list if the name occurs more than once in names
duplicates = [item for item in my_list_of_dicts if names.count(item.get('name')) > 1]
print(duplicates)
produces
[{'id': 3, 'name': 'John'}, {'id': 5, 'name': 'Peter'}, {'id': 2, 'name': 'Peter'}, {'id': 7, 'name': 'John'}]
[Program finished]

How to extract Json data scraped from website

I used Beautiful soup to extract data from a Website. Content is in JSON and I need to extract all the display_name values. I have no clue how to naviagate and print the values I need to save in my CSV.
I tried using some array examples like this one
for productoslvl in soup2.findAll('script',{'id' :'searchResult'}):
element = jsons[0]['display_name']
print (element)
but I keep getting KeyError
This is the JSON data:
{
'page_size': -1,
'refinements': [{
'display_name': 'Brand',
'values': [{
'display_name': 'Acqua Di Parma',
'status': 4,
'value': 900096
}],
'type': 'checkboxes'
}, {
'display_name': 'Bristle Type',
'values': [{
'display_name': 'Addictive',
'status': 1,
'value': 14578019
}, {
'display_name': 'Casual',
'status': 1,
'value': 14578020
}, {
'display_name': 'Chic',
'status': 1,
'value': 14301148
}, {
'display_name': 'Polished',
'status': 1,
'value': 14578022
}],
'type': 'checkboxes'
}, {
'display_name': 'Coverage',
'values': [{
'display_name': 'Balanced',
'status': 1,
'value': 14301025
}, {
'display_name': 'Light',
'status': 1,
'value': 14577894
}, {
'display_name': 'Rich',
'status': 1,
'value': 14577895
}],
'type': 'checkboxes'
}, {
'display_name': 'Formulation',
'values': [{
'display_name': 'Cream',
'status': 1,
'value': 100069
}, {
'display_name': 'Spray',
'status': 1,
'value': 100072
}],
'type': 'checkboxes'
}

creating df to generate json in the given format

I am trying to generate a df to produce this below json.
Json data:
{
"name": "flare",
"children": [
{
"name": "K1",
"children": [
{"name": "Exact", "size": 4},
{"name": "synonyms", "size": 14}
]
},
{
"name": "K2",
"children": [
{"name": "Exact", "size": 10},
{"name": "synonyms", "size": 20}
]
},
{
"name": "K3",
"children": [
{"name": "Exact", "size": 0},
{"name": "synonyms", "size": 5}
]
},
{
"name": "K4",
"children": [
{"name": "Exact", "size": 13},
{"name": "synonyms", "size": 15}
]
},
{
"name": "K5",
"children": [
{"name": "Exact", "size": 0},
{"name": "synonyms", "size": 0}
]
}
]
}
input data:
name Exact synonyms
K1 4 14
K2 10 20
K3 0 5
K4 13 15
K5 0 0
I tried creating df with values in the json but I was not able to get the desired json on df.to_json, please help.
You need reshape data by set_index + stack and then use groupby with apply for nested list of dict:
import json
df = (df.set_index('name')
.stack()
.reset_index(level=1)
.rename(columns={'level_1':'name', 0:'size'})
.groupby(level=0).apply(lambda x: x.to_dict(orient='records'))
.reset_index(name='children')
)
print (df)
name children
0 K1 [{'name': 'Exact', 'size': 4}, {'name': 'synon...
1 K2 [{'name': 'Exact', 'size': 10}, {'name': 'syno...
2 K3 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
3 K4 [{'name': 'Exact', 'size': 13}, {'name': 'syno...
4 K5 [{'name': 'Exact', 'size': 0}, {'name': 'synon...
#convert output to dict
j = { "name": "flare", "children": df.to_dict(orient='records')}
#for nice output - easier check
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(j)
{ 'children': [ { 'children': [ {'name': 'Exact', 'size': 4},
{'name': 'synonyms', 'size': 14}],
'name': 'K1'},
{ 'children': [ {'name': 'Exact', 'size': 10},
{'name': 'synonyms', 'size': 20}],
'name': 'K2'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 5}],
'name': 'K3'},
{ 'children': [ {'name': 'Exact', 'size': 13},
{'name': 'synonyms', 'size': 15}],
'name': 'K4'},
{ 'children': [ {'name': 'Exact', 'size': 0},
{'name': 'synonyms', 'size': 0}],
'name': 'K5'}],
'name': 'flare'}
#convert data to json and write to file
with open('data.json', 'w') as outfile:
json.dump(j, outfile)

Categories