Parsing through Nested Json/dict in Python - python

I am dealing with a nasty bit of JSON. I am using json.load to load it from a file and have it stored as a dict, printed below. In Python, how would I go about getting a list of just the "dimensions" values that come after "false_value" (the first dimension value is not actually a value I want)?
I tried a somewhat hacky approach, but I feel like someone may have a perspective on how to do this in a more elegant fashion.
Goal, make list of all the dimension values (outside the first) such as ( '100', '121' ...)
{
"reports": [
{
"columnHeader": {
"dimensions": [
"ga:clientId"
],
"metricHeader": {
"metricHeaderEntries": [
{
"name": "blah",
"type": "INTEGER"
}
]
}
},
"data": {
"rows": [
{
"dimensions": [
"false_value"
],
"metrics": [
{
"values": [
"2"
]
}
]
},
{
"dimensions": [
"100"
],
"metrics": [
{
"values": [
"2"
]
}
]
},
{
"dimensions": [
"121"
],
"metrics": [
{
"values": [
"1"
]
}
]
},
{
"dimensions": [
"1212"
],
"metrics": [
{
"values": [
"1"
]
}
]
}, ],
"totals": [
{
"values": [
"10497"
]
}
],
"rowCount": 9028,
"minimums": [
{
"values": [
"0"
]
}
],
"maximums": [
{
"values": [
"9"
]
}
],
"isDataGolden": true
},
"nextPageToken": "1000"
}
]
}

First, you should put your JSON object into a more readable textual form. Use something like Black to clean up the whitespace.
Then just traverse the keys until you find the value you need.
You should end up with something like this:
# `data` is the dict returned by json.load(); naming it `json` would shadow the module.
# The [1:] slice skips the first row, whose dimension is the unwanted "false_value".
dimensions = [row["dimensions"][0] for row in data["reports"][0]["data"]["rows"][1:]]

Using recursive function to find values with two conditions
Parent key was dimensions
Take only the numeric values
Code
def find_dims(d, inside=False, results=None):
    """Collect the numeric values found below any "dimensions" key.

    Recursively walks a nested structure of dicts and lists (for example
    the result of ``json.load``).  Once a dict key named ``"dimensions"``
    has been passed, every string leaf below it that consists only of
    decimal digits is converted to ``int`` and collected.  Non-string
    leaves and non-numeric strings are skipped.

    :param d: The current node: a dict, a list, or a leaf value.
    :param inside: True when an ancestor key of this node was "dimensions".
    :param results: Accumulator shared by the recursive calls; a new list
        is created when it is omitted.
    :return: The list of collected integers, in traversal order.
    """
    if results is None:
        results = []
    if isinstance(d, dict):
        for k, v in d.items():
            find_dims(v, k == "dimensions" or inside, results)
    elif isinstance(d, list):
        for item in d:
            find_dims(item, inside, results)
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # and the isinstance guard keeps ints/bools/None leaves from crashing.
    elif inside and isinstance(d, str) and d.isdecimal():
        results.append(int(d))
    return results
Test
OP's dictionary (with true changed to True)
d = {
"reports": [
{
"columnHeader": {
"dimensions": [
"ga:clientId"
],
"metricHeader": {
"metricHeaderEntries": [
{
"name": "blah",
"type": "INTEGER"
}
]
}
},
"data": {
"rows": [
{
"dimensions": [
"false_value"
],
"metrics": [
{
"values": [
"2"
]
}
]
},
{
"dimensions": [
"100"
],
"metrics": [
{
"values": [
"2"
]
}
]
},
{
"dimensions": [
"121"
],
"metrics": [
{
"values": [
"1"
]
}
]
},
{
"dimensions": [
"1212"
],
"metrics": [
{
"values": [
"1"
]
}
]
}, ],
"totals": [
{
"values": [
"10497"
]
}
],
"rowCount": 9028,
"minimums": [
{
"values": [
"0"
]
}
],
"maximums": [
{
"values": [
"9"
]
}
],
"isDataGolden": True
},
"nextPageToken": "1000"
}
]
}
print(find_dims(d)) # Output: [100, 121, 1212]

As stated in the comments, you can just use a simple recursive function, for example:
# Module-level accumulator filled by searchDimensions(); clear it between runs.
all_dimensions = []
# Dictionary key whose values are collected.
search_key = 'dimensions'


def searchDimensions(data):
    """Append every value stored under ``search_key`` to ``all_dimensions``.

    Walks nested dicts and lists.  When a dict key equals ``search_key``
    its value is added to the module-level ``all_dimensions`` list (a list
    or tuple is added element by element, anything else as a single item)
    and is not searched any further; every other value is searched
    recursively.

    :param data: The nested dict/list structure to search.
    :return: Always an empty list; the results are in ``all_dimensions``.
    """
    if isinstance(data, dict):
        for key, sub_data in data.items():
            if key == search_key:
                if isinstance(sub_data, (list, tuple)):
                    all_dimensions.extend(sub_data)
                else:
                    # A scalar (e.g. a plain string) must not be split up.
                    all_dimensions.append(sub_data)
            else:
                searchDimensions(sub_data)
    elif isinstance(data, list):
        for sub_data in data:
            searchDimensions(sub_data)
    return []
searchDimensions(example)
false_value_index = all_dimensions.index('false_value') + 1
output = all_dimensions[false_value_index:]
print(output)
>>> ['100', '121', '1212']
And then filter out the values that you don't want (e.g. keep only those after false_value).

Related

Python equivalent of PHP http_build_query

Here is the PHP code that I want to write in Python.
<?php
$json = '{
"targeting": [
{
"country": {
"allow": [
"US",
"DE"
]
},
"region" : {
"allow" : {
"US" : [
33
],
"DE" : [
10383
]
}
},
"city": {
"allow": {
"US": [
57
],
"DE": [
3324
]
}
},
"os": {
"allow": [
{
"name": "Android",
"comparison": "GTE",
"version": "2.3.1"
},
{
"name": "Apple TV Software",
"comparison": "EQ",
"version": "4.4"
},
{
"name": "Windows",
"comparison": "EQ",
"version": "Vista"
}
]
},
"isp" : {
"allow" : {
"US" : [
"Att"
],
"DE" : [
"Telekom"
]
}
},
"ip": {
"allow": [
"11.12.13.0-17.18.19.22",
"6.0.0.0",
"10.0.0.0-10.0.0.2",
"11.0.0.0/24"
]
},
"device_type": [
"mobile"
],
"browser": {
"allow": [
"Yandex.Browser for iOS",
"SlimBrowser",
"Edge Mobile"
]
},
"brand": {
"allow": [
"Smartbook Entertainment",
"Walton",
"PIPO"
]
},
"sub": {
"allow": {
"1": [
"A",
"B"
]
},
"deny": {
"2": [
"C",
"D"
]
},
"deny_groups": [
{
"1": ""
},
{
"1": "X",
"2": "Y"
}
]
},
"connection": [
"wi-fi",
"cellular"
],
"block_proxy": true,
"affiliate_id": [
1
],
"url": "http://test-url.com"
}
]
}';
$arr = json_decode($json);
// Build the URL-encoded query string from the decoded structure.
$postData = http_build_query($arr);
//POST SomeURLhere
// Show the human-readable query string; urldecode() needs the string, not $arr.
echo urldecode($postData);
What I need is to send this json in this format
targeting[0][country][allow][]=TR
targeting[0][os][allow][][name]=iOS
targeting[1][country][allow][]=DE
targeting[1][os][allow][][name]=iOS
I guess I need to figure out how to use http_build_query in Python.
By referring to this answer, I found the solution.
from collections.abc import MutableMapping
from urllib.parse import urlencode, unquote
def flatten(dictionary, parent_key=False, separator='.', separator_suffix=''):
    """
    Turn a nested dictionary into a flattened dictionary

    Nested mappings contribute their keys to the flattened key; lists and
    tuples are flattened using the element index as the key.  Empty
    mappings, lists and tuples produce no entry at all.

    :param dictionary: The dictionary to flatten
    :param parent_key: The string to prepend to dictionary's keys; any
        falsy value means "no parent"
    :param separator: The string used to separate flattened keys
    :param separator_suffix: The string appended after every nested key,
        e.g. ``']'`` together with ``separator='['`` gives ``a[b][c]``
    :return: A flattened dictionary
    """
    items = []
    for key, value in dictionary.items():
        if parent_key:
            # str(key) so that non-string keys (e.g. ints) can be joined too.
            new_key = str(parent_key) + separator + str(key) + separator_suffix
        else:
            new_key = key
        if isinstance(value, MutableMapping):
            items.extend(flatten(value, new_key, separator, separator_suffix).items())
        elif isinstance(value, (list, tuple)):
            for index, element in enumerate(value):
                # Wrap each element in a one-item dict keyed by its index so
                # the same recursion handles scalars and containers alike.
                items.extend(flatten({str(index): element}, new_key, separator, separator_suffix).items())
        else:
            items.append((new_key, value))
    return dict(items)
req = {'check': 'command', 'parameters': ({'parameter': '1', 'description':
'2'}, {'parameter': '3', 'description': '4'})}
req = flatten(req, False, '[', ']')
query = urlencode(req)
query_parsed = unquote(query)
print(query)
print(query_parsed)
And the outputs:
check=command&parameters%5B0%5D%5Bparameter%5D=1&parameters%5B0%5D%5Bdescription%5D=2&parameters%5B1%5D%5Bparameter%5D=3&parameters%5B1%5D%5Bdescription%5D=4
check=command&parameters[0][parameter]=1&parameters[0][description]=2&parameters[1][parameter]=3&parameters[1][description]=4

Converting nested JSON structures to Pandas DataFrames

I've been struggling with the nested structure in json, how to convert to correct form
{
"id": "0c576f35-d704-4fa8-8cbb-311c6be36358",
"employee_id": null,
"creator_id": "16ca2db9-206c-4e18-891d-a00a5252dbd3",
"closed_by_id": null,
"request_number": 23,
"priority": "2",
"form_id": "urlaub-weitere-abwesenheiten",
"status": "opened",
"name": "Urlaub & weitere Abwesenheiten",
"read_by_employee": false,
"custom_status": {
"id": 15793,
"name": "In Bearbeitung HR"
},
"due_date": null,
"created_at": "2021-03-29T15:18:37.572040+02:00",
"updated_at": "2021-03-29T15:22:15.590156+02:00",
"closed_at": null,
"archived_at": null,
"attachment_count": 1,
"category": {
"id": "payroll-time-management",
"name": "Payroll, Time & Attendance"
},
"public_comment_count": 0,
"form_data": [
{
"field_id": "subcategory",
"values": [
"Time & Attendance - Manage monthly/year-end consolidation and report"
]
},
{
"field_id": "separator-2",
"values": [
null
]
},
{
"field_id": "art-der-massnahme",
"values": [
"Fortbildung"
]
},
{
"field_id": "bezeichnung-der-schulung-kurses",
"values": [
"dfgzhujiko"
]
},
{
"field_id": "startdatum",
"values": [
"2021-03-26"
]
},
{
"field_id": "enddatum",
"values": [
"2021-03-27"
]
},
{
"field_id": "freistellung",
"values": [
"nein"
]
},
{
"field_id": "mit-bildungsurlaub",
"values": [
""
]
},
{
"field_id": "kommentarfeld_fortbildung",
"values": [
""
]
},
{
"field_id": "separator",
"values": [
null
]
},
{
"field_id": "instructions",
"values": [
null
]
},
{
"field_id": "entscheidung-hr-bp",
"values": [
"Zustimmen"
]
},
{
"field_id": "kommentarfeld-hr-bp",
"values": [
"wsdfghjkmhnbgvfcdxsybvnm,"
]
},
{
"field_id": "individuelle-abstimmung",
"values": [
""
]
}
],
"form_files": [
{
"id": 30129,
"filename": "empty_background.png",
"field_id": "anhang"
}
],
"visible_by_employee": false,
"organization_ids": [],
"need_edit_by_employee": false,
"attachments": []
}
using a simple solution with pandas, dataframe
Request = pd.DataFrame.from_dict(pd.json_normalize(data), orient='columns')
it's displaying almost in its correct form:
How can I split the dictionaries in the form_data and form_files columns? I've done a lot of research, but I'm still having a lot of trouble solving this problem: how to split form_data into columns (not rows), keeping the id as meta.
You can do something like this.
Pass the DataFrame and the name of the column to expand to the function as arguments:
def explode_node(child_df, column_value):
    """Expand a column of nested records into one row per record.

    Rows whose ``column_value`` cell is missing are dropped.  If the cells
    hold strings they are parsed with ``ast.literal_eval`` first.  Each
    cell is then normalised with ``pd.json_normalize`` and the resulting
    rows are joined back to the remaining columns of their source row.
    Overlapping column names get the suffixes ``_left`` (nested record)
    and ``_right`` (original frame).  All column names are lower-cased.

    :param child_df: DataFrame that contains the nested column.  It is not
        modified; the function works on a copy.
    :param column_value: Name of the column holding the nested records.
    :return: A new DataFrame with a fresh ``RangeIndex``; empty when no row
        has a value in ``column_value``.
    """
    # dropna() result is copied explicitly so the assignment and pop() below
    # never touch (or warn about) the caller's frame.
    child_df = child_df.dropna(subset=[column_value]).copy()
    if child_df.empty:
        # Nothing to expand: iloc[0] and pd.concat would both fail here.
        remaining = child_df.drop(columns=[column_value]).reset_index(drop=True)
        remaining.columns = [str(col).lower() for col in remaining.columns]
        return remaining
    if isinstance(child_df[column_value].iloc[0], str):
        child_df[column_value] = child_df[column_value].apply(ast.literal_eval)
    nested = child_df.pop(column_value)
    # Keyed by the source row's index label so the join below can line the
    # expanded records up with the columns they came from.
    expanded = pd.concat(
        {idx: pd.json_normalize(records) for idx, records in nested.items()}
    )
    expanded_child_df = (
        expanded.reset_index(level=1, drop=True)
        .join(child_df, how='right', lsuffix='_left', rsuffix='_right')
        .reset_index(drop=True)
    )
    expanded_child_df.columns = [str(col).lower() for col in expanded_child_df.columns]
    return expanded_child_df

custom json formatting in python

I have the following code to generate json representation of list of lists.
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
['L2','L2','L3'],
['L2','L2','L1'],
['L3','L2'],
['L4','L2','L1'],
['L4','L2','L4']]
def append_path(root, paths):
    """Insert a path of names into the nested-dict tree ``root``.

    Each name in ``paths`` becomes a key one level below the previous one;
    existing keys are reused, missing ones are created as empty dicts.

    :param root: The nested dict to update in place.
    :param paths: Sequence of names from the top level down; an empty (or
        otherwise falsy) value leaves ``root`` unchanged.
    """
    node = root
    for name in paths or ():
        node = node.setdefault(name, {})
for p in Levels:
append_path(root, p)
def convert(d):
    """Convert a nested-dict tree into a list of name/children nodes.

    :param d: Nested dict in which each key is a node name and each value
        is the dict of that node's children.
    :return: A list with one ``{'name': ..., 'children': ...}`` dict per key
        of ``d``.  A node without children gets ``[{}]`` as its children.
    """
    return [
        {'name': name, 'children': convert(subtree) if subtree else [{}]}
        for name, subtree in d.items()
    ]
# Print results
import json
print(json.dumps(convert(root), indent=4))
Output:
[
"name": "L1",
"children": [
{
"name": "L1",
"children":[
{
"name":"L3",
"children":[{}]
},
{
"name":"L1",
"children":[{}]
}]
},
{
"name":"L2",
"children":[{}]
}
]
for the levels
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
I also need to encode the count of each level
for eg there is the path from L1 which has two first level childrens L1(2) and L2(1) followed by L2(1) and L3(1) for next level .
L1(3)-->L1(2)-->L2(1)
-->L3(1)
-->L2(1)
How can I encode this count in my json output.
I want my final output to look like this
"name": "L1(3)",
"children": [
{
"name": "L1(2)",
"children":[
root={}
Levels=[['L1','L1','L2'],
['L1','L1','L3'],
['L1','L2'],
['L2','L2','L3'],
['L2','L2','L1'],
['L3','L2'],
['L4','L2','L1'],
['L4','L2','L4']]
def append_path(root, paths):
    """Insert a path of names into the nested-dict tree ``root``.

    Each name in ``paths`` becomes a key one level below the previous one;
    existing keys are reused, missing ones are created as empty dicts.

    :param root: The nested dict to update in place.
    :param paths: Sequence of names from the top level down; an empty (or
        otherwise falsy) value leaves ``root`` unchanged.
    """
    node = root
    for name in paths or ():
        node = node.setdefault(name, {})
for p in Levels:
append_path(root, p)
def convert(d):
    """Convert a nested-dict tree into name/children nodes with leaf counts.

    Every node name is suffixed with ``(n)``, where ``n`` is the number of
    leaves (keys with no children) at or below that node.

    :param d: Nested dict in which each key is a node name and each value
        is the dict of that node's children.
    :return: A tuple ``(nodes, leaf_count)``.  ``nodes`` is a list with one
        ``{'name': ..., 'children': ...}`` dict per key of ``d`` and
        ``leaf_count`` is the total number of leaves below ``d``.  For an
        empty dict the result is ``([{}], 1)``: the placeholder children
        list of a leaf, counted as one leaf.
    """
    if not d:
        return ([{}], 1)
    nodes = []
    total_leaves = 0
    for name, subtree in d.items():
        # The recursive result is never empty (a leaf yields [{}]), so it can
        # be used as the children list directly.
        children, leaf_count = convert(subtree)
        total_leaves += leaf_count
        nodes.append({"name": name + "(" + str(leaf_count) + ")", 'children': children})
    return (nodes, total_leaves)
# Print results
import json
print(json.dumps(convert(root)[0], indent=2))
OUTPUT
[
{
"name": "L1(3)",
"children": [
{
"name": "L1(2)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
},
{
"name": "L3(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L2(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L3(1)",
"children": [
{}
]
},
{
"name": "L1(1)",
"children": [
{}
]
}
]
}
]
},
{
"name": "L3(1)",
"children": [
{
"name": "L2(1)",
"children": [
{}
]
}
]
},
{
"name": "L4(2)",
"children": [
{
"name": "L2(2)",
"children": [
{
"name": "L1(1)",
"children": [
{}
]
},
{
"name": "L4(1)",
"children": [
{}
]
}
]
}
]
}
]

Extract values from json based on select condition using python

I am trying to Extract values from json based on select condition using python.
My Json file looks like below:
{
"bindings": [
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/browser"
},
{
"members": [
"serviceAccount:admin-user#linuxacademy-3.iam.gserviceaccount.com",
"user:rohithmn03#gmail.com"
],
"role": "roles/owner"
},
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/viewer"
}
],
"etag": "BwrRsH-UhJ0=",
"version": 1
}
I am trying to parse this above file in python based on the user. For Example: Get the roles defined for user rohithmn3#gmail.com; as per the json the output should be :
roles/browser
roles/viewer
Regards,
Rohith
Using a list comprehension and dictionary input d:
var = 'rohithmn3#gmail.com'
res = [subd['role'] for subd in d['bindings'] if 'user:'+var in subd['members']]
print(res)
['roles/browser', 'roles/viewer']
Setup
d = {
"bindings": [
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/browser"
},
{
"members": [
"serviceAccount:admin-user#linuxacademy-3.iam.gserviceaccount.com",
"user:rohithmn03#gmail.com"
],
"role": "roles/owner"
},
{
"members": [
"user:rohithmn3#gmail.com"
],
"role": "roles/viewer"
}
],
"etag": "BwrRsH-UhJ0=",
"version": 1
}

Regex in MongoDB's $cond or $switch condition expression?

For a collection like this
{ _id: 1, name: "novel_1", qty: 15}
{ _id: 2, name: "magazine_1", qty: 5}
{ _id: 3, name: "novel_2", qty: 5}
{ _id: 4, name: "guitar_1", qty: 10}
{ _id: 5, name: "violin_1", qty: 10}
I want to somehow categorize the items based on its name using the $project pipeline. And then get a group-by count out of it.
db.items.aggregate([
{$project: {category: {
$switch: {
branches: [
// use regex here to categorize the items by their name
{case: {$in: ['$name', [/magazine/, /novel/]]},
then: 'book'},
{case: {$in: ['$name', [/guitar/, /violin/]]},
then: 'instrument'}
],
default: 'others'
}
}}},
// get the group-by count based on the category
{$group: {
_id: {category: '$category'},
count: {$sum: '$qty'}
}}
]);
However it seems like MongoDB doesn't support the regex condition expression in $project pipeline. So how can we do this transform-then-group-by query? I guess one way to do it is via the MapReduce, but it is said the performance is not great. Especially I am using python for my app, using MapReduce will tangle the JS code and python code together.
You do not need MapReduce. You can use the Aggregation Framework to do this.
Also note that you don't need to first $project your documents, you can pass the $switch expression to _id
db.items.aggregate(
[
{
"$group": {
"_id": {
"$switch": {
"branches": [
{
"case": {
"$or": [
{
"$gt": [
{
"$indexOfCP": [
"$name",
"magazine"
]
},
-1
]
},
{
"$gt": [
{
"$indexOfCP": [
"$name",
"novel"
]
},
-1
]
}
]
},
"then": "book"
},
{
"case": {
"$or": [
{
"$gt": [
{
"$indexOfCP": [
"$name",
"violin"
]
},
-1
]
},
{
"$gt": [
{
"$indexOfCP": [
"$name",
"guitar"
]
},
-1
]
}
]
},
"then": "instrument"
}
],
"default": "others"
}
},
"count":{"$sum": "$qty"}
}
}
]
)
// Alternative: split the name into tokens and test whether any token is one
// of the category keywords. The sample names look like "novel_1", so the
// delimiter is "_"; the set operator is spelled $setIntersection.
db.items.aggregate(
    [
        {
            "$group": {
                "_id": {
                    "$switch": {
                        "branches": [
                            {
                                // A non-empty intersection means the name contains a book keyword.
                                "case": {
                                    "$gt": [
                                        {
                                            "$size": {
                                                "$setIntersection": [
                                                    { "$split": ["$name", "_"] },
                                                    ["magazine", "novel"]
                                                ]
                                            }
                                        },
                                        0
                                    ]
                                },
                                "then": "book"
                            },
                            {
                                // Same test with the instrument keywords.
                                "case": {
                                    "$gt": [
                                        {
                                            "$size": {
                                                "$setIntersection": [
                                                    { "$split": ["$name", "_"] },
                                                    ["guitar", "violin"]
                                                ]
                                            }
                                        },
                                        0
                                    ]
                                },
                                "then": "instrument"
                            }
                        ],
                        "default": "others"
                    }
                },
                // Sum the quantities of all items that fall into the category.
                "count": { "$sum": "$qty" }
            }
        }
    ]
)

Categories