Comparing two JSON trees in Python - python

I have this kind of JSON tree describing a folder structure. Is there any way to compare it with another JSON tree of the same kind to find the differences (a missing file or different file properties such as date, CRC, ...) and return them as a list with the names of the different/missing files?
{
"testfolder": {
"children": {
"content.json": {
"last_modified_timestamp": 1485902084.0222416,
"created_timestamp": 1485193414.5027652,
"crc": "7c71cf7ff765ddd78fffcac2eed56ae2",
"type": "file",
"size": 961
},
"config.json": {
"last_modified_timestamp": 1484831126.4821935,
"created_timestamp": 1484830625.6165457,
"crc": "bff5d42e18df483841aa10df8b38cdd4",
"type": "file",
"size": 132
}
}
},
"__init__.py": {
"last_modified_timestamp": 1481651800.7150106,
"created_timestamp": 1481651800.7150106,
"crc": "d41d8cd98f00b204e9800998ecf8427e",
"type": "file",
"size": 0
},
"test.json": {
"last_modified_timestamp": 1486126931.2528062,
"created_timestamp": 1486126732.7074502,
"crc": "8a30d9b3834ef46ad3b996edb06c72bf",
"type": "file",
"size": 1675
},
"test": {
"children": {
"test.txt.txt": {
"last_modified_timestamp": 1486126927.9266162,
"created_timestamp": 1486126865.9750726,
"crc": "b5301fdbf2ba41520b255a651c7017b1",
"type": "file",
"size": 5
}
}
}
}
Thank you for help!

def jsondiff(local, online, path='', todo=None):
    """Return the paths in ``local`` that are missing from or newer than ``online``.

    Both trees map an entry name to a dict. An entry with a ``"children"``
    key is treated as a folder whose value under that key is a nested tree;
    any other entry is treated as a file and must carry a
    ``"last_modified_timestamp"``. Only that timestamp is compared; other
    properties such as ``crc`` or ``size`` are not looked at.

    Args:
        local: Tree to walk.
        online: Tree to compare against.
        path: Prefix prepended to every reported name, with nested folders
            joined by ``"/"``.
        todo: Optional list of already-collected paths. It is copied, never
            modified in place.

    Returns:
        A new list: the items of ``todo`` (if given) followed by the path of
        every file that is absent from ``online`` or whose local timestamp is
        strictly greater than the online one.

    Raises:
        KeyError: If a folder in ``local`` matches an ``online`` entry that
            has no ``"children"``, or a compared file lacks
            ``"last_modified_timestamp"``.
    """
    # A fresh list per call: the previous ``todo=[]`` default was shared
    # between calls, so earlier results leaked into later ones.
    pending = [] if todo is None else list(todo)
    for key, entry in local.items():
        full_name = path + key
        if key not in online:
            if 'children' in entry:
                # The whole folder is missing on the other side.
                # json_path_print is defined outside this block; presumably
                # it returns the list of all file paths below the folder.
                # TODO confirm against its definition.
                pending.extend(json_path_print(entry["children"], full_name + "/"))
            else:
                pending.append(full_name)
        elif 'children' in entry:
            # Folder exists on both sides: compare its contents recursively.
            pending.extend(
                jsondiff(entry["children"], online[key]["children"], full_name + "/")
            )
        elif entry["last_modified_timestamp"] > online[key]["last_modified_timestamp"]:
            # File exists on both sides but the local copy is newer.
            pending.append(full_name)
    return pending
Solved it; the code above is my solution, in case anyone needs it.

Related

A Python script that can navigate a .json and export a .csv based on a search term

I want to take a .json of "_PRESET..." items and their "code-state"s with "actions" that contain other "code-state"s, "appearance"s, and "switch"s and turn it into .csv produced from the actions under a given "_PRESET...", including the "code-state"s and the "actions" listed under their individual entries.
This would allow a user to enter the "_PRESET..." name and receive a 3-column .csv file containing each action's "type", name, and "value". There are of course ways to export the entire .json easily, but I can't fathom a way to navigate it like is needed.
enters "_PRESET_Config_A" for
input.json:
{
"abc_data": {
"_PRESET_Config_A": {
"properties": {
"category": "configuration",
"name": "_PRESET_Config_A",
"collection": null,
"description": ""
},
"actions": {
"EN-R9": {
"type": "code_state",
"value": "on"
}
}
},
"PN4FP": {
"properties": {
"category": "uncategorized",
"name": "PN4FP",
"collection": null,
"description": ""
},
"actions": {
"E_xxxxxx_Default": {
"type": "appearance",
"value": "M_Red"
}
}
},
"HEDIS": {
"properties": {
"category": "uncategorized",
"name": "HEDIS",
"collection": null,
"description": ""
},
"actions": {
"E_xxxxxx_Default": {
"type": "appearance",
"value": "M_Purple"
}
}
},
"_PRESET_Config_B": {
"properties": {
"category": "configuration",
"name": "_PRESET_Config_A",
"collection": null,
"description": ""
},
"actions": {
"HEDIS": {
"type": "code_state",
"value": "on"
}
}
},
"EN-R9": {
"properties": {
"category": "uncategorized",
"name": "EN-R9",
"collection": null,
"description": ""
},
"actions": {
"PN4FP": {
"type": "code_state",
"value": "on"
},
"switch_StorageBin": {
"type": "switch",
"value": "00_w_Storage_Bin_R9"
}
}
}
}
}
Desired output.csv
type,name,value
code_state,EN-R9,on
code_state,PN4FP,on
appearance,E_xxxxxx_Default,M_Red
switch,switch_StorageBin,00_w_Storage_Bin_R9

How to extract data from complex JSON object?

I am trying to extract data from the json file I got from a get request.
{
"data": [
{
"type": "Projects",
"id": "102777c7-50a7-592d-1b65-621d5850a5bb",
"attributes": {
"name": "Hydroelectric Project Updated from Postman",
"projectid": "001"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "102c7131-d797-c085-d248-621d5820494f",
"attributes": {
"name": "Ana Hydroelectric Project",
"projectid": "002"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"attributes": {
"name": "Methane Capture Project",
"projectid": "003"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
}
]
}
I have an empty dictionary that stores projectid as Key.
projectids = {
001:"",
002:"",
003:"",
004:"",
}
I was looking for a way to find "projectid" inside "attributes" and the corresponding value for "id" and populate the dictionary projectids with the key(['attributes']['projectid']) and values(id):
{
"001": "102777c7-50a7-592d-1b65-621d5850a5bb",
"002": "102c7131-d797-c085-d248-621d5820494f",
"003": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"004": ""
}
You can try this, assuming data is your variable for the response from the GET request
# Build the lookup from scratch: one entry per project found in the response.
projectids = {}
for record in data['data']:
    project_key = record['attributes']['projectid']
    projectids[project_key] = record['id']
Output:
{
'001': '102777c7-50a7-592d-1b65-621d5850a5bb',
'002': '102c7131-d797-c085-d248-621d5820494f',
'003': '1041f300-5acf-4bd9-2ec4-621d58bbe6bc'
}
if you're trying to match with already existing projectids in a dict then try
# Fill in only the project ids that were listed up front.
projectids = {
    "001": "",
    "002": "",
    "003": "",
    "004": "",
}
for wanted in projectids:
    # First record in the response with this projectid,
    # or None when the response contains no such project.
    hit = next(
        (record for record in data['data']
         if record["attributes"]["projectid"] == wanted),
        None,
    )
    if hit is not None:
        projectids[wanted] = hit['id']
If data is your input data from the question, then:
# Four blank placeholders "001".."004" ...
projectids = dict.fromkeys((f"{i:>03}" for i in range(1, 5)), "")
# ... overlaid with the ids actually present in the response.
out = dict(projectids)
out.update((d["attributes"]["projectid"], d["id"]) for d in data["data"])
print(out)
Prints:
{
"001": "102777c7-50a7-592d-1b65-621d5850a5bb",
"002": "102c7131-d797-c085-d248-621d5820494f",
"003": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"004": "",
}
Simply try this:
json_data = {
"data": [
{
"type": "Projects",
"id": "102777c7-50a7-592d-1b65-621d5850a5bb",
"attributes": {
"name": "Hydroelectric Project Updated from Postman",
"projectid": "001"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "102c7131-d797-c085-d248-621d5820494f",
"attributes": {
"name": "Ana Hydroelectric Project",
"projectid": "002"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
},
{
"type": "Projects",
"id": "1041f300-5acf-4bd9-2ec4-621d58bbe6bc",
"attributes": {
"name": "Methane Capture Project",
"projectid": "003"
},
"relationships": {
"Accounts": "Account1",
"Notes": "Note1"
}
}
]
}
Just assume the above JSON data and try the following code:
# Map each project's "projectid" attribute to the project's "id".
project_ids = dict(
    (item['attributes']['projectid'], item['id'])
    for item in json_data['data']
)
expected output:
{'001': '102777c7-50a7-592d-1b65-621d5850a5bb',
'002': '102c7131-d797-c085-d248-621d5820494f',
'003': '1041f300-5acf-4bd9-2ec4-621d58bbe6bc'}

Use Python to iterate through nested JSON data

TASK:
I am using a API call to get JSON data from our TeamCity CI tool.
We need to identify all those builds which are using old version of msbuild.
We can identify from this API call data
{
"name": "msbuild_version",
"value": "15.0"
}
At the moment I am saving the entire API call data to a file; however, I will later integrate the API call into the same script.
Now to the question at hand: how can I filter on the above property, i.e. msbuild_version, to select msbuild_version < 15.0 (i.e. all msbuild versions lower than 15.0) and display the corresponding 'id' and 'projectName' under 'buildType'? E.g.
"id": "AIntegration_BTool_BToolBuilds_DraftBuild",
"projectName": "A Integration / B Tool / VAR Builds",
here is a part of the JSON data file:-
{
"project": [{
"id": "_Root",
"buildTypes": {
"buildType": []
}
}, {
"id": "AI_BTool_BToolBuilds",
"buildTypes": {
"buildType": [{
"id": "AI_BTool_BToolBuilds_DraftBuild",
"projectName": "A I / B Tool / VAR Builds",
"steps": {
"step": [ {
"id": "RUNNER_213",
"name": "Build",
"type": "MSBuild",
"properties": {
"property": [ {
"name": "msbuild_version",
"value": "16.0"
}, {
"name": "run-platform",
"value": "x64"
}, {
"name": "targets",
"value": "Build"
}, {
"name": "teamcity.step.mode",
"value": "default"
}, {
"name": "toolsVersion",
"value": "15.0"
}]
}
}, {
"id": "RUNNER_228",
"name": "temp",
"type": "VS.Solution",
"properties": {
"property": [{
"name": "build-file-path",
"value": "x"
}, {
"name": "msbuild_version",
"value": "16.0"
}, {
"name": "vs.version",
"value": "vs2019"
}]
}
}]
}
}, {
"id": "AI_BTool_BToolBuilds_ContinuousBuildWithNexusI",
"projectName": "A I / B Tool / VAR Builds",
"steps": {
"step": [ {
"id": "RUNNER_22791",
"name": "Build",
"type": "MSBuild",
"properties": {
"property": [{
"name": "msbuild_version",
"value": "16.0"
}, {
"name": "run-platform",
"value": "x86"
}, {
"name": "teamcity.step.mode",
"value": "default"
}, {
"name": "toolsVersion",
"value": "15.0"
}]
}
}]
}
}]
}
}, {
"id": "AI_BTool_BToolBuilds_VARApiBuilds",
"buildTypes": {
"buildType": [{
"id": "AI_BTool_BToolBuilds_CiVARNewSolutionContinuousBuild",
"projectName": "A I / B Tool / VAR Builds / VAR API builds",
"steps": {
"step": [ {
"id": "RUNNER_22791",
"name": "Build",
"type": "MSBuild",
"properties": {
"property": [{
"name": "msbuilds_version",
"value": "15.0"
}, {
"name": "toolsVersion",
"value": "15.0"
}]
}
}]
}
}, {
"id": "AI_BTool_BToolBuilds_VARApiBuilds_CiVARIngestionWindowsServiceNonReleaseBranchBuild",
"projectName": "A I / B Tool / VAR Builds / VAR API builds",
"steps": {
"step": [{
"id": "RUNNER_22790",
"name": "Nuget Installer",
"type": "jb.nuget.installer",
"properties": {
"property": [{
"name": "nuget.path",
"value": "%teamcity.tool.NuGet.CommandLine.4.9.2%"
}, {
"name": "msbuilds_version",
"value": "16.0"
}, {
"name": "nuget.use.restore",
"value": "restore"
}, {
"name": "sln.path",
"value": "VAR.sln"
}, {
"name": "teamcity.step.mode",
"value": "default"
}]
}
}]
}
}]
}
}]
}
My Solution till now
And my code snippet till now
import json
# Load the API response that was previously saved to disk.
with open('UnArchivedBuilds.txt') as saved_response:
    read_content = json.loads(saved_response.read())

# Print the build-type list of every project in the response.
for entry in read_content['project']:
    build_types = entry['buildTypes']
    print(build_types['buildType'])
I am not able to decide on the hierarchy of the JSON to print the relevant data (i.e id and projectName) wherever msbuild_version is less than 15.0
I had a look at your JSON data which was broken. In order to work with the snippet you provided I fixed the malformed data and removed unneeded parts to decrease clutter:
{
"project": [{
"buildTypes": {
"buildType": [{
"id": "AIntegration_BTool_BToolBuilds_DraftBuild",
"projectName": "A Integration / B Tool / VAR Builds"
},
{
"id": "AIntegration_BTool_BToolBuilds_ContinuousBuildIntegration",
"projectName": "A Integration / B Tool / VAR Builds"
}]
}
}]
}
As per my comment above I suggested recursion or using a schema validator to boil the JSON data down. However, as a quick-and-dirty approach and due to the weird structure of your dataset, I decided to do a multi-iteration over some parts of your data. Iterating over the same data is quite ugly and considered to be bad practice in most cases.
Assuming the data is stored in a file called input.json, the following snippet should give you the desired output:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
with open('input.json') as source:
    data = json.load(source)

# Walk project -> buildTypes -> buildType and print one line per build type.
for project in data.get('project'):
    for build in project.get('buildTypes').get('buildType'):
        print('{} --> {}'.format(build.get('id'), build.get('projectName')))
Printing:
AIntegration_BTool_BToolBuilds_DraftBuild --> A Integration / B Tool / VAR Builds
AIntegration_BTool_BToolBuilds_ContinuousBuildIntegration --> A Integration / B Tool / VAR Builds

issue in Elastic Search Term Aggregation

In an Elasticsearch aggregation query, I need to get all the movies watched by the users who watched the movie "Frozen". This is what my result source looks like:
{
"_index": "user",
"_type": "user",
"_id": "ovUowmUBREWOv-CU-4RT",
"_version": 4,
"_score": 1,
"_source": {
"movies": [
"Angry birds 1",
"PINNOCCHIO",
"Frozen",
"Hotel Transylvania 3"
],
"user_id": 86
}
}
This is the query I'm using.
{
"query": {
"match": {
"movies": "Frozen"
}
},
"size": 0,
"aggregations": {
"movies_like_Frozen": {
"terms": {
"field": "movies",
"min_doc_count": 1
}
}
}
}
The result I got in the buckets is correct, but the movie names are split on whitespace like this:
"buckets": [
{
"key": "3",
"doc_count": 2
},
{
"key": "hotel",
"doc_count": 2
},
{
"key": "transylvania",
"doc_count": 2
},
{
"key": "1",
"doc_count": 1
},
{
"key": "angry",
"doc_count": 1
},
{
"key": "birds",
"doc_count": 1
}
]
How can I get buckets with "Angry birds 1", "Hotel Transylvania 3" as result.
Please help.
In elasticsearch 6.x, every text field is analyzed implicitly. To override this, you need to create a mapping for text type fields as not_analyzed in an index, then insert documents in it.
In your case,
{
"mappings": {
"user": {
"properties": {
"movies": {
"type": "text",
"index": "not_analyzed",
"fields": {
"keyword": {
"type": "text",
"index": "not_analyzed"
}
}
},
"user_id": {
"type": "long"
}
}
}
}
}
Hope it works.

"object mapping [prices] can't be changed from nested to non-nested" on Bulk Python

I'm trying to insert a document into Elasticsearch, but every time I try to insert it from Python, it returns an error. If I insert it from Kibana or cURL, it succeeds.
I already tried elasticsearch-dsl, but I got the same error.
(Sorry for my bad English, I'm from Brazil :D)
Error i've got:
elasticsearch.helpers.BulkIndexError: ((...)'status': 400, 'error': {'type':
'illegal_argument_exception', 'reason': "object mapping [prices] can't be changed from nested to non-nested"}}}])
My code:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
# One-element list of bulk actions: a single document plus its target metadata.
doc = [{
# NOTE(review): the mapping shown later in this post is keyed as index
# "test_products" with type "products", the reverse of the names used here.
# TODO confirm which pairing is intended.
"_index": "products",
"_type": "test_products",
"_source": {
# NOTE(review): "[...]" looks like fields elided from the post rather than
# literal Python -- TODO confirm.
[...]
# "prices" and "prices.latest" are sent as plain objects here, while the
# mapping in this post declares both of them as "nested".
"prices": {
"latest": {
"value": 89,
"when": 1502795602848
},
"old": [
{
"value": 0,
"when": 1502795602848
}
]
},
"sizes": [
{
"name": "P",
"available": True
},
{
"name": "M",
"available": True
}
],
"created": "2017-08-15T08:13:22.848284"
}
}]
# Hand the action list to elasticsearch.helpers.bulk, using the client stored
# in self.es and passing index="products".
bulk(self.es, doc, index="products")
My ES mapping:
{
"test_products": {
"mappings": {
"products": {
"properties": {
"approved": {
"type": "boolean"
},
"available": {
"type": "boolean"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"buyClicks": {
"type": "integer"
},
"category": {
"type": "keyword"
},
"code": {
"type": "keyword"
},
"color": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"created": {
"type": "date"
},
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"gender": {
"type": "keyword"
},
"images": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"likes": {
"type": "integer"
},
"link": {
"type": "keyword"
},
"name": {
"type": "text",
"term_vector": "yes",
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
"prices": {
"type": "nested",
"properties": {
"latest": {
"type": "nested",
"properties": {
"value": {
"type": "long"
},
"when": {
"type": "date",
"format": "dd-MM-yyyy||epoch_millis"
}
}
},
"old": {
"type": "nested",
"properties": {
"value": {
"type": "long"
},
"when": {
"type": "date",
"format": "dd-MM-yyyy||epoch_millis"
}
}
}
}
},
"redirectClicks": {
"type": "integer"
},
"sizes": {
"type": "nested",
"properties": {
"available": {
"type": "boolean"
},
"name": {
"type": "keyword"
},
"quantity": {
"type": "integer"
}
}
},
"slug": {
"type": "keyword"
},
"store": {
"type": "keyword"
},
"subCategories": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"tags": {
"type": "text",
"fields": {
"raw": {
"type": "text",
"term_vector": "yes",
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
},
"thumbnails": {
"type": "keyword"
}
}
}
}
}
}

Categories