Dictionary with same keys in python

I am trying to create a JSON object using a dictionary in Python. As far as I understand, keys need to be unique, but in my case the array has multiple items with the same key, so it looks like a plain dictionary will not work for me here. I am trying to understand my options. Finally, I will be saving this JSON object into a JSON file on the server.
data = {}
data['key1'] = hostname
for line in pipe.stdout:
    parts = line.split()  # split line into parts
    if len(parts) > 1:    # if at least 2 parts/columns
        data['package'] = { 'name': parts[0], 'installed': parts[1], 'available': parts[2]}
print(json.dumps(data, indent=4))
Expected JSON output:
{
    "key1": "xyz-abc-m001",
    "package": [
        { "name":"abc", "installed":"1:1", "available":"1:1.2." },
        { "name":"xyz", "installed":"2.02", "available":"2.02" },
        { "name":"zyc", "installed":"1.17.1", "available":"1.17.1" }
    ]
}

data = {}
data['key1'] = hostname
data['package'] = []
for line in pipe.stdout:
    parts = line.split()  # split line into parts
    if len(parts) > 1:    # if at least 2 parts/columns
        data['package'].append({ 'name': parts[0], 'installed': parts[1], 'available': parts[2]})
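To then save the finished structure as a JSON file on the server, something like this should work (a minimal sketch; the data.json path is just an example):

import json

# write the assembled dict, including the list under 'package', to disk
with open('data.json', 'w') as out:
    json.dump(data, out, indent=4)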


"TypeError: list indices must be integers or slices, not str" when trying to change keys

I want to remove some problematic $oid and everything that contains $ in a json file. I wrote:
import json
with open('C:\\Windows\\System32\\files\\news.json', 'r', encoding="utf8") as handle:
    data = [json.loads(line) for line in handle]
for k,v in data[0].items():
    #check if key has dict value
    if type(v) == dict:
        #find id with $
        r = list(data[k].keys())[0]
        #change value if $ occurs
        if r[0] == '$':
            data[k] = data[k][r]
print(data)
But I get TypeError: list indices must be integers or slices, not str. I know it is because the JSON dictionaries are made readable for Python, but how do I fix it?
Edit: the .json file on my computer looks like this:
{
    "_id": {
        "$oid": "5e7511c45cb29ef48b8cfcff"
    },
    "description": "some text",
    "startDate": {
        "$date": "5e7511c45cb29ef48b8cfcff"
    },
    "completionDate": {
        "$date": "2021-01-05T14:59:58.046Z"
    }
}
I believe this is because your k is a str and you try to call data[k]?
It will be better if you show the format of the json as well.
Updating with answer.
This should work for the given json. But if you want to do this for a larger file, looping can be tricky, especially because you're trying to modify the keys of a dictionary while iterating over it.
import json
line = '{"_id": { "$oid": "5e7511c45cb29ef48b8cfcff" }, "description": "some text", "startDate": { "$date": "5e7511c45cb29ef48b8cfcff"},"completionDate": {"$date": "2021-01-05T14:59:58.046Z"}}'
data = [json.loads(line)]
for k,v in data[0].items():
    if type(v) == dict:
        # iterate over a copy of the items, since the loop deletes and re-adds keys
        for k2, v2 in list(data[0][k].items()):
            if k2[0] == '$':
                formatted = k2[1:]
                del data[0][k][k2]
                data[0][k][formatted] = v2
print(data)
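For reference, with the sample line above this should print a single-element list with the $ stripped from the nested keys:

[{'_id': {'oid': '5e7511c45cb29ef48b8cfcff'}, 'description': 'some text', 'startDate': {'date': '5e7511c45cb29ef48b8cfcff'}, 'completionDate': {'date': '2021-01-05T14:59:58.046Z'}}]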
# import json
# with open('C:\\Windows\\System32\\files\\news.json', 'r', encoding="utf8") as handle:
#     data = [json.loads(line) for line in handle]
data = [
    {
        "_id": {
            "$oid": "5e7511c45cb29ef48b8cfcff"
        },
        "description": "some text",
        "startDate": {
            "$date": "5e7511c45cb29ef48b8cfcff"
        },
        "completionDate": {
            "$date": "2021-01-05T14:59:58.046Z"
        }
    }
]
for d in data:
    for k, v in d.items():
        # check if key has dict value
        del_keys = set()
        if type(v) == dict:
            # find id with $
            del_keys.update([i for i in v if i.startswith("$")])
            [v.pop(key) for key in del_keys]
print(data)
# [{'_id': {}, 'description': 'some text', 'startDate': {}, 'completionDate': {}}]
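If the goal is to keep the values and only drop the $ prefix from the keys, at any nesting depth, a small recursive helper is another option. A sketch (the strip_dollar name is mine, not from the thread):

def strip_dollar(obj):
    # return a copy of obj with any leading '$' removed from dict keys, recursively
    if isinstance(obj, dict):
        return {k.lstrip('$'): strip_dollar(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [strip_dollar(item) for item in obj]
    return obj

cleaned = [strip_dollar(d) for d in data]  # data as originally loaded, before any keys were removed
print(cleaned)
# [{'_id': {'oid': '5e7511c45cb29ef48b8cfcff'}, 'description': 'some text', ...}]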

Iterate / Loop thru a json file using python multiple times

I've a JSON file,
{
    "IGCSE": [
        {
            "rolename": "igcsesubject1",
            "roleid": 764106550863462431
        },
        {
            "rolename": "igcsesubject2",
            "roleid": 764106550863462431
        }
    ],
    "AS": [
        {
            "rolename": "assubject1",
            "roleid": 854789476987546
        },
        {
            "rolename": "assubject2",
            "roleid": 854789476987546
        }
    ],
    "A2": [
        {
            "rolename": "a2subject1",
            "roleid": 854789476987856
        },
        {
            "rolename": "a2subject2",
            "roleid": 854789476987856
        }
    ]
}
I want to fetch the keys [igcse, as, a2..] and then fetch the rolename and roleid under the specific keys. How do I do it?
Below is the Python code for how I used to do it without the keys.
with open(fileloc) as f:
    data = json.load(f)
for s in range(len(data)):
    d1 = data[s]
    rname = d1["rolename"]
    rid = d1["roleid"]
any help would be appreciated :)
First you can have a list of keys, under which you will get them:
l = ['AS', 'A2']
Then iterate like this:
for x in data:
    if x in l:
        for y in range(len(data[x])):
            print(data[x][y]['rolename'])
            print(data[x][y]['roleid'])
Hi, you can use a for loop and you will get the keys:
with open(fileloc) as f:
    data = json.load(f)
for s in data:
    for d1 in data[s]:  # each key holds a list of role dicts
        rname = d1["rolename"]
        rid = d1["roleid"]
The following would work for what you need:
with open(file) as f:
    json_dict = json.load(f)
for key in json_dict:
    value_list = json_dict[key]
    for item in value_list:
        rname = item["rolename"]
        rid = item["roleid"]
If you need to filter for specific keys in the JSON, you can have a list of keys you want to obtain and filter for those keys as you iterate through the keys (similar to Wasif Hasan's suggestion above).
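A sketch of that filtering idea, collecting the results instead of just printing them (wanted_keys and roles are my own names, not from the thread):

import json

wanted_keys = ["IGCSE", "AS"]  # only fetch these groups
with open(fileloc) as f:
    data = json.load(f)

roles = {}
for key in data:
    if key in wanted_keys:
        # list of (rolename, roleid) pairs for this group
        roles[key] = [(item["rolename"], item["roleid"]) for item in data[key]]
print(roles)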

How to group dictionary elements into lists dynamically?

I have a JSON file as mentioned below,
**test.json**
{
    "header1" :
    {
        "header1_body1":
        {
            "some_key":"some_value",
            .......................
        },
        "header1_body2":
        {
            "some_key":"some_value",
            .......................
        }
    },
    "header2":
    {
        "header2_body1":
        {
            "some_key":"some_value",
            .......................
        },
        "header2_body2":
        {
            "some_key":"some_value",
            .......................
        }
    }
}
Would like to group the JSON content into lists as below:
header1 = ['header1_body1','header1_body2']
header2 = ['header2_body1','header2_body2']
header1, header2 can go on up to header n, so the lists have to be created dynamically, each containing its values as shown above.
How can I achieve this?
What's the most optimal way to approach it?
SOLUTION:
with open('test.json') as json_data:
    d = json.load(json_data)
for k, v in d.iteritems():
    if k == "header1" or k == "header2":
        globals()['{}'.format(k)] = d[k].keys()

Now, header1 and header2 can be accessed as lists:

for i in header1:
    print i
Assuming you read the JSON into a variable d (maybe using json.loads), you could iterate over the keys (sorted?) and build the lists from the keys of the current value:
for key in sorted(d.keys()):
    l = [x for x in sorted(d[key].keys())]  # using list comprehension
    print(key + ' = ' + str(l))
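Assuming d was loaded from the test.json above, that loop prints:

header1 = ['header1_body1', 'header1_body2']
header2 = ['header2_body1', 'header2_body2']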
Fixing your json structure:
{
    "header1" :
    {
        "header1_body1":
        {
            "some_key":"some_value"
        },
        "header1_body2":
        {
            "some_key":"some_value"
        }
    },
    "header2":
    {
        "header2_body1":
        {
            "some_key":"some_value"
        },
        "header2_body2":
        {
            "some_key":"some_value"
        }
    }
}
And then loading and creating lists:
header = []
for key, value in dictdump.items():
    header.append(list(value.keys()))
for header_num in range(0, len(header)):
    print("header{} : {}".format(header_num + 1, header[header_num]))
Gives:
header1 : ['header1_body1', 'header1_body2']
header2 : ['header2_body1', 'header2_body2']
Once you load your json, you can get the list you want for any key by doing something like the following (the headers variable below is a placeholder for your loaded json). You don't need to convert it to a list to work with it as an iterable, but it is wrapped in list(...) here to match the output in your question.
list(headers['header1'].keys())
If you need to actually store the list of keys for each of your "header" dicts in some sort of accessible format, then you could create another dictionary that contains the lists you want. For example:
import json
data = """{
    "header1" : {
        "header1_body1": {
            "some_key":"some_value"
        },
        "header1_body2": {
            "some_key":"some_value"
        }
    },
    "header2": {
        "header2_body1": {
            "some_key":"some_value"
        },
        "header2_body2": {
            "some_key":"some_value"
        }
    }
}"""
headers = json.loads(data)

# get the list of keys for a specific header
header = list(headers['header1'].keys())
print(header)
# ['header1_body1', 'header1_body2']

# if you really want to store them in another dict
results = {h[0]: list(h[1].keys()) for h in headers.items()}
print(results)
# OUTPUT
# {'header1': ['header1_body1', 'header1_body2'], 'header2': ['header2_body1', 'header2_body2']}
You can use recursion:
d = {'header1': {'header1_body1': {'some_key': 'some_value'}, 'header1_body2': {'some_key': 'some_value'}}, 'header2': {'header2_body1': {'some_key': 'some_value'}, 'header2_body2': {'some_key': 'some_value'}}}

def flatten(_d):
    for a, b in _d.items():
        yield a
        if isinstance(b, dict):
            yield from flatten(b)

new_results = {a: [i for i in flatten(b) if i.startswith(a)] for a, b in d.items()}
Output:
{'header1': ['header1_body1', 'header1_body2'], 'header2': ['header2_body1', 'header2_body2']}
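For reference, the generator on its own yields every key at every depth; the dict comprehension then keeps only the ones that start with the corresponding header name:

print(list(flatten(d)))
# ['header1', 'header1_body1', 'some_key', 'header1_body2', 'some_key',
#  'header2', 'header2_body1', 'some_key', 'header2_body2', 'some_key']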
import json
with open('test.json') as json_data:
    d = json.load(json_data)
for k, v in d.iteritems():
    if k == "header1" or k == "header2":
        globals()['{}'.format(k)] = d[k].keys()

Now, `header1` and `header2` can be accessed as lists:

for i in header1:
    print i
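Note that d.iteritems() and print i are Python 2 syntax. A rough Python 3 equivalent that avoids globals() by keeping the lists in an ordinary dict (the headers name is my own):

import json

with open('test.json') as json_data:
    d = json.load(json_data)

# map each top-level key to the list of its child keys
headers = {k: list(v.keys()) for k, v in d.items()}

for name in headers['header1']:
    print(name)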

nested json to csv using pandas normalize

With the given script I am able to get output as shown in a screenshot,
but there is a column named cve.description.description_data which is again in JSON format. I want to extract that data as well.
import json
import pandas as pd
from pandas.io.json import json_normalize

# load json object
with open('nvdcve-1.0-modified.json') as f:
    d = json.load(f)

# the parent node is 'CVE_Items'
nycphil = json_normalize(d['CVE_Items'])
nycphil.head(3)

works_data = json_normalize(data=d['CVE_Items'], record_path='cve')
works_data.head(3)

nycphil.to_csv("test4.csv")
If I change it to works_data = json_normalize(data=d['CVE_Items'], record_path='cve.description') it gives this error:
"result = result[spec] KeyError: 'cve.description'"
JSON format as follows:
{
    "CVE_data_type":"CVE",
    "CVE_data_format":"MITRE",
    "CVE_data_version":"4.0",
    "CVE_data_numberOfCVEs":"1000",
    "CVE_data_timestamp":"2018-04-04T00:00Z",
    "CVE_Items":[
        {
            "cve":{
                "data_type":"CVE",
                "data_format":"MITRE",
                "data_version":"4.0",
                "CVE_data_meta":{
                    "ID":"CVE-2001-1594",
                    "ASSIGNER":"cve#mitre.org"
                },
                "affects":{
                    "vendor":{
                        "vendor_data":[
                            {
                                "vendor_name":"gehealthcare",
                                "product":{
                                    "product_data":[
                                        {
                                            "product_name":"entegra_p&r",
                                            "version":{
                                                "version_data":[
                                                    {
                                                        "version_value":"*"
                                                    }
                                                ]
                                            }
                                        }
                                    ]
                                }
                            }
                        ]
                    }
                },
                "problemtype":{
                    "problemtype_data":[
                        {
                            "description":[
                                {
                                    "lang":"en",
                                    "value":"CWE-255"
                                }
                            ]
                        }
                    ]
                },
                "references":{
                    "reference_data":[
                        {
                            "url":"http://apps.gehealthcare.com/servlet/ClientServlet/2263784.pdf?DOCCLASS=A&REQ=RAC&DIRECTION=2263784-100&FILENAME=2263784.pdf&FILEREV=5&DOCREV_ORG=5&SUBMIT=+ ACCEPT+"
                        },
                        {
                            "url":"http://www.forbes.com/sites/thomasbrewster/2015/07/10/vulnerable- "
                        },
                        {
                            "url":"https://ics-cert.us-cert.gov/advisories/ICSMA-18-037-02"
                        },
                        {
                            "url":"https://twitter.com/digitalbond/status/619250429751222277"
                        }
                    ]
                },
                "description":{
                    "description_data":[
                        {
                            "lang":"en",
                            "value":"GE Healthcare eNTEGRA P&R has a password of (1) value."
                        }
                    ]
                }
            },
            "configurations":{
                "CVE_data_version":"4.0",
                "nodes":[
                    {
                        "operator":"OR",
                        "cpe":[
                            {
                                "vulnerable":true,
                                "cpe22Uri":"cpe:/a:gehealthcare:entegra_p%26r",
                                "cpe23Uri":"cpe:2.3:a:gehealthcare:entegra_p\\&r:*:*:*:*:*:*:*:*"
                            }
                        ]
                    }
                ]
            },
            "impact":{
                "baseMetricV2":{
                    "cvssV2":{
                        "version":"2.0",
                        "vectorString":"(AV:N/AC:L/Au:N/C:C/I:C/A:C)",
                        "accessVector":"NETWORK",
                        "accessComplexity":"LOW",
                        "authentication":"NONE",
                        "confidentialityImpact":"COMPLETE",
                        "integrityImpact":"COMPLETE",
                        "availabilityImpact":"COMPLETE",
                        "baseScore":10.0
                    },
                    "severity":"HIGH",
                    "exploitabilityScore":10.0,
                    "impactScore":10.0,
                    "obtainAllPrivilege":false,
                    "obtainUserPrivilege":false,
                    "obtainOtherPrivilege":false,
                    "userInteractionRequired":false
                }
            },
            "publishedDate":"2015-08-04T14:59Z",
            "lastModifiedDate":"2018-03-28T01:29Z"
        }
    ]
}
I want to flatten all data.
Assuming the multiple URLs delineate the rows and all other metadata repeats, consider a recursive function call to extract every key-value pair in the nested json object, d.
The recursive function uses global to update the needed global objects, which are then bound into a list of dictionaries for the pd.DataFrame() call. The last loop at the end copies the recursive function's dictionary, inner, and merges in each of the different urls (stored in multi).
import json
import pandas as pd

# load json object
with open('nvdcve-1.0-modified.json') as f:
    d = json.load(f)

multi = []; inner = {}

def recursive_extract(i):
    global multi, inner
    if type(i) is list:
        if len(i) == 1:
            for k, v in i[0].items():
                if type(v) in [list, dict]:
                    recursive_extract(v)
                else:
                    inner[k] = v
        else:
            multi = i
    if type(i) is dict:
        for k, v in i.items():
            if type(v) in [list, dict]:
                recursive_extract(v)
            else:
                inner[k] = v

recursive_extract(d['CVE_Items'])

data_dict = []
for i in multi:
    tmp = inner.copy()
    tmp.update(i)
    data_dict.append(tmp)

df = pd.DataFrame(data_dict)
df.to_csv('Output.csv')
Output (all columns the same except for URL, widened for emphasis)
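If you mainly need the nested cve.description.description_data column rather than every field, json_normalize can also reach it directly when the nested path is given as a list instead of a dotted string. A rough sketch assuming the file layout above (the descriptions.csv name is just an example):

import json
import pandas as pd
from pandas.io.json import json_normalize  # pd.json_normalize in newer pandas

with open('nvdcve-1.0-modified.json') as f:
    d = json.load(f)

# one row per description entry; meta columns repeat the CVE-level fields
desc = json_normalize(
    data=d['CVE_Items'],
    record_path=['cve', 'description', 'description_data'],
    meta=[['cve', 'CVE_data_meta', 'ID'], 'publishedDate', 'lastModifiedDate'],
)
desc.to_csv('descriptions.csv')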

Create JSON array from python list

I am currently trying to write something to convert an Ansible inventory file into a JSON array which would allow it to be pulled into awx/tower; however, I have struggled to build a brand new array from the current inventory file format. I am avoiding use of any of the Ansible python API modules as there is no guarantee that future updates won't break these. One solution I found no longer works, as there appears to be a change to the Ansible InventoryParser python module, so I'm trying to come up with a Python 2.7 solution.
Example inventory file;
[test]
host1
host2
[test1]
host3
host4
The [] signify groups and the other entries are the hosts, which will form the key:value relationship. I have converted this to a list in Python and I am then attempting to format it into a key:value setup, using the [] to determine where to split the key from the values.
both = []
f = open(filename, "r")
line = f.readline().strip()
while line:
    both.append(line)
    line = f.readline().strip()
f.close()

start = '['
end = ']'
json_dict = {'all': [dict(item.split(start)[1].split(end)[0] for item in both)]}
print json.dumps(json_dict)
Unfortunately this returns the error:
ValueError: dictionary update sequence element #0 has length 4; 2 is required
Although truth be told I'm not sure this will return what I am looking for regardless.
Hoping someone can point me in the right direction or highlight where I've gone wrong so far.
Cheers
EDIT: Adding some code for what output is actually expected;
{
    [test]: {
        'hosts': ['host1', 'host2'],
    },
    [test1]: {
        'hosts': ['host3', 'host4'],
    }
}
A more detailed output example of what I'm trying to achieve;
{
    "databases" : {
        "hosts" : [ "host1.example.com", "host2.example.com" ],
        "vars" : {
            "a" : true
        }
    },
    "webservers" : [ "host2.example.com", "host3.example.com" ],
    "atlanta" : {
        "hosts" : [ "host1.example.com", "host4.example.com", "host5.example.com" ],
        "vars" : {
            "b" : false
        }
    },
    "marietta" : [ "host6.example.com" ],
    "5points" : [ "host7.example.com" ]
}
So we have a key which holds the group names and within that there are key:value pairs for hosts and vars.
After some more study I am closer to the output I desire with the following code;
both = {}
group = None
with open(filename, "r") as f:
    line = f.readline().strip()
    while line:
        if line.startswith('#') or line.startswith(';') or len(line) == 0:
            line = f.readline().strip()  # read the next line before continue, otherwise this loops forever
            continue
        if line.startswith("["):
            # is a group
            group = line
            both[group] = {}
        elif not line.startswith("["):
            host = line
            both[group][host] = {}
        line = f.readline().strip()
f.close()
return both
This returns the following, which isn't quite what I'm after, but I feel like I am making progress;
{
    "[test2]": {
        "host1": {},
        "host2": {}
    },
    "[test3]": {
        "host3": {}
    },
    "[test]": {
        "host4": {},
        "host5": {}
    }
}
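To get from that structure to the desired {group: {'hosts': [...]}} shape, one option is to strip the brackets from the group name and collect the hosts in a list. A minimal sketch along the same lines as the loop above (variable names are mine):

import json

both = {}
group = None
with open(filename, "r") as f:
    for raw in f:
        line = raw.strip()
        if not line or line.startswith('#') or line.startswith(';'):
            continue  # skip blank lines and comments
        if line.startswith('['):
            group = line.strip('[]')  # "test" instead of "[test]"
            both[group] = {'hosts': []}
        elif group is not None:
            both[group]['hosts'].append(line)  # plain hostname under its group

print(json.dumps(both, indent=4))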
This may help you.
import json
both = {}
start = '['
end = ']'
with open(filename, "r") as f:
    line = f.readline().strip()
    while line:
        if start in line or end in line:
            line = line.split(start)[1].split(end)[0]
            both[line] = line
        line = f.readline().strip()
json_dict = {'all': [both]}
print(json.dumps(json_dict))
