Removing items from JSON using Python loop

Removing items from JSON using Python loop - python

how do I iterate over the data and keep object keys that have the string "Java" in the value and remove keys with the string "Javascript" in the value? In addition to the iterations I already have in my code. For example:
this key has the word 'Java' in the value.
"value" : "A vulnerability in the encryption implementation of EBICS messages in the open source librairy ebics-java/ebics-java-client allows an attacker sniffing network traffic to decrypt EBICS payloads. This issue affects: ebics-java/ebics-java-client versions prior to 1.2."
the current code below iterates thru other JSON items (that are also needed), but not the Java/Javascript issue.
from encodings import utf_8
import json
from zipfile import ZipFile
from urllib.request import urlretrieve
from io import BytesIO
import os
url = "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2022.json.zip"
urlretrieve(url, "nvdcve-1.1-2022.json.zip")
with ZipFile('nvdcve-1.1-2022.json.zip', 'r') as zip:
zip.extractall('.')
with open('nvdcve-1.1-2022.json', encoding='utf-8') as x:
data = json.load(x)
#function to sort through without rewriting code with parameters/arguments passed into the function(variable)
def base_score(metric):
if 'baseMetricV3' not in metric['impact']:
#no values = 0 so it will auto sort by ID
return (0, metric['cve']['CVE_data_meta']['ID'])
#sorts by ID if two or more base scores are equal
return (metric['impact']['baseMetricV3']['cvssV3']['baseScore'], metric['cve']['CVE_data_meta']['ID'])
#return allows assigment of function output to new variable
#direct python to open json file using specific encoding to avoid encoding error
for CVE_Item in data['CVE_Items']:
for node in CVE_Item['configurations']['nodes']:
#removes items while iterating through them
node['cpe_match'][:] = [item for item in node['cpe_match'] if item['vulnerable']]
#also check children objects for vulnerable
if node['children']:
for children_node in node['children']:
children_node['cpe_match'][:] = [item for item in children_node['cpe_match'] if item['vulnerable']]
#sorts data in descending order using reverse
data['CVE_Items'].sort(reverse=True, key=base_score)
#write file to current working directory
with open('sorted_nvdcve-1.1-2022.json', 'w') as new_file:
new_file.write(json.dumps(data, indent=4))
if os.path.exists('nvdcve-1.1-2022.json.zip'):
os.remove('nvdcve-1.1-2022.json.zip')
else:
print("The file does not exist")
if os.path.exists('nvdcve-1.1-2022.json'):
os.remove('nvdcve-1.1-2022.json')
else:
print("The file does not exist")
here is the link to the original JSON file (too large to post entire text here):
https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2022.json.zip
the key 'value' is located in the 'description' list.
here is a sample of the JSON text:
{
"CVE_data_type" : "CVE",
"CVE_data_format" : "MITRE",
"CVE_data_version" : "4.0",
"CVE_data_numberOfCVEs" : "15972",
"CVE_data_timestamp" : "2022-11-01T07:00Z",
"CVE_Items" : [ {
"cve" : {
"data_type" : "CVE",
"data_format" : "MITRE",
"data_version" : "4.0",
"CVE_data_meta" : {
"ID" : "CVE-2022-0001",
"ASSIGNER" : "secure#intel.com"
},
"problemtype" : {
"problemtype_data" : [ {
"description" : [ {
"lang" : "en",
"value" : "NVD-CWE-noinfo"
} ]
} ]
},
"references" : {
"reference_data" : [ {
"url" : "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00598.html",
"name" : "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00598.html",
"refsource" : "MISC",
"tags" : [ "Vendor Advisory" ]
}, {
"url" : "http://www.openwall.com/lists/oss-security/2022/03/18/2",
"name" : "[oss-security] 20220318 Xen Security Advisory 398 v2 - Multiple speculative security issues",
"refsource" : "MLIST",
"tags" : [ "Mailing List", "Third Party Advisory" ]
}, {
"url" : "https://www.oracle.com/security-alerts/cpujul2022.html",
"name" : "N/A",
"refsource" : "N/A",
"tags" : [ "Patch", "Third Party Advisory" ]
}, {
"url" : "https://security.netapp.com/advisory/ntap-20220818-0004/",
"name" : "https://security.netapp.com/advisory/ntap-20220818-0004/",
"refsource" : "CONFIRM",
"tags" : [ "Third Party Advisory" ]
} ]
},
"description" : {
"description_data" : [ {
"lang" : "en",
"value" : "JavaScript sharing of branch predictor selectors between contexts in some Intel(R) Processors may allow an authorized user to potentially enable information disclosure via local access."
} ]
}

Add this inside the for CVE_Item loop.
CVE_Item['cve']['description']['description_data'] = [
d for d in CVE_Item['cve']['description']['description_data']
if 'Java' in d['value'] and 'JavaScript' not in d['value']]
The modified loop looks like:
for CVE_Item in data['CVE_Items']:
CVE_Item['cve']['description']['description_data'] = [
d for d in CVE_Item['cve']['description']['description_data']
if 'Java' in d['value'] and 'JavaScript' not in d['value']]
for node in CVE_Item['configurations']['nodes']:
#removes items while iterating through them
node['cpe_match'][:] = [item for item in node['cpe_match'] if item['vulnerable']]
#also check children objects for vulnerable
if node['children']:
for children_node in node['children']:
children_node['cpe_match'][:] = [item for item in children_node['cpe_match'] if item['vulnerable']]

Related

How do I parse nested json objects?

I am trying to load a JSON file to parse the contents nested in the root object. Currently I have the JSON file open and loaded as such:
with open(outputFile.name) as f:
data = json.load(f)
For the sake of the question here is an example of what the contents of the JSON file are like:
{
"rootObject" :
{
"person" :
{
"address" : "some place ave. 123",
"age" : 47,
"name" : "Joe"
},
"kids" :
[
{
"age" : 20,
"name" : "Joey",
"studySubject":"math"
},
{
"age" : 16,
"name" : "Josephine",
"studySubject":"chemistry"
}
],
"parents" :
{
"father" : "Joseph",
"mother" : "Joette"
}
How do I access the nested objects in "rootObject", such as "person", "kids" and its contents, and "parents"?

Below code using recursive function can extract values using specific key in a nested dictionary or 'lists of dictionaries':
data = {
"rootObject" :
{
"person" :
{
"address" : "some place ave. 123",
"age" : 47,
"name" : "Joe"
},
"kids" :
[
{
"age" : 20,
"name" : "Joey",
"studySubject":"math"
},
{
"age" : 16,
"name" : "Josephine",
"studySubject":"chemistry"
}
],
"parents" :
{
"father" : "Joseph",
"mother" : "Joette"
}
}}
def get_vals(nested, key):
result = []
if isinstance(nested, list) and nested != []: #non-empty list
for lis in nested:
result.extend(get_vals(lis, key))
elif isinstance(nested, dict) and nested != {}: #non-empty dict
for val in nested.values():
if isinstance(val, (list, dict)): #(list or dict) in dict
result.extend(get_vals(val, key))
if key in nested.keys(): #key found in dict
result.append(nested[key])
return result
get_vals(data, 'person')
Output
[{'address': 'some place ave. 123', 'age': 47, 'name': 'Joe'}]

The code for loading the JSON object should look like this:
from json import loads, load
with open("file.json") as file:
var = loads(load(file))
# loads() transforms the string in a python dict object

How can i use ijson to extract a set of corresponding data from json file?

I have a json file just like this:
{
"CVE_data_type" : "CVE",
"CVE_Items" : [ {
"cve" : {
"CVE_data_meta" : {
"ID" : "CVE-2020-0001",
"ASSIGNER" : "security#android.com"
},
...
"configurations" : {
"CVE_data_version" : "4.0",
"nodes" : [ {
"operator" : "OR",
"children" : [ ],
"cpe_match" : [ {
"vulnerable" : true,
"cpe23Uri" : "cpe:2.3:o:google:android:8.0:*:*:*:*:*:*:*",
"cpe_name" : [ ]
}, {
"vulnerable" : true,
"cpe23Uri" : "cpe:2.3:o:google:android:8.1:*:*:*:*:*:*:*",
"cpe_name" : [ ]
}]
} ]
},
...
"publishedDate" : "2020-01-08T19:15Z",
"lastModifiedDate" : "2020-01-14T21:52Z"
}]
}
And i want to extract the CVE-ID and corresponding CPE,so i can lcoate the CVE-ID through CPE,here is my code
import ijson
import datetime
def parse_json(filename):
with open(filename, 'rb') as input_file:
CVEID = ijson.items(input_file, 'CVE_Items.item.cve.CVE_data_meta.ID', )
for id in CVEID:
print("CVE id: %s" % id)
# for prefix, event, value in parser:
# print('prefix={}, event={}, value={}'.format(prefix, event, value))
with open(filename, 'rb') as input_file:
cpes = ijson.items(input_file, 'CVE_Items.item.configurations.nodes.item.cpe_match.item', )
for cpe in cpes:
print("cpe: %s" % cpe['cpe23Uri'])
def main():
parse_json("cve.json")
end = datetime.datetime.now()
if __name__ == '__main__':
main()
Results:
CVE id: CVE-2020-0633
CVE id: CVE-2020-0631
cpe: cpe:2.3:o:google:android:8.0:*:*:*:*:*:*:*
cpe: cpe:2.3:o:google:android:10.0:*:*:*:*:*:*:*
cpe: cpe:2.3:o:microsoft:windows_10:1607:*:*:*:*:*:*:*
cpe: cpe:2.3:o:microsoft:windows_server_2016:-:*:*:*:*:*:*:*
But above this just extract the data and no correspondence.
Could anyone help? A little help would be appreciated.

I think if you need to keep track of CVE IDs and their corresponding CPEs you'll need to iterate over whole cve items and extract the bits of data you need (so you'll only do one pass through the file). Not as efficient memory-wise as your original iteration, but if each item in CVE_Items is not too big then it's not a problem:
with open(filename, 'rb') as input_file:
for cves in ijson.items(input_file, 'CVE_Items.item')
cve_id = cve['cve']['CVE_data_meta']['ID']
cpes = [match
for node in cve['configurations']['nodes']
for match in node['cpe_match']]
If you know there's always a single cpe_match element in nodes then you can replace the last list comprehension by cve['configurations']['nodes'][0]['cpe_match']

python searching through dict that contain list of nested dict

I am looking to pull all the "symbol" from a Dict that looks like this:
file_data = json.load(f)
{
"symbolsList" : [ {
"symbol" : "SPY",
"name" : "SPDR S&P 500",
"price" : 261.56,
"exchange" : "NYSE Arca"
}, {
"symbol" : "CMCSA",
"name" : "Comcast Corporation Class A Common Stock",
"price" : 35.49,
"exchange" : "Nasdaq Global Select"
}, {
"symbol" : "KMI",
"name" : "Kinder Morgan Inc.",
"price" : 13.27,
"exchange" : "New York Stock Exchange"
}
}
after looking up I found a way to access certain symbol. but I would like to get all the symbol in a form of list or dict doesn't really matter to me.
this is what I got:
print([next(item for item in file_data["symbolsList"] if item["symbol"] == "SPY")])
I know that the problem is with the next function I just don't know how to get all the symbols

you can use a list comprehension:
[e['symbol'] for e in d['symbolsList']]
output:
['SPY', 'CMCSA', 'KMI']
the same thing using a for loop:
result = []
for e in d['symbolsList']
result.append(e['symbol'])

Extract values from oddly-nested Python

I must be really slow because I spent a whole day googling and trying to write Python code to simply list the "code" values only so my output will be Service1, Service2, Service2. I have extracted json values before from complex json or dict structure. But now I must have hit a mental block.
This is my json structure.
myjson='''
{
"formatVersion" : "ABC",
"publicationDate" : "2017-10-06",
"offers" : {
"Service1" : {
"code" : "Service1",
"version" : "1a1a1a1a",
"index" : "1c1c1c1c1c1c1"
},
"Service2" : {
"code" : "Service2",
"version" : "2a2a2a2a2",
"index" : "2c2c2c2c2c2"
},
"Service3" : {
"code" : "Service4",
"version" : "3a3a3a3a3a",
"index" : "3c3c3c3c3c3"
}
}
}
'''
#convert above string to json
somejson = json.loads(myjson)
print(somejson["offers"]) # I tried so many variations to no avail.

Or, if you want the "code" stuffs :
>>> [s['code'] for s in somejson['offers'].values()]
['Service1', 'Service2', 'Service4']

somejson["offers"] is a dictionary. It seems you want to print its keys.
In Python 2:
print(somejson["offers"].keys())
In Python 3:
print([x for x in somejson["offers"].keys()])
In Python 3 you must use the list comprehension because in Python 3 keys() is a 'view', not a list.

This should probably do the trick , if you are not certain about the number of Services in the json.
import json
myjson='''
{
"formatVersion" : "ABC",
"publicationDate" : "2017-10-06",
"offers" : {
"Service1" : {
"code" : "Service1",
"version" : "1a1a1a1a",
"index" : "1c1c1c1c1c1c1"
},
"Service2" : {
"code" : "Service2",
"version" : "2a2a2a2a2",
"index" : "2c2c2c2c2c2"
},
"Service3" : {
"code" : "Service4",
"version" : "3a3a3a3a3a",
"index" : "3c3c3c3c3c3"
}
}
}
'''
#convert above string to json
somejson = json.loads(myjson)
#Without knowing the Services:
offers = somejson["offers"]
keys = offers.keys()
for service in keys:
print(somejson["offers"][service]["code"])

Mongodb document traversing

I have a query in mongo db, tried lots of solution but still not found it working. Any help will be appreciated.
How to find all keys named "channel" in document?
db.clients.find({"_id": 69})
{
"_id" : 69,
"configs" : {
"GOOGLE" : {
"drid" : "1246ABCD",
"adproviders" : {
"adult" : [
{
"type" : "landing",
"adprovider" : "abc123",
"channel" : "abc456"
},
{
"type" : "search",
"adprovider" : "xyz123",
"channel" : "xyz456"
}
],
"nonadult" : [
{
"type" : "landing",
"adprovider" : "pqr123",
"channel" : "pqr456"
},
{
"type" : "search",
"adprovider" : "lmn123",
"channel" : "lmn456"
}
]
}
},
"channel" : "ABC786",
"_cls" : "ClientGoogleDoc"
}
}
Trying to find keys with name channel
db.clients.find({"_id": 69, "channel": true})
Expecting:
{"channels": ["abc456", "xyz456", "ABC786", "xyz456", "pqr456", "lmn456", ...]}

As far as I know, you'd have to use python to recursively traverse the dictionary yourself in order to build the list that you want above:
channels = []
def traverse(my_dict):
for key, value in my_dict.items():
if isinstance(value, dict):
traverse(value)
else:
if key == "channel":
channels.append(value)
traverse({"a":{"channel":"abc123"}, "channel":"xyzzz"})
print(channels)
output:
['abc123', 'xyzzz']
However, using a thing called projections you can get sort of close to what you want (but not really, since you have to specify all of the channels manually):
db.clients.find({"_id": 69}, {"configs.channel":1})
returns:
{ "_id" : ObjectId("69"), "configs" : { "channel" : "ABC786" } }
If you want to get really fancy, you could write a generator function to generate all the keys in a given dictionary, no matter how deep:
my_dict = { "a": {
"channel":"abc123",
"key2": "jjj",
"subdict": {"deep_key": 5, "channel": "nested"}
},
"channel":"xyzzz"}
def getAllKeys(my_dict):
for key, value in my_dict.items():
yield key, value
if isinstance(value, dict):
for key, value in getAllKeys(value):
yield key, value
for key, value in getAllKeys(my_dict):
if key == "channel":
print value
output:
nested
abc123
xyzzz

You can use the $project mongodb operator to get only the value for the specific key. Check the documentation at http://docs.mongodb.org/manual/reference/operator/aggregation/project/

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Removing items from JSON using Python loop - python

Related

How do I parse nested json objects?

How can i use ijson to extract a set of corresponding data from json file?

python searching through dict that contain list of nested dict

Extract values from oddly-nested Python

Mongodb document traversing

Categories

Resources