Return specific values from nested Elastic Object - python

I have to preface this with the fact that I'm working with the Elasticsearch module, which returns an elastic_transport.ObjectApiResponse. My problem is that I need to select specific keys from this JSON/dictionary-like log. The indices come from different sources, and thus contain different key/value pairs. The values I need to select are ip, port, username, rule_name, severity, and risk_score. The problem is that they have different key names and each dictionary is vastly different from the other, but they all contain those values. After that, I'll throw them into a Pandas dataframe and create a table with those values. Should a value be missing, I'll fill it with a '-'.
So my question is how I can iterate over these nested objects that are neither ordered nor standardized? Any help is appreciated. Below is a sample of the data.
{
'took': 11,
'timed_out': False,
'_shards': {
'total': 17,
'successful': 17,
'skipped': 0, 'failed': 0
},
'hits': {
'total': {'value': 58, 'relation': 'eq'},
'max_score': 0.0,
'hits': [
{
'_index': '.siem-signals-default-000017',
'_type': '_doc',
'_id': 'abcd1234',
'_score': 0.0,
'_source': {
'#timestamp': '2023-02-09T15:24:09.368Z',
'process': {'pid': 668, 'executable': 'C:\\Windows\\System32\\lsass.exe', 'name': 'lsass.exe'},
'ecs': {'version': '1.10.0'},
'winlog': {
'computer_name': 'SRVDC1',
'User': 'John.Smith',
'api': 'wineventlog',
'keywords': ['Audit Failure']
},
'source':{'domain': 'SRVDC1', 'ip': '10.17.13.118', 'port': 42548}}
'rule': {'id': 'aaabbb', 'actions': [], 'interval': '2m', 'name': 'More Than 3 Failed Login Attempts Within 1 Hour '}
},
{
'_index': '.siem-signals-default-000017',
'_type': '_doc',
'_id': 'abc123',
'_score': 0.0,
'_source': {
'#timestamp': '2023-02-09T15:24:09.369Z',
'log': {'level': 'information'},
'user': {
'id': 'S-1-0-0',
'name': 'John.Smith',
'domain': 'ACME'
},
'related': {
'port': '42554',
'ip': '10.17.13.118'
},
'logon': {'id': '0x3e7', 'type': 'Network', 'failure': {'sub_status': 'User logon with misspelled or bad password'}},
'meta': {'risk_score': 46, 'severity': 'medium'}}},
{
'_index': '.siem-signals-default-000017',
'_type': '_doc',
'_id': 'zzzzz',
'_score': 0.0,
'_source': {
'source': {
'port': '56489',
'ip': '10.18.13.101'
},
'observer': {
'type': 'firewall',
'name': 'pfSense',
'serial_number': 'xoxo',
'product': 'Supermicro',
'ip': '10.7.3.253'
},
'process': {'name': 'filterlog', 'pid': '45005'},
'tags': ['firewall', 'IP_Private_Source', 'IP_Private_Destination'],
'destination': {'service': 'microsoft-ds', 'port': '445', 'ip': '10.250.0.64'},
'log': {'risk_score': 73, 'severity': 'high'},
'rule':{'name': 'Logstash Firewall (NetBIOS and SMB Vulnerability)'}}}]}}
Expected Output
The sample below is possible only when the logs have the same standard structure.

Related

create new json data from values in a dictionary and nested dictionary

i have 2 dictionaries and i wish to create a new json data with values from both dictionaries as follows.
dic_a = [{'name': 'puskas',
'description': 'puskas is the command center for football',
'size': '251-1K',
'revenue': '$50M-$100M',
'industryTags': ['football federation']}]
dic_b = {'page': 1,
'total': 14,
'results': [{'id': 'i01',
'name': {'fullName': 'luka modric',
'givenName': 'luka',
'familyName': 'modric'},
'role': 'leadership',
'subRole': 'ceo',
'title': 'CEO',
'company': {'name': 'puskas'},
'email': 'luka#puskas.com',
'verified': True},
{'id': 'i02',
'name': {'fullName': 'gucci mane',
'givenName': 'gucci',
'familyName': 'mane'},
'role': 'leadership',
'subRole': 'founder',
'title': 'Co-founder, CTO',
'company': {'name': 'puskas'},
'email': 'gucchi.mane#puskas.com',
'verified': True},
{'id': 'i03',
'name': {'fullName': 'tom ford',
'givenName': 'tom',
'familyName': 'ford'},
'role': 'leadership',
'subRole': 'founder',
'title': 'founder',
'company': {'name': 'puskas'},
'email': 'tomford#puskas.com',
'verified': True}]}
I want to take selected values from b, append them to a, and then convert the result to JSON and return it as c.
I have tried a few snippets based on syntax I researched here, but they don't work. I am expecting the JSON result to look like this:
json_c = [{'name': 'puskas',
'description': 'puskas is the command center for football',
'size': '251-1K',
'revenue': '$50M-$100M',
'industryTags': ['football federation'],
'leads': [{'id': 'i01',
'name': 'luka modric',
'title': 'CEO',
'company': {'name': 'puskas'},
'email': 'luka#puskas.co',
'verified': True},
{'id': 'i02',
'name': 'gucci mane',
'title': 'Co-founder, CTO',
'company': {'name': 'gucci'},
'email': 'gucchi.mane#gucci.com',
'verified': True},
{'id': 'i03',
'name': 'tom ford',
'title': 'founder',
'company': {'name': 'xyz'},
'email': 'tomford#xyz.co',
'verified': True}]}]
such problems can be solved easily with jmespath
import jmespath
import json

# JMESPath projection: for every entry of dic_b['results'] build a new object
# holding id, the nested name.fullName (as 'name'), title, company, email and
# verified.
lead_fields = 'results[].{id:id, name:name.fullName,title:title ,company:company,email:email,verified:verified }'

# c is the same list object as dic_a (no copy is made), so the 'leads' entry
# is added to the first dictionary of dic_a itself.
c = dic_a
c[0]['leads'] = jmespath.search(lead_fields, dic_b)

# Serialise with 4-space indentation, leaving non-ASCII characters unescaped.
json_string = json.dumps(c, indent=4, ensure_ascii=False)
print(json_string)
# [
# {
# "name": "puskas",
# "description": "puskas is the command center for football",
# "size": "251-1K",
# "revenue": "$50M-$100M",
# "industryTags": [
# "football federation"
# ],
# "leads": [
# {
# "id": "i01",
# "name": "luka modric",
# "title": "CEO",
# "company": {
# "name": "puskas"
# },
# "email": "luka#puskas.com",
# "verified": true
# },
# {
# "id": "i02",
# "name": "gucci mane",
# "title": "Co-founder, CTO",
# "company": {
# "name": "puskas"
# },
# "email": "gucchi.mane#puskas.com",
# "verified": true
# },
# {
# "id": "i03",
# "name": "tom ford",
# "title": "founder",
# "company": {
# "name": "puskas"
# },
# "email": "tomford#puskas.com",
# "verified": true
# }
# ]
# }
# ]

Python, returning specified key values from a large nested dictionary also containing lists

From a nested dictionary also containing lists I would like to return the values for specified keys whether the return value be another dictionary, list or singular value. Example dictionary shown below (OSINT API data of a random IP from Shodan).
From a specified list of keys e.g: ['domains', 'html', 'CN']
Example required output:
'domains': ['facebook.com']
'CN': '*.secure.facebook.com'
'html': ''
etc.
The specified keys may appear more than once, or not at all depending on the services on different IP's.
I have code for nested dictionaries but this has stumped me:
dataoutput = {'some': 'dictionary'}
filtered_list = ['list of required keys']
def seek_keys(d, key_list):
    """Print every occurrence of the requested keys in a nested structure.

    Walks ``d`` depth-first.  Dictionaries are searched key by key, and lists
    or tuples are searched element by element, so dictionaries stored inside
    lists are reached as well.  Each time a key from ``key_list`` is found a
    ``key: value`` line is printed: a dictionary value is summarised by its
    first key (empty text for an empty dictionary), any other value is
    printed via ``str()``.

    Args:
        d: The dictionary, list or tuple to search.  Any other kind of value
            is ignored.
        key_list: Collection of keys to report.

    Returns:
        None.  Results are written to standard output.
    """
    if isinstance(d, (list, tuple)):
        for item in d:
            seek_keys(item, key_list)
        return
    if not isinstance(d, dict):
        # Scalars (strings, numbers, None, ...) contain nothing to search.
        return
    for k, v in d.items():
        if k in key_list:
            if isinstance(v, dict):
                # An empty dict has no first key; fall back to empty text
                # instead of raising IndexError.
                summary = str(next(iter(v), ""))
            else:
                summary = str(v)
            # str(k) keeps non-string keys from breaking the concatenation.
            print(str(k) + ": " + summary)
        if isinstance(v, (dict, list, tuple)):
            seek_keys(v, key_list)
seek_keys(dataoutput, filtered_list)
{'region_code': '40', 'ip': 520966673, 'postal_code': None, 'country_code': 'JP', 'city': 'Tokyo', 'dma_code': None, 'last_update': '2021-08-17T21:19:17.704800', 'latitude': 35.6895, 'tags': [], 'area_code': None, 'country_name': 'Japan', 'hostnames': ['edge-secure-shv-01-nrt1.facebook.com'], 'org': 'Facebook Ireland Ltd', 'data': [{'hash': 960245092, '_shodan': {'id': '35fbfc68-de4a-4433-8d4e-672ed558dc90', 'options': {}, 'ptr': True, 'module': 'http', 'crawler': 'f4bb88763d8ed3a0f3f91439c2c62b77fb9e06f3'}, 'http': {'robots_hash': None, 'redirects': [], 'securitytxt': None, 'title': None, 'sitemap_hash': None, 'robots': None, 'server': None, 'host': '31.13.82.17', 'html': '', 'location': '/', 'html_hash': 0, 'sitemap': None, 'securitytxt_hash': None}, 'os': None, 'opts': {}, 'timestamp': '2021-08-17T21:19:17.704800', 'isp': 'Facebook, Inc.', 'port': 80, 'hostnames': ['edge-secure-shv-01-nrt1.facebook.com'], 'location': {'city': 'Tokyo', 'region_code': '40', 'area_code': None, 'longitude': 139.69171, 'country_code3': None, 'country_name': 'Japan', 'postal_code': None, 'dma_code': None, 'country_code': 'JP', 'latitude': 35.6895}, 'ip': 520966673, 'domains': ['facebook.com'], 'org': 'Facebook Ireland Ltd', 'data': 'HTTP/1.1 301 Moved Permanently\r\nVary: Accept-Encoding\r\nLocation: http://www.facebook.com/\r\nContent-Type: text/html; charset="utf-8"\r\nX-FB-Debug: uH7oyeyXmCxXRUEDRNZW89jYu4Ncis+tsOcmWtF45ENW8qkGHHJOHpF/WMOclN/XJahPWteD9avoPOcGo4g+Iw==\r\nDate: Tue, 17 Aug 2021 21:19:11 GMT\r\nAlt-Svc: h3-29=":443"; ma=3600,h3-27=":443"; ma=3600\r\nConnection: keep-alive\r\nContent-Length: 0\r\n\r\n', 'asn': 'AS32934', 'transport': 'tcp', 'ip_str': '31.13.82.17'}, {'hash': 1932904474, '_shodan': {'id': '5e465672-0345-4d13-94b4-f4b5a68e06dd', 'options': {}, 'ptr': True, 'module': 'https', 'crawler': 'bf213bc419cc8491376c12af31e32623c1b6f467'}, 'http': {'robots_hash': None, 'redirects': [], 'securitytxt': None, 'title': None, 'sitemap_hash': None, 'robots': None, 
'server': None, 'host': '31.13.82.17', 'html': '', 'location': '/', 'html_hash': 0, 'sitemap': None, 'securitytxt_hash': None}, 'os': None, 'opts': {'vulns': [], 'heartbleed': '2021/08/15 19:43:47 31.13.82.17:443 - SAFE\n'}, 'timestamp': '2021-08-15T19:43:33.722160', 'isp': 'Facebook, Inc.', 'port': 443, 'ssl': {'chain_sha256': ['1e3839fdfad7b0a9098bfe4e2853391a6230357e50c6506e26caecdf85b95c9e', '19400be5b7a31fb733917700789d2f0a2471c0c9d506c0e504c06c16d7cb17c0', '7431e5f4c3c1ce4690774f0b61e05440883ba9a01ed00ba6abd7806ed3b118cf'], 'jarm': '29d29d00029d29d00041d43d00041d92bf66d3b7eed52dd4b99d709662a4a4', 'chain': ['-----BEGIN CERTIFICATE-----\nMIIG4jCCBcqgAwIBAgIQDNU+hEWn8AdrZCyl+p9BCjANBgkqhkiG9w0BAQsFADBw\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMS8wLQYDVQQDEyZEaWdpQ2VydCBTSEEyIEhpZ2ggQXNz\ndXJhbmNlIFNlcnZlciBDQTAeFw0yMTA2MjcwMDAwMDBaFw0yMTA5MjUyMzU5NTla\nMHAxCzAJBgNVBAYTAlVTMRMwEQYDVQQIEwpDYWxpZm9ybmlhMRMwEQYDVQQHEwpN\nZW5sbyBQYXJrMRcwFQYDVQQKEw5GYWNlYm9vaywgSW5jLjEeMBwGA1UEAwwVKi5z\nZWN1cmUuZmFjZWJvb2suY29tMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC\nAQEAvp5pfStLmHEZkL/h8EDLbYDAUI0A8P937l/Nn/sEPP+4qATFOpGK9r4xWl7V\nHBqv14a7FSR3Wcf17H5acZtEd2RPQXJqTjwzQ5sngQ0INFt3up1TrRyfavRJ4+jr\nVDPMeUF0nTfgBCkB0XvfSfrrTld9iYbsuINSLBRx8KGPsCqAM6AefPS/4YTXyPn+\nCZ7YBsZBbVhcQ7opioe/2yl/nxGuugxWHfqyj8wZOkLr44I9bB2lJodZbmGgHIel\nH0zetndOTS/0DuE1UW908B4cRJtnBTQ6GLi16BeW3HM1sRhtkvegfhpja72d0uJD\nVK6NJUC71KwPpI73Z69SYlRunQIDAQABo4IDdjCCA3IwHwYDVR0jBBgwFoAUUWj/\nkK8CB3U8zNllZGKiErhZcjswHQYDVR0OBBYEFEHc6kgty7L4Ssr0dntDDYKZxvtx\nMDUGA1UdEQQuMCyCFSouc2VjdXJlLmZhY2Vib29rLmNvbYITc2VjdXJlLmZhY2Vi\nb29rLmNvbTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsG\nAQUFBwMCMHUGA1UdHwRuMGwwNKAyoDCGLmh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNv\nbS9zaGEyLWhhLXNlcnZlci1nNi5jcmwwNKAyoDCGLmh0dHA6Ly9jcmw0LmRpZ2lj\nZXJ0LmNvbS9zaGEyLWhhLXNlcnZlci1nNi5jcmwwPgYDVR0gBDcwNTAzBgZngQwB\nAgIwKTAnBggrBgEFBQcCARYbaHR0cDovL3d3dy5kaWdpY2VydC5jb20vQ1BTMIGD\nBggrBgEFBQcBAQR3MHUwJAYIKwYBBQ
UHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0\nLmNvbTBNBggrBgEFBQcwAoZBaHR0cDovL2NhY2VydHMuZGlnaWNlcnQuY29tL0Rp\nZ2lDZXJ0U0hBMkhpZ2hBc3N1cmFuY2VTZXJ2ZXJDQS5jcnQwDAYDVR0TAQH/BAIw\nADCCAX0GCisGAQQB1nkCBAIEggFtBIIBaQFnAHUA9lyUL9F3MCIUVBgIMJRWjuNN\nExkzv98MLyALzE7xZOMAAAF6TpSCPgAABAMARjBEAiBYhGi2yBBG7czoeFxQyTu8\nzxxPgqUIdcmWaW3hw2EQ8wIgAidk5HQyaPlgqx0v9vgUld9K8t8/3J29NfxiyFoH\nscIAdwBc3EOS/uarRUSxXprUVuYQN/vV+kfcoXOUsl7m9scOygAAAXpOlIJQAAAE\nAwBIMEYCIQDStTIT2wWQPUFU7sr8oEguoyufiGj8q5w9RVBeHH/AXgIhAOsWE3+I\n06LQxEXIFjE8WJz/hdVpzQdFKJBap/s9fDbRAHUA7sCV7o1yZA+S48O5G8cSo2lq\nCXtLahoUOOZHssvtxfkAAAF6TpSCgwAABAMARjBEAiAvin7s6vhjamLUuQF5pZeL\np0qUdE8FER1YH/VAdtkkJgIgLnTb4+YV7Rz2HLpAWl2yo4iM+wsS3hDHYimvewnn\n4/kwDQYJKoZIhvcNAQELBQADggEBAHKAxCkCP9ymd2muUrsMlgq8g8NtvyhckKCI\n1nHrJPWu4lCPNT7nhFS2Ksl4kERGfW02feSuTm0IZOe87wEsnB5zJBdb9PeDNFTH\n97MYVvvW1+A3DX/n4Vmkq4RIUjLXYVelPgCr8Gs5CBT8+tocZNCB9AG00wwYVzvN\nbLcSPdzYEbPtfxP0RQVlAFdVKTCf/0pOt1iS9zF1wInA1MFzaZ8sj2OBKW28bFLi\ntPN8aMki5BH6Y8QUMPn6zo1P1CcBBLrd8mq0abxW1bsxk9KKULuXV1g4rspTQgO7\nlWlcgHrK154kvhhNjcx59L1iVDM4eCjgk2aDBfVUPBqwP6O+sow=\n-----END CERTIFICATE-----\n', '-----BEGIN 
CERTIFICATE-----\nMIIEsTCCA5mgAwIBAgIQBOHnpNxc8vNtwCtCuF0VnzANBgkqhkiG9w0BAQsFADBs\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSswKQYDVQQDEyJEaWdpQ2VydCBIaWdoIEFzc3VyYW5j\nZSBFViBSb290IENBMB4XDTEzMTAyMjEyMDAwMFoXDTI4MTAyMjEyMDAwMFowcDEL\nMAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3\nLmRpZ2ljZXJ0LmNvbTEvMC0GA1UEAxMmRGlnaUNlcnQgU0hBMiBIaWdoIEFzc3Vy\nYW5jZSBTZXJ2ZXIgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC2\n4C/CJAbIbQRf1+8KZAayfSImZRauQkCbztyfn3YHPsMwVYcZuU+UDlqUH1VWtMIC\nKq/QmO4LQNfE0DtyyBSe75CxEamu0si4QzrZCwvV1ZX1QK/IHe1NnF9Xt4ZQaJn1\nitrSxwUfqJfJ3KSxgoQtxq2lnMcZgqaFD15EWCo3j/018QsIJzJa9buLnqS9UdAn\n4t07QjOjBSjEuyjMmqwrIw14xnvmXnG3Sj4I+4G3FhahnSMSTeXXkgisdaScus0X\nsh5ENWV/UyU50RwKmmMbGZJ0aAo3wsJSSMs5WqK24V3B3aAguCGikyZvFEohQcft\nbZvySC/zA/WiaJJTL17jAgMBAAGjggFJMIIBRTASBgNVHRMBAf8ECDAGAQH/AgEA\nMA4GA1UdDwEB/wQEAwIBhjAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIw\nNAYIKwYBBQUHAQEEKDAmMCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2Vy\ndC5jb20wSwYDVR0fBEQwQjBAoD6gPIY6aHR0cDovL2NybDQuZGlnaWNlcnQuY29t\nL0RpZ2lDZXJ0SGlnaEFzc3VyYW5jZUVWUm9vdENBLmNybDA9BgNVHSAENjA0MDIG\nBFUdIAAwKjAoBggrBgEFBQcCARYcaHR0cHM6Ly93d3cuZGlnaWNlcnQuY29tL0NQ\nUzAdBgNVHQ4EFgQUUWj/kK8CB3U8zNllZGKiErhZcjswHwYDVR0jBBgwFoAUsT7D\naQP4v0cB1JgmGggC72NkK8MwDQYJKoZIhvcNAQELBQADggEBABiKlYkD5m3fXPwd\naOpKj4PWUS+Na0QWnqxj9dJubISZi6qBcYRb7TROsLd5kinMLYBq8I4g4Xmk/gNH\nE+r1hspZcX30BJZr01lYPf7TMSVcGDiEo+afgv2MW5gxTs14nhr9hctJqvIni5ly\n/D6q1UEL2tU2ob8cbkdJf17ZSHwD2f2LSaCYJkJA69aSEaRkCldUxPUd1gJea6zu\nxICaEnL6VpPX/78whQYwvwt/Tv9XBZ0k7YXDK/umdaisLRbvfXknsuvCnQsH6qqF\n0wGjIChBWUMo0oHjqvbsezt3tkBigAVBRQHvFwY+3sAzm2fTYS5yh+Rp/BIAV0Ae\ncPUeybQ=\n-----END CERTIFICATE-----\n', '-----BEGIN 
CERTIFICATE-----\nMIIDxTCCAq2gAwIBAgIQAqxcJmoLQJuPC3nyrkYldzANBgkqhkiG9w0BAQUFADBs\nMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3\nd3cuZGlnaWNlcnQuY29tMSswKQYDVQQDEyJEaWdpQ2VydCBIaWdoIEFzc3VyYW5j\nZSBFViBSb290IENBMB4XDTA2MTExMDAwMDAwMFoXDTMxMTExMDAwMDAwMFowbDEL\nMAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3\nLmRpZ2ljZXJ0LmNvbTErMCkGA1UEAxMiRGlnaUNlcnQgSGlnaCBBc3N1cmFuY2Ug\nRVYgUm9vdCBDQTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMbM5XPm\n+9S75S0tMqbf5YE/yc0lSbZxKsPVlDRnogocsF9ppkCxxLeyj9CYpKlBWTrT3JTW\nPNt0OKRKzE0lgvdKpVMSOO7zSW1xkX5jtqumX8OkhPhPYlG++MXs2ziS4wblCJEM\nxChBVfvLWokVfnHoNb9Ncgk9vjo4UFt3MRuNs8ckRZqnrG0AFFoEt7oT61EKmEFB\nIk5lYYeBQVCmeVyJ3hlKV9Uu5l0cUyx+mM0aBhakaHPQNAQTXKFx01p8VdteZOE3\nhzBWBOURtCmAEvF5OYiiAhF8J2a3iLd48soKqDirCmTCv2ZdlYTBoSUeh10aUAsg\nEsxBu24LUTi4S8sCAwEAAaNjMGEwDgYDVR0PAQH/BAQDAgGGMA8GA1UdEwEB/wQF\nMAMBAf8wHQYDVR0OBBYEFLE+w2kD+L9HAdSYJhoIAu9jZCvDMB8GA1UdIwQYMBaA\nFLE+w2kD+L9HAdSYJhoIAu9jZCvDMA0GCSqGSIb3DQEBBQUAA4IBAQAcGgaX3Nec\nnzyIZgYIVyHbIUf4KmeqvxgydkAQV8GK83rZEWWONfqe/EW1ntlMMUu4kehDLI6z\neM7b41N5cdblIZQB2lWHmiRk9opmzN6cN82oNLFpmyPInngiK3BD41VHMWEZ71jF\nhS9OMPagMRYjyOfiZRYzy78aG6A9+MpeizGLYAiJLQwGXFK3xPkKmNEVX58Svnw2\nYzi9RKR/5CYrCsSXaQ3pjOLAEFe4yHYSkVXySGnYvCoCWw9E1CAx2/S6cCZdkGCe\nvEsXCS+0yx5DaMkHJ8HSXPfqIbloEpw8nL+e/IBcm2PN7EeqJSdnoDfzAIJ9VNep\n+OkuE6N36B9K\n-----END CERTIFICATE-----\n'], 'dhparams': None, 'versions': ['-TLSv1', '-SSLv2', '-SSLv3', '-TLSv1.1', 'TLSv1.2', 'TLSv1.3'], 'acceptable_cas': [], 'tlsext': [{'id': 65281, 'name': 'renegotiation_info'}, {'id': 11, 'name': 'ec_point_formats'}, {'id': 35, 'name': 'session_ticket'}], 'ja3s': 'ccc514751b175866924439bdbb5bba34', 'cert': {'sig_alg': 'sha256WithRSAEncryption', 'issued': '20210627000000Z', 'expires': '20210925235959Z', 'expired': False, 'version': 2, 'extensions': [{'data': '0\\x16\\x80\\x14Qh\\xff\\x90\\xaf\\x02\\x07u<\\xcc\\xd9edb\\xa2\\x12\\xb8Yr;', 'name': 'authorityKeyIdentifier'}, {'data': 
'\\x04\\x14A\\xdc\\xeaH-\\xcb\\xb2\\xf8J\\xca\\xf4v{C\\r\\x82\\x99\\xc6\\xfbq', 'name': 'subjectKeyIdentifier'}, {'data': '0,\\x82\\x15*.secure.facebook.com\\x82\\x13secure.facebook.com', 'name': 'subjectAltName'}, {'critical': True, 'data': '\\x03\\x02\\x05\\xa0', 'name': 'keyUsage'}, {'data': '0\\x14\\x06\\x08+\\x06\\x01\\x05\\x05\\x07\\x03\\x01\\x06\\x08+\\x06\\x01\\x05\\x05\\x07\\x03\\x02', 'name': 'extendedKeyUsage'}, {'data': '0l04\\xa02\\xa00\\x86.http://crl3.digicert.com/sha2-ha-server-g6.crl04\\xa02\\xa00\\x86.http://crl4.digicert.com/sha2-ha-server-g6.crl', 'name': 'crlDistributionPoints'}, {'data': "0503\\x06\\x06g\\x81\\x0c\\x01\\x02\\x020)0\\'\\x06\\x08+\\x06\\x01\\x05\\x05\\x07\\x02\\x01\\x16\\x1bhttp://www.digicert.com/CPS", 'name': 'certificatePolicies'}, {'data': '0u0$\\x06\\x08+\\x06\\x01\\x05\\x05\\x070\\x01\\x86\\x18http://ocsp.digicert.com0M\\x06\\x08+\\x06\\x01\\x05\\x05\\x070\\x02\\x86Ahttp://cacerts.digicert.com/DigiCertSHA2HighAssuranceServerCA.crt', 'name': 'authorityInfoAccess'}, {'critical': True, 'data': '0\\x00', 'name': 'basicConstraints'}, {'data': '\\x04\\x82\\x01i\\x01g\\x00u\\x00\\xf6\\\\\\x94/\\xd1w0"\\x14T\\x18\\x080\\x94V\\x8e\\xe3M\\x13\\x193\\xbf\\xdf\\x0c/ \\x0b\\xccN\\xf1d\\xe3\\x00\\x00\\x01zN\\x94\\x82>\\x00\\x00\\x04\\x03\\x00F0D\\x02 X\\x84h\\xb6\\xc8\\x10F\\xed\\xcc\\xe8x\\\\P\\xc9;\\xbc\\xcf\\x1cO\\x82\\xa5\\x08u\\xc9\\x96im\\xe1\\xc3a\\x10\\xf3\\x02 
\\x02\\\'d\\xe4t2h\\xf9`\\xab\\x1d/\\xf6\\xf8\\x14\\x95\\xdfJ\\xf2\\xdf?\\xdc\\x9d\\xbd5\\xfcb\\xc8Z\\x07\\xb1\\xc2\\x00w\\x00\\\\\\xdcC\\x92\\xfe\\xe6\\xabED\\xb1^\\x9a\\xd4V\\xe6\\x107\\xfb\\xd5\\xfaG\\xdc\\xa1s\\x94\\xb2^\\xe6\\xf6\\xc7\\x0e\\xca\\x00\\x00\\x01zN\\x94\\x82P\\x00\\x00\\x04\\x03\\x00H0F\\x02!\\x00\\xd2\\xb52\\x13\\xdb\\x05\\x90=AT\\xee\\xca\\xfc\\xa0H.\\xa3+\\x9f\\x88h\\xfc\\xab\\x9c=EP^\\x1c\\x7f\\xc0^\\x02!\\x00\\xeb\\x16\\x13\\x7f\\x88\\xd3\\xa2\\xd0\\xc4E\\xc8\\x161<X\\x9c\\xff\\x85\\xd5i\\xcd\\x07E(\\x90Z\\xa7\\xfb=|6\\xd1\\x00u\\x00\\xee\\xc0\\x95\\xee\\x8drd\\x0f\\x92\\xe3\\xc3\\xb9\\x1b\\xc7\\x12\\xa3ij\\t{Kj\\x1a\\x148\\xe6G\\xb2\\xcb\\xed\\xc5\\xf9\\x00\\x00\\x01zN\\x94\\x82\\x83\\x00\\x00\\x04\\x03\\x00F0D\\x02 /\\x8a~\\xec\\xea\\xf8cjb\\xd4\\xb9\\x01y\\xa5\\x97\\x8b\\xa7J\\x94tO\\x05\\x11\\x1dX\\x1f\\xf5#v\\xd9$&\\x02 .t\\xdb\\xe3\\xe6\\x15\\xed\\x1c\\xf6\\x1c\\xba#Z]\\xb2\\xa3\\x88\\x8c\\xfb\\x0b\\x12\\xde\\x10\\xc7b)\\xaf{\\t\\xe7\\xe3\\xf9', 'name': 'ct_precert_scts'}], 'fingerprint': {'sha256': '1e3839fdfad7b0a9098bfe4e2853391a6230357e50c6506e26caecdf85b95c9e', 'sha1': '73f915b5c8218ff445be1de566b5c2d0a15d1f6d'}, 'serial': 17057963169357276840551436714988486922, 'subject': {'C': 'US', 'L': 'Menlo Park', 'CN': '*.secure.facebook.com', 'O': 'Facebook, Inc.', 'ST': 'California'}, 'pubkey': {'type': 'rsa', 'bits': 2048}, 'issuer': {'C': 'US', 'OU': 'www.digicert.com', 'O': 'DigiCert Inc', 'CN': 'DigiCert SHA2 High Assurance Server CA'}}, 'cipher': {'version': 'TLSv1/SSLv3', 'bits': 128, 'name': 'ECDHE-RSA-AES128-GCM-SHA256'}, 'trust': {'revoked': False, 'browser': {'mozilla': True, 'apple': True, 'microsoft': True}}, 'handshake_states': ['before/connect initialization', 'SSLv2/v3 write client hello', 'SSLv2/v3 read server hello', 'SSLv3/TLS read server hello', 'SSLv3/TLS read server certificate', 'SSLv3/TLS read server key exchange', 'SSLv3/TLS read server done', 'SSLv3/TLS write client key exchange', 'SSLv3/TLS write change cipher 
spec', 'SSLv3/TLS write finished', 'SSLv3/TLS flush data', 'SSLv3/TLS read server session ticket', 'SSLv3/TLS read finished', 'SSL negotiation finished successfully'], 'alpn': [], 'ocsp': {}}, 'hostnames': ['edge-secure-shv-01-nrt1.facebook.com'], 'location': {'city': 'Tokyo', 'region_code': '40', 'area_code': None, 'longitude': 139.69171, 'country_code3': None, 'country_name': 'Japan', 'postal_code': None, 'dma_code': None, 'country_code': 'JP', 'latitude': 35.6895}, 'ip': 520966673, 'domains': ['facebook.com'], 'org': 'Facebook Ireland Ltd', 'data': 'HTTP/1.1 301 Moved Permanently\r\nVary: Accept-Encoding\r\nLocation: https://www.facebook.com/\r\nContent-Type: text/html; charset="utf-8"\r\nX-FB-Debug: ryMLheDS9Y/10RtChKHVKi5BNNNfF4bl3zitEBlNHsseV3TKNfq8YBdEVE64P9DFrlWUOl+KvZm7bi77IoJS6A==\r\nDate: Sun, 15 Aug 2021 19:43:33 GMT\r\nAlt-Svc: h3-29=":443"; ma=3600,h3-27=":443"; ma=3600\r\nConnection: keep-alive\r\nContent-Length: 0\r\n\r\n', 'asn': 'AS32934', 'transport': 'tcp', 'ip_str': '31.13.82.17'}], 'asn': 'AS32934', 'isp': 'Facebook, Inc.', 'longitude': 139.69171, 'country_code3': None, 'domains': ['facebook.com'], 'ip_str': '31.13.82.17', 'os': None, 'ports': [80, 443]}

Sort and return all of nested dictionaries based on specified key value

I am trying to rearrange the contents of a nested dictionary, ordering its entries by the value of a specified key.
dict_entries = {
'entries': {
'AzP746r3Nl': {
'uniqueID': 'AzP746r3Nl',
'index': 2,
'data': {'comment': 'First Plastique Mat.',
'created': '17/01/19 10:18',
'project': 'EMZ',
'name': 'plastique_varA',
'version': '1'},
'name': 'plastique_varA',
'text': 'plastique test',
'thumbnail': '/Desktop/mat/plastique_varA/plastique_varA.jpg',
'type': 'matEntry'
},
'Q2tch2xm6h': {
'uniqueID': 'Q2tch2xm6h',
'index': 0,
'data': {'comment': 'Camino from John Inds.',
'created': '03/01/19 12:08',
'project': 'EMZ',
'name': 'camino_H10a',
'version': '1'},
'name': 'camino_H10a',
'text': 'John Inds : Camino',
'thumbnail': '/Desktop/chips/camino_H10a/camino_H10a.jpg',
'type': 'ChipEntry'
},
'ZeqCFCmHqp': {
'uniqueID': 'ZeqCFCmHqp',
'index': 1,
'data': {'comment': 'Prototype Bleu.',
'created': '03/01/19 14:07',
'project': 'EMZ',
'name': 'bleu_P23y',
'version': '1'},
'name': 'bleu_P23y',
'text': 'Bleu : Prototype',
'thumbnail': '/Desktop/chips/bleu_P23y/bleu_P23y.jpg',
'type': 'ChipEntry'
}
}
}
In my above nested dictionary example, I am trying to check it by the name and created key (2 functions each) and once it has been sorted, the index value will be updated accordingly as well...
Even so, I am able to query for the values of the said key(s):
for item in dict_entries.get('entries').values():
    # Value of this entry's 'name' field, i.e. the key being targeted
    tar_key = item['name']
but this is returning me the value of the name key and I am unsure on my next step as I am trying to sort by the value of the name key and capturing + re-arranging all the contents of the nested dictionaries.
This is my desired output (if checking by name):
{'entries': {
'ZeqCFCmHqp': {
'uniqueID': 'ZeqCFCmHqp',
'index': 1,
'data': {'comment': 'Prototype Bleu.',
'created': '03/01/19 14:07',
'project': 'EMZ',
'name': 'bleu_P23y',
'version': '1'},
'name': 'bleu_P23y',
'text': 'Bleu : Prototype',
'thumbnail': '/Desktop/chips/bleu_P23y/bleu_P23y.jpg',
'type': 'ChipEntry'
}
'Q2tch2xm6h': {
'uniqueID': 'Q2tch2xm6h',
'index': 0,
'data': {'comment': 'Camino from John Inds.',
'created': '03/01/19 12:08',
'project': 'EMZ',
'name': 'camino_H10a',
'version': '1'},
'name': 'camino_H10a',
'text': 'John Inds : Camino',
'thumbnail': '/Desktop/chips/camino_H10a/camino_H10a.jpg',
'type': 'ChipEntry'
},
'AzP746r3Nl': {
'uniqueID': 'AzP746r3Nl',
'index': 2,
'data': {'comment': 'First Plastique Mat.',
'created': '17/01/19 10:18',
'project': 'EMZ',
'name': 'plastique_varA',
'version': '1'},
'name': 'plastique_varA',
'text': 'plastique test',
'thumbnail': '/Desktop/mat/plastique_varA/plastique_varA.jpg',
'type': 'matEntry'
}
}
}

Mongo Distinct Query with full row object

first of all i'm new to mongo so I don't know much and i cannot just remove duplicate rows due to some dependencies.
I have following data stored in mongo
{'id': 1, 'key': 'qscderftgbvqscderftgbvqscderftgbvqscderftgbvqscderftgbv', 'name': 'some name', 'country': 'US'},
{'id': 2, 'key': 'qscderftgbvqscderftgbvqscderftgbvqscderftgbvqscderftgbv', 'name': 'some name', 'country': 'US'},
{'id': 3, 'key': 'pehnvosjijipehnvosjijipehnvosjijipehnvosjijipehnvosjiji', 'name': 'some name', 'country': 'IN'},
{'id': 4, 'key': 'pfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnew', 'name': 'some name', 'country': 'IN'},
{'id': 5, 'key': 'pfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnew', 'name': 'some name', 'country': 'IN'}
As you can see, some of the rows are duplicates that differ only in their id.
Since it will take a long time to fix this on the input side, I must handle it on output for now.
I need the data in the following way:
{'id': 1, 'key': 'qscderftgbvqscderftgbvqscderftgbvqscderftgbvqscderftgbv', 'name': 'some name', 'country': 'US'},
{'id': 3, 'key': 'pehnvosjijipehnvosjijipehnvosjijipehnvosjijipehnvosjiji', 'name': 'some name', 'country': 'IN'},
{'id': 4, 'key': 'pfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnewpfvvjwovnew', 'name': 'some name', 'country': 'IN'}
My query
keys = db.collection.distinct('key', {})
all_data = db.collection.find({'key': {$in: keys}})
As you can see, it takes two queries to get a single result set. Please help me combine them into one query, as the database is very large.
I might also create a unique key on the key but the value is so long (152 characters) that it will not help me.
Or it will??
You need to use the aggregation framework for this. There are multiple ways to do this, the solution below uses the $$ROOT variable to get the first document for each group:
// Return one full document per distinct "key" value in a single aggregation.
db.data.aggregate([{
// Stage 1: order by _id ascending so that "$first" below picks, for each
// group, the document that comes first in this order.
// NOTE(review): this sorts on _id, while the sample documents show an "id"
// field - confirm which one is meant to decide the surviving row.
"$sort": {
"_id": 1
}
}, {
// Stage 2: group on the "key" field and keep the whole first document of
// each group ($$ROOT is the document being processed).
"$group": {
"_id": "$key",
"first": {
"$first": "$$ROOT"
}
}
}, {
// Stage 3: lift the saved document's fields back to the top level and drop
// the grouping _id from the output.
"$project": {
"_id": 0,
"id":"$first.id",
"key":"$first.key",
"name":"$first.name",
"country":"$first.country"
}
}])

Python - Iterating lists and dictionaries [closed]

Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers.
This question appears to be off-topic because it lacks sufficient information to diagnose the problem. Describe your problem in more detail or include a minimal example in the question itself.
Closed 8 years ago.
Improve this question
Hi, I'm reposting this question but providing more information about what I'm trying to achieve. It's been driving me crazy for the last few days and I can't seem to make any progress. Basically, I have this data structure:
data_in =\
{'map': {'command_line': u'command goes here',
'scaninfo': {u'tcp': {'method': u'syn', 'services': u'80,443'}},
'stats': {'downhosts': u'0',
'elapsed': u'1.71',
'timestr': u'Thu Mar 20 18:18:09 2014',
'totalhosts': u'3',
'uphosts': u'3'}},
'scan': {u'2a00:2384:0:208f::13': {'addresses': {u'ipv6': u'2a00:2384:0:f467::13',
u'mac': u'00:gf:88:9:56:D5'},
'hostname': u'static.abc.com',
'status': {'reason': u'nd-response',
'state': u'up'},
u'tcp': {80: {'conf': u'3',
'cpe': '',
'extrainfo': '',
'name': u'http',
'product': '',
'reason': u'syn-ack',
'state': u'open',
'version': ''},
443: {'conf': u'3',
'cpe': '',
'extrainfo': '',
'name': u'https',
'product': '',
'reason': u'syn-ack',
'script': {u'ssl-cert': u'place holder'},
'state': u'open',
'version': ''}},
'vendor': {u'00:0C:29:7C:13:D3': u'VMware'}},
u'2a00:2384:0:208f::15': {'addresses': {u'ipv6': u'a848:2384:0:3456::15',
u'mac': u'00:gf:29:99:6D:96'},
'hostname': u'static.xyz.com',
'status': {'reason': u'nd-response',
'state': u'up'},
u'tcp': {80: {'conf': u'3',
'cpe': '',
'extrainfo': '',
'name': u'http',
'product': '',
'reason': u'syn-ack',
'state': u'open',
'version': ''},
443: {'conf': u'3',
'cpe': '',
'extrainfo': '',
'name': u'https',
'product': '',
'reason': u'syn-ack',
'script': {u'ssl-cert': u'place holder'},
'state': u'open',
'version': ''}},
'vendor': {u'00:0C:67:99:6f:96': u'VMware'}},
u'2a00:2384:0:208f::16': {'addresses': {u'ipv6': u'8938:8584:0:8685::16',
u'mac': u'00:54:29:fg:55:0F'},
'hostname': u'static.edf.com',
'status': {'reason': u'nd-response',
'state': u'up'},
u'tcp': {80: {'conf': u'3',
'cpe': '',
'extrainfo': '',
'name': u'http',
'product': '',
'reason': u'syn-ack',
'state': u'open',
'version': ''},
443: {'conf': u'3',
'cpe': '',
'extrainfo': '',
'name': u'https',
'product': '',
'reason': u'syn-ack',
'script': {u'ssl-cert': u'place holder'},
'state': u'open',
'version': ''}},
'vendor': {u'00:0C:55:AE:33:ff': u'VMware'}}}}
And need to create a simplified version of it that looks like this:
data_out =\
[{'address': u'2a00:2384:0:208f::13',
'hostname': u'static.bt.com',
'ports': [{80: {'reason': u'syn-ack', 'state': u'open'}},
{443: {'reason': u'syn-ack',
'ssl_cert': u'place holder',
'state': u'open'}}]}]
As per previous advice from @jonrsharpe, I've created a helper function that enables me to find keys. This has proved helpful, but I am still struggling to get the desired results.
def find_key(data, search_key, out=None):
    """Find all values stored under a given key in a nested structure.

    The search is depth-first.  Dictionaries are searched through their
    values, and lists or tuples through their elements, so dictionaries held
    inside sequences are searched too.  A matching value is recorded before
    the search continues inside it.

    Args:
        data: The structure to search.  Anything that is not a dictionary,
            list or tuple contributes no matches.
        search_key: The dictionary key to look for.
        out: Optional list that matches are appended to; a new list is
            created when omitted.

    Returns:
        The list of matching values in the order they were found (the same
        object as ``out`` when one was supplied).
    """
    if out is None:
        out = []
    if isinstance(data, dict):
        if search_key in data:
            out.append(data[search_key])
        for value in data.values():
            find_key(value, search_key, out)
    elif isinstance(data, (list, tuple)):
        # Sequences hold no keys themselves but may contain dictionaries.
        for item in data:
            find_key(item, search_key, out)
    return out
Any help would be really appreciated here!
This isn't all that hard; you just have to go through and look at the data structure that leads to what you want - which is made much harder by poor formatting, so I re-indented your input and marked the keys (<==) and fields (!!!) you are seeking:
data_in = {
'map': {
'stats': {
'uphosts': u'3',
'timestr': u'Thu Mar 20 18:18:09 2014',
'downhosts': u'0',
'totalhosts': u'3',
'elapsed': u'1.71'
},
'scaninfo': {
u'tcp': {
'services': u'80,443',
'method': u'syn'
}
},
'command_line': u'command goes here'
},
'scan': { # <==
u'2a00:2384:0:208f::13': { # <== !!!
'status': {
'state': u'up',
'reason': u'nd-response'
},
'hostname': u'static.abc.com', # !!!
'vendor': {
u'00:0C:29:7C:13:D3': u'VMware'
},
'addresses': {
u'mac': u'00:gf:88:9:56:D5',
u'ipv6': u'2a00:2384:0:f467::13'
},
u'tcp': { # <==
80: { # <== !!!
'product': '',
'state': u'open', # !!!
'version': '',
'name': u'http',
'conf': u'3',
'extrainfo': '',
'reason': u'syn-ack', # !!!
'cpe': ''
},
443: { # <== !!!
'product': '',
'state': u'open', # !!!
'version': '',
'name': u'https',
'conf': u'3',
'script': { # <==
u'ssl-cert': u'place holder' # !!!
},
'extrainfo': '',
'reason': u'syn-ack', # !!!
'cpe': ''
}
}
},
u'2a00:2384:0:208f::15': {
'status': {
'state': u'up',
'reason': u'nd-response'
},
'hostname': u'static.xyz.com',
'vendor': {
u'00:0C:67:99:6f:96': u'VMware'
},
'addresses': {
u'mac': u'00:gf:29:99:6D:96',
u'ipv6': u'a848:2384:0:3456::15'
},
u'tcp': {
80: {
'product': '',
'state': u'open',
'version': '',
'name': u'http',
'conf': u'3',
'extrainfo': '',
'reason': u'syn-ack',
'cpe': ''
},
443: {
'product': '',
'state': u'open',
'version': '',
'name': u'https',
'conf': u'3',
'script': {
u'ssl-cert': u'place holder'
},
'extrainfo': '',
'reason': u'syn-ack',
'cpe': ''
}
}
},
u'2a00:2384:0:208f::16': {
'status': {
'state': u'up',
'reason': u'nd-response'
},
'hostname': u'static.edf.com',
'vendor': {
u'00:0C:55:AE:33:ff': u'VMware'
},
'addresses': {
u'mac': u'00:54:29:fg:55:0F',
u'ipv6': u'8938:8584:0:8685::16'
},
u'tcp': {
80: {
'product': '',
'state': u'open',
'version': '',
'name': u'http',
'conf': u'3',
'extrainfo': '',
'reason': u'syn-ack',
'cpe': ''
},
443: {
'product': '',
'state': u'open',
'version': '',
'name': u'https',
'conf': u'3',
'script': {
u'ssl-cert': u'place holder'
},
'extrainfo': '',
'reason': u'syn-ack',
'cpe': ''
}
}
}
}
}
and likewise for your desired output (with appropriate adjustments):
data_out = [
{
'address': u'2a00:2384:0:208f::13',
'hostname': u'static.bt.com',
'ports': {
80: {
'state': u'open',
'reason': u'syn-ack'
},
443: {
'ssl_cert': u'place holder',
'state': u'open',
'reason': u'syn-ack'
}
}
}
]
then the extraction becomes:
def remap_port(port, port_data):
    """Reduce one port's scan record to the fields of interest.

    Args:
        port: The port identifier; returned unchanged.
        port_data: Mapping for that port.  Must contain "state" and
            "reason"; may contain "script" with an "ssl-cert" entry.

    Returns:
        A ``(port, result)`` tuple where ``result`` holds "state", "reason"
        and, when available, "ssl_cert".

    Raises:
        KeyError: If "state" or "reason" is missing from ``port_data``.
    """
    result = {
        "state": port_data["state"],
        "reason": port_data["reason"],
    }
    try:
        result["ssl_cert"] = port_data["script"]["ssl-cert"]
    except KeyError:
        # No script output, or no ssl-cert in it: the field is optional.
        pass
    return port, result


def remap_scanned_address(address, address_data):
    """Build the simplified record for one scanned address.

    Args:
        address: The address the scan data is keyed by.
        address_data: Mapping for that address.  Must contain "hostname";
            "tcp" (port -> port record) is optional.

    Returns:
        A dict with "address", "hostname" and "ports" (port -> simplified
        port record).  "ports" is empty when there is no "tcp" entry.

    Raises:
        KeyError: If "hostname" is missing from ``address_data``.
    """
    # A host without a "tcp" entry yields no ports instead of aborting the
    # whole remap with a KeyError.
    tcp_ports = address_data.get("tcp", {})
    return {
        "address": address,
        "hostname": address_data["hostname"],
        "ports": dict(
            remap_port(port, port_data)
            for port, port_data in tcp_ports.items()
        ),
    }


def remap_scan_data(data_in):
    """Convert the full scan result into a list of simplified host records.

    Args:
        data_in: Mapping whose "scan" entry maps each address to its data.

    Returns:
        A list with one record per address, as produced by
        ``remap_scanned_address``.

    Raises:
        KeyError: If ``data_in`` has no "scan" entry.
    """
    return [
        remap_scanned_address(address, address_data)
        for address, address_data in data_in["scan"].items()
    ]
data_out = remap_scan_data(data_in)
which results in the desired output,
[{'address': u'2a00:2384:0:208f::13',
'hostname': u'static.abc.com',
'ports': {80: {'reason': u'syn-ack', 'state': u'open'},
443: {'reason': u'syn-ack',
'ssl_cert': u'place holder',
'state': u'open'}}},
{'address': u'2a00:2384:0:208f::15',
'hostname': u'static.xyz.com',
'ports': {80: {'reason': u'syn-ack', 'state': u'open'},
443: {'reason': u'syn-ack',
'ssl_cert': u'place holder',
'state': u'open'}}},
{'address': u'2a00:2384:0:208f::16',
'hostname': u'static.edf.com',
'ports': {80: {'reason': u'syn-ack', 'state': u'open'},
443: {'reason': u'syn-ack',
'ssl_cert': u'place holder',
'state': u'open'}}}]

Categories