In python3 I need to get a JSON response from an API call,
and parse it so that I get a dictionary that contains only the data I need.
The final dictionary I expect to get is as follows:
{'Severity Rules': ('cc55c459-eb1a-11e8-9db4-0669bdfa776e', ['cc637182-eb1a-11e8-9db4-0669bdfa776e']), 'auto_collector': ('57e9a4ec-21f7-4e0e-88da-f0f1fda4c9d1', ['0ab2470a-451e-11eb-8856-06364196e782'])}
the JSON response returns the following output:
{
'RuleGroups': [{
'Id': 'cc55c459-eb1a-11e8-9db4-0669bdfa776e',
'Name': 'Severity Rules',
'Order': 1,
'Enabled': True,
'Rules': [{
'Id': 'cc637182-eb1a-11e8-9db4-0669bdfa776e',
'Name': 'Severity Rule',
'Description': 'Look for default severity text',
'Enabled': False,
'RuleMatchers': None,
'Rule': '\\b(?P<severity>DEBUG|TRACE|INFO|WARN|ERROR|FATAL|EXCEPTION|[I|i]nfo|[W|w]arn|[E|e]rror|[E|e]xception)\\b',
'SourceField': 'text',
'DestinationField': 'text',
'ReplaceNewVal': '',
'Type': 'extract',
'Order': 21520,
'KeepBlockedLogs': False
}],
'Type': 'user'
}, {
'Id': '4f6fa7c6-d60f-49cd-8c3d-02dcdff6e54c',
'Name': 'auto_collector',
'Order': 4,
'Enabled': True,
'Rules': [{
'Id': '2d6bdc1d-4064-11eb-8856-06364196e782',
'Name': 'auto_collector',
'Description': 'DO NOT CHANGE!! Created via API coralogix-blocker tool',
'Enabled': False,
'RuleMatchers': None,
'Rule': 'AUTODISABLED',
'SourceField': 'subsystemName',
'DestinationField': 'subsystemName',
'ReplaceNewVal': '',
'Type': 'block',
'Order': 1,
'KeepBlockedLogs': False
}],
'Type': 'user'
}]
}
I was able to create a dictionary that contains the name and the RuleGroupsID, like that:
# Fetch the rule groups and map each group's name to its ID.
response = requests.get(url, headers=headers)
output = response.json()
outputlist = output["RuleGroups"]
groupRuleName = [li['Name'] for li in outputlist]
groupRuleID = [li['Id'] for li in outputlist]
# Create a dictionary of NAME + ID.
# Both lists are built from the same sequence in the same order, so pairing
# them positionally is enough; unlike the remove()/break loop this does not
# empty groupRuleID as a side effect.
ruleDic = dict(zip(groupRuleName, groupRuleID))
Which gave me a simple dictionary:
{'Severity Rules': 'cc55c459-eb1a-11e8-9db4-0669bdfa776e', 'Rewrites': 'ddbaa27e-1747-11e9-9db4-0669bdfa776e', 'Extract': '0cb937b6-2354-d23a-5806-4559b1f1e540', 'auto_collector': '4f6fa7c6-d60f-49cd-8c3d-02dcdff6e54c'}
but when I tried to parse it as nested JSON things just didn't work.
In the end, I managed to create a function that returns this dictionary,
I do it by breaking the JSON into three lists holding the needed top-level elements (Name, Id, and Rules), and then building a further list from the nested JSON (everything listed under Rules) that keeps only the value of the "Id" key.
Finally creating a dictionary using a zip command on the lists and dictionaries created earlier.
def get_filtered_rules(rule_groups=None) -> dict:
    """Map each rule group's name to ``(group_id, [rule_id, ...])``.

    Args:
        rule_groups: The ``RuleGroups`` list of the API response, i.e. a list
            of dicts with ``Name``, ``Id`` and ``Rules`` keys. When omitted,
            the module-level ``outputlist`` is used.

    Returns:
        A dict keyed by group ``Name``. Each value is a tuple of the group's
        ``Id`` and the list of ``Id`` values found under its ``Rules``. A
        group whose ``Rules`` is missing or ``None`` gets an empty list.

    Raises:
        KeyError: If a group lacks ``Name`` or ``Id``, or a rule lacks ``Id``.
    """
    if rule_groups is None:
        rule_groups = outputlist
    # Each entry is built from a single group, so a group without rules can
    # no longer shift the rule lists of later groups onto the wrong name.
    return {
        group['Name']: (
            group['Id'],
            [rule['Id'] for rule in group.get('Rules') or []],
        )
        for group in rule_groups
    }
I want to perform searches on values in a list of nested dictionaries and return another key:value pairs. These dictionaries are metafiles. Basically I want to search the ID of each dictionary, find all dictionaries that have the same ID, and return the file location (key:value) pairs.
# Sample metafiles: each entry carries its metadata (title and ID), its
# content and the location of the file it describes.
metafile = [
    {
        'metadata': {'Title': 'The Sun Also Rises', 'ID': 'BAY121-F1164EAB499'},
        'content': 'xyz',
        'File_Path': 'file_location1',
    },
    {
        'metadata': {'Title': 'Of Mice and Men', 'ID': '499B0BAB#dfg'},
        'content': 'abc',
        'File_Path': 'file_location2',
    },
    {
        'metadata': {'Title': 'The Sun Also Rises Review', 'ID': 'BAY121-F1164EAB499'},
        'content': 'ftw',
        'File_Path': 'file_location3',
    },
]
I created a loop to perform my search as follows. It returns an empty list though, how should I modify this so that the file paths are returned?
# The question's failing attempt, kept as posted with its block structure
# restored. a['metadata']['ID'] is a string, so the inner loop walks over its
# individual characters; none of them equals the full search_ID and `path`
# stays empty.
search_ID = 'BAY121-F1164EAB499'
path = []
for a in metafile:
    for val in a['metadata']['ID']:
        if search_ID == val:
            path.append(a['File_Path'])
You don't need an inner loop for this.
Corrected code:
# Collect the File_Path of every metafile whose metadata ID equals search_ID.
search_ID = 'BAY121-F1164EAB499'
path = []
for a in metafile:
    # a['metadata']['ID'] already gives you the value of ID
    if search_ID == a['metadata']['ID']:
        path.append(a['File_Path'])
output
['file_location1', 'file_location3']
You don't need to iterate through a['metadata']['ID'], instead just access them directly. So the modified code would be
# Sample metafiles followed by a direct comparison of each entry's ID.
# The first ID is written on one line: a quoted string cannot be continued
# across a line break.
metafile = [
    {'metadata': {'Title': 'The Sun Also Rises', 'ID': 'BAY121-F1164EAB499'},
     'content': 'xyz', 'File_Path': 'file_location1'},
    {'metadata': {'Title': 'Of Mice and Men', 'ID': '499B0BAB#dfg'},
     'content': 'abc', 'File_Path': 'file_location2'},
    {'metadata': {'Title': 'The Sun Also Rises Review', 'ID': 'BAY121-F1164EAB499'},
     'content': 'ftw', 'File_Path': 'file_location3'},
]
search_ID = 'BAY121-F1164EAB499'
path = []
for a in metafile:
    if a["metadata"]["ID"] == search_ID:
        path.append(a['File_Path'])
I have a CSV file and I'm trying to create a nested dictionary that looks like this:
contacts = {"Tom": {"name": "Tom Techie",
"phone": "123 123546",
"email": "tom#tom.fi",
"skype": "skypenick"},
"Mike": {"name": "Mike Mechanic",
"phone": "000 123546",
"email": "mike#mike.fi",
"skype": "-Mike-M-"}}
etc
And this is what I have written:
# Build {key: {"name": ..., "phone": ..., "email": ..., "skype": ...}} from
# the semicolon-separated file. The "skype" entry is left out when the column
# is missing from a row or empty.
d = {}
with open("csv", "r") as file:  # the with-block closes the file afterwards
    next(file, None)  # skip the "key;name;phone;email;skype" header row
    for i in file:
        x = i.strip().split(";")
        if len(x) < 4:
            continue  # blank or incomplete line: nothing to store
        entry = {"name": x[1], "phone": x[2], "email": x[3]}
        # Rows such as "Archie;..." have only four fields, so check the
        # length before looking at the skype column.
        if len(x) > 4 and x[4] != "":
            entry["skype"] = x[4]
        d[x[0]] = entry
However it prints the dict as a normal dictionary with the updates as keys when they should be like stated above.
EDIT:
First lines of the csv:
key;name;phone;email;skype
Tom;Tom Techie;123 123546;tom#tom.fi;skypenick
Mike;Mike Mechanic;000 123456;mike#mike.fi;-Mike-M-
Archie;Archie Architect;050 987654;archie#archie
You can use pd.read_csv() and to_dict():
import pandas as pd
# Read the semicolon-separated file, index the rows by the 'key' column and
# turn every row into an inner dict of the remaining columns.
frame = pd.read_csv('test.csv', sep=';')
contacts = frame.set_index('key').to_dict(orient='index')
Yields:
{'Tom': {'name': 'Tom Techie', 'phone': '123 123546', 'email': 'tom#tom.fi', 'skype': 'skypenick'}, 'Mike': {'name': 'Mike Mechanic', 'phone': '000 123456', 'email': 'mike#mike.fi', 'skype': '-Mike-M-'}, 'Archie': {'name': 'Archie Architect', 'phone': '050 987654', 'email': 'archie#archie', 'skype': nan}}
I like the pandas answer, but if you don't want a 3rd party library, use the built-in csv module:
import csv
from pprint import pprint
# Map each row's 'key' column to a dict of the remaining columns.
D = {}
with open('csv', newline='') as f:
    for line in csv.DictReader(f, delimiter=';'):
        record = dict(line)
        # pop() returns the key column and removes it from the record in one step
        D[record.pop('key')] = record
pprint(D)
Output:
{'Archie': {'email': 'archie#archie',
'name': 'Archie Architect',
'phone': '050 987654',
'skype': None},
'Mike': {'email': 'mike#mike.fi',
'name': 'Mike Mechanic',
'phone': '000 123456',
'skype': '-Mike-M-'},
'Tom': {'email': 'tom#tom.fi',
'name': 'Tom Techie',
'phone': '123 123546',
'skype': 'skypenick'}}
You can use zip() to achieve your goal:
file = """key;name;phone;email;skype
Tom;Tom Techie;123 123546;tom#tom.fi;skypenick
Mike;Mike Mechanic;000 123456;mike#mike.fi;-Mike-M-
Archie;Archie Architect;050 987654;archie#archie""".splitlines()

d = {}
h = None
for i in file:  # works the same for your csv-file
    # first row == header, store in h
    if h is None:
        h = i.strip().split(";")[1:]
        continue  # done for first row
    x = i.strip().split(";")
    # zip pairs the read in line with the header line to get tuples
    # which are fed into the dict constructor that creates the inner dict
    d[x[0]] = dict(zip(h, x[1:]))  # no default for skype
    # use this instead if you want the skype key always present with empty default
    # d[x[0]] = dict(zip(h,x[1:]+[""]))
print(d)
zip() stops at the end of the shorter input and discards the surplus elements of the longer one - you won't need any checks for that.
Output:
{'Tom': {'name': 'Tom Techie', 'phone': '123 123546',
'email': 'tom#tom.fi', 'skype': 'skypenick'},
'Mike': {'name': 'Mike Mechanic', 'phone': '000 123456',
'email': 'mike#mike.fi', 'skype': '-Mike-M-'},
'Archie': {'name': 'Archie Architect', 'phone': '050 987654',
'email': 'archie#archie'}}
If you use the commented line, the data will get a default value of '' for the skype key - this works only because skype is the last element of the split line.
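You can use a dict comprehension! Assuming the data is laid out as key;email;name;phone;skype (the column order the code below indexes into), one contact per line, with an empty first cell in the header row: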
# Build {key: {...}} straight from the lines of the file; the header row ends
# up under the key "" and is removed afterwards.
with open("df.csv", "r") as file:
    d = {x.split(";")[0]: {
        "name": x.split(";")[2],
        "phone": x.split(";")[3],
        "email": x.split(";")[1],
        # rstrip drops the trailing newline only if there is one, so a last
        # line without a newline keeps its final character
        "skype": x.split(";")[4].rstrip("\n"),
    } for x in file}
d.pop("")
We want to open files using with whenever possible to benefit from Python's context management. See https://www.python.org/dev/peps/pep-0343/ for fundamental understanding of the with statement.
Since the key "" only appears once at the head of the csv, we can pop it at the end and avoid performing a comparison at every iteration. A dict comprehension accomplishes the same thing you wanted to achieve with d.update.
More about comprehensions:
https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions
Edit: refactoring to remove the repetitive calls to .split can look something like this:
def line_to_dict(x, d):
    """Parse one ``;``-separated line and store it in *d* under its first field.

    Args:
        x: A line with at least five fields in the order
            key, email, name, phone, skype. A trailing newline is optional.
        d: The dictionary to update in place.

    Raises:
        IndexError: If the line has fewer than five fields.
    """
    # Strip the line ending before splitting: a last line without a trailing
    # newline then keeps the final character of its skype field.
    x = x.rstrip("\n").split(";")
    d[x[0]] = {
        "name": x[2],
        "phone": x[3],
        "email": x[1],
        "skype": x[4],
    }
# Feed every line of the file to line_to_dict, then drop the header row,
# which was stored under the key "".
d = {}
with open("df.csv", "r") as file:
    for x in file:
        line_to_dict(x, d)
d.pop("")
I am looping through an API to retrieve data for multiple ICO tokens. Now, I would like to save the data to a csv with variables in columns and 1 row for each ICO token. The basic code works, I have 2 problems:
- entries are written only in every second line, which is quite impractical. How can I specify not to leave rows blank?
- the variable price is a list itself and thus saved in as a single item (with > 1 variables inside). How can I decompose the list to write one variable per column?
See my code here:
# Parentheses make all three addresses part of one tuple; without them only
# the first line is assigned to ICO_Wallet.
ICO_Wallet = (
    '0xe8ff5c9c75deb346acac493c463c8950be03dfba',
    '0x7654915a1b82d6d2d0afc37c52af556ea8983c7e',
    '0x4DF812F6064def1e5e029f1ca858777CC98D2D81',
)
for index, Wallet in enumerate(ICO_Wallet):
    Name = ICO_name[index]
    Number = ICO_No[index]
    URL = 'http://api.ethplorer.io/getTokenInfo/' + Wallet + '?apiKey=freekey'
    try:
        # The request and the JSON decoding are the steps that can fail, so
        # they are what the try block guards; the response is closed on exit.
        with urlopen(URL) as json_obj:
            data = json.load(json_obj)
    except (OSError, ValueError):
        # Report the wallet that could not be fetched or decoded and move on.
        print(Wallet)
        continue
    # NOTE: the file is opened exactly as in the question (no newline='');
    # this is presumably what produces the blank rows asked about.
    with open('token_data_test.csv', 'a') as f:
        w = csv.writer(f, delimiter=";")
        w.writerow(data.values())
    time.sleep(1)
Sample output:
data Out[59]:
{'address': '0x8a854288a5976036a725879164ca3e91d30c6a1b',
'countOps': 24207,
'decimals': '18',
'ethTransfersCount': 0,
'holdersCount': 10005,
'issuancesCount': 0,
'lastUpdated': 1542599890,
'name': 'GET',
'owner': '0x9a417e4db28778b6d9a4f42a5d7d01252a3af849',
'price': {'availableSupply': '11388258.0',
'currency': 'USD',
'diff': -20.71,
'diff30d': -14.155971452386,
'diff7d': -22.52,
'marketCapUsd': '2814942.0',
'rate': '0.2471792958',
'ts': '1542641433',
'volume24h': '2371.62380719'},
'symbol': 'GET',
'totalSupply': '33368773400000170376363910',
'transfersCount': 24207}
The first problem has an easy fix: either open the file with newline='' (as the csv module documentation recommends) or modify the csv.writer line like this:
w = csv.writer(f, delimiter=";", lineterminator='\n')
For your second problem, you can flatten your json before passing into csv:
# Move every entry of the nested 'price' dict to the top level under the key
# 'price_<name>'; pop() removes the nested 'price' key at the same time.
for k, v in data.pop('price').items():
    data['price_{}'.format(k)] = v
This changes all items under price into price_itemname as a flattened key. The .pop() method also helps remove the 'price' key at the same time.
Result:
{'address': '0x8a854288a5976036a725879164ca3e91d30c6a1b',
'countOps': 24207,
'decimals': '18',
'ethTransfersCount': 0,
'holdersCount': 10005,
'issuancesCount': 0,
'lastUpdated': 1542599890,
'name': 'GET',
'owner': '0x9a417e4db28778b6d9a4f42a5d7d01252a3af849',
'price_availableSupply': '11388258.0',
'price_currency': 'USD',
'price_diff': -20.71,
'price_diff30d': -14.155971452386,
'price_diff7d': -22.52,
'price_marketCapUsd': '2814942.0',
'price_rate': '0.2471792958',
'price_ts': '1542641433',
'price_volume24h': '2371.62380719',
'symbol': 'GET',
'totalSupply': '33368773400000170376363910',
'transfersCount': 24207}
Now you can just pass that into your csv.writer().
I have a CSV file which has a header like this:
cpus/0/compatible clocks/HSE/compatible ../frequency memories/flash/compatible ../address ../size [and so on...]
I'm able to parse that header into a nested dictionaries which may look like this:
{'clocks': {'HSE': {'compatible': '[1]',
'frequency': '[2]'}},
'cpus': {'0': {'compatible': '[0]'}},
'memories': {'bkpsram': {'address': '[13]',
'compatible': '[12]',
'size': '[14]'},
'ccm': {'address': '[7]',
'compatible': '[6]',
'size': '[8]'},
'flash': {'address': '[4]',
'compatible': '[3]',
'size': '[5]'},
'sram': {'address': '[10]',
'compatible': '[9]',
'size': '[11]'}},
'pin-controller': {'GPIOA': {'enabled': '[16]'},
'GPIOB': {'enabled': '[17]'},
'GPIOC': {'enabled': '[18]'},
'GPIOD': {'enabled': '[19]'},
'GPIOE': {'enabled': '[20]'},
'GPIOF': {'enabled': '[21]'},
'GPIOG': {'enabled': '[22]'},
'GPIOH': {'enabled': '[23]'},
'GPIOI': {'enabled': '[24]'},
'GPIOJ': {'enabled': '[25]'},
'GPIOK': {'enabled': '[26]'},
'compatible': '[15]'}}
(it is a dict object, printed with pprint())
The values of keys which look like '[<number>]' reflect the index of column in the CSV file from which the data should be loaded.
As I mainly use C/C++ I would actually love to have pointers/references in Python, as then I would just put a pointer to a list element in each value and for each row I could modify list contents, but I think there's no way to obtain such behaviour easily in Python.
So now I plan to dump this dictionary into a string and perform following 3 modifications in a row:
replace { with {{,
replace } with }},
replace '[<number>]' with {<number>}.
After that I will be able to "load" the data with something like this ast.literal_eval(dictAsStr.format(*rowFromCsv)), but it seems like a waste of time to convert the whole dict to a string and then back to a dict...
Am I missing some other obvious solution here? The format of the CSV and the way I load the header is not fixed, I may alter that easily, but I would really like a solution which would not boil down to "visit each key recursively and load appropriate value from current row manually".
From the CSV file I load each row as a list of strings, for example:
['["ARM,Cortex-M4", "ARM,ARMv7-M"]',
'["ST,STM32-HSE", "fixed-clock"]',
'0',
'["on-chip-flash"]',
'0x8000000',
'131072',
'',
'',
'',
'["on-chip-ram"]',
'0x20000000',
'65536',
'',
'',
'',
'["ST,STM32-GPIOv2-pin-controller"]',
'False',
'False',
'False',
'',
'',
'',
'',
'False',
'',
'',
'']
Now I would like to insert the values from each loaded row (list of strings) into appropriate keys in the nested dictionary, so following with the examples above I would like to get:
{'clocks': {'HSE': {'compatible': '["ST,STM32-HSE", "fixed-clock"]',
'frequency': '0'}},
'cpus': {'0': {'compatible': '["ARM,Cortex-M4", "ARM,ARMv7-M"]'}},
'memories': {'bkpsram': {'address': '',
'compatible': '',
'size': ''},
'ccm': {'address': '',
'compatible': '',
'size': ''},
'flash': {'address': '0x8000000',
'compatible': '["on-chip-flash"]',
'size': '131072'},
'sram': {'address': '0x20000000',
'compatible': '["on-chip-ram"]',
'size': '65536'}},
'pin-controller': {'GPIOA': {'enabled': 'False'},
'GPIOB': {'enabled': 'False'},
'GPIOC': {'enabled': 'False'},
'GPIOD': {'enabled': ''},
'GPIOE': {'enabled': ''},
'GPIOF': {'enabled': ''},
'GPIOG': {'enabled': ''},
'GPIOH': {'enabled': 'False'},
'GPIOI': {'enabled': ''},
'GPIOJ': {'enabled': ''},
'GPIOK': {'enabled': ''},
'compatible': '["ST,STM32-GPIOv2-pin-controller"]'}}
For completeness, here are a few first lines from the CSV file I would like to load. The first column is not part of the dictionary presented above, as it is used for indexing.
chip,cpus/0/compatible,clocks/HSE/compatible,../frequency,memories/flash/compatible,../address,../size,memories/ccm/compatible,../address,../size,memories/sram/compatible,../address,../size,memories/bkpsram/compatible,../address,../size,pin-controller/compatible,pin-controller/GPIOA/enabled,pin-controller/GPIOB/enabled,pin-controller/GPIOC/enabled,pin-controller/GPIOD/enabled,pin-controller/GPIOE/enabled,pin-controller/GPIOF/enabled,pin-controller/GPIOG/enabled,pin-controller/GPIOH/enabled,pin-controller/GPIOI/enabled,pin-controller/GPIOJ/enabled,pin-controller/GPIOK/enabled
STM32F401CB,"[""ARM,Cortex-M4"", ""ARM,ARMv7-M""]","[""ST,STM32-HSE"", ""fixed-clock""]",0,"[""on-chip-flash""]",0x8000000,131072,,,,"[""on-chip-ram""]",0x20000000,65536,,,,"[""ST,STM32-GPIOv2-pin-controller""]",False,False,False,,,,,False,,,
STM32F401CC,"[""ARM,Cortex-M4"", ""ARM,ARMv7-M""]","[""ST,STM32-HSE"", ""fixed-clock""]",0,"[""on-chip-flash""]",0x8000000,262144,,,,"[""on-chip-ram""]",0x20000000,65536,,,,"[""ST,STM32-GPIOv2-pin-controller""]",False,False,False,,,,,False,,,
STM32F401CD,"[""ARM,Cortex-M4"", ""ARM,ARMv7-M""]","[""ST,STM32-HSE"", ""fixed-clock""]",0,"[""on-chip-flash""]",0x8000000,393216,,,,"[""on-chip-ram""]",0x20000000,98304,,,,"[""ST,STM32-GPIOv2-pin-controller""]",False,False,False,,,,,False,,,
The code used to parse the header:
import csv
# Turn the header row into a nested dict whose leaves are '[<n>]'
# placeholders, where n is the position of the column after the leading
# index column.
with open("some-path-to-CSV-file", newline='') as csvFile:
    csvReader = csv.reader(csvFile)
    header = next(csvReader)
    previousKeyElements = header[1].split('/')
    dictionary = {}
    for index, key in enumerate(header[1:]):
        keyElements = key.split('/')
        # Count the leading '..' elements of a relative key ...
        i = 0
        while keyElements[i] == '..':
            i += 1
        # ... and replace them with the previous key minus its last i
        # elements. With i == 0 both slices are empty and nothing changes.
        keyElements[0:i] = previousKeyElements[0:-i]
        previousKeyElements = keyElements
        # Walk (creating as needed) down to the parent of the leaf.
        node = dictionary
        for keyElement in keyElements[:-1]:
            node = node.setdefault(keyElement, {})
        node[keyElements[-1]] = '[{}]'.format(index)
What about just using the actual row index (as integer) as value in the "parsed" header, ie:
{'clocks': {'HSE': {'compatible': 1,
'frequency': 2}},
# etc
Then use recursion on a copy of the parsed header to populate it from the row values:
import csv
import sys
import copy
import pprint
def parse_header(header):
    """Build a nested dict from the '/'-separated column names of *header*.

    The first column is skipped. Every other column name is split on '/'; a
    leading '..' stands for "the previous column's path minus its last
    element". The leaf value is the column's position counted after the
    skipped first column.

    Args:
        header: List of column names; ``header[0]`` is ignored.

    Returns:
        The nested dict, e.g. ``{'clocks': {'HSE': {'compatible': 1}}}``.
        A header with no columns beyond the first yields ``{}``.
    """
    previousKeyElements = []
    dictionary = {}
    for index, key in enumerate(header[1:]):
        keyElements = key.split('/')
        # Count the leading '..' elements of a relative key.
        i = 0
        while keyElements[i] == '..':
            i += 1
        if i:
            # Swap the '..' prefix for the previous path minus its last i parts.
            keyElements[0:i] = previousKeyElements[0:-i]
        previousKeyElements = keyElements
        # Walk (creating as needed) down to the parent of the leaf.
        node = dictionary
        for keyElement in keyElements[:-1]:
            node = node.setdefault(keyElement, {})
        node[keyElements[-1]] = index
    return dictionary
def _rparse(d, k, v, row):
if isinstance(v, dict):
for subk, subv in v.items():
_rparse(v, subk, subv, row)
elif isinstance(v, int):
d[k] = row[v]
else:
raise ValueError("'v' should be either a dict or an int (got : %s(%s))" % (type(v), v))
def parse_row(header, row):
    """Return a copy of the parsed *header* with indices replaced by row values.

    Args:
        header: Nested dict whose leaves are int column indices.
        row: The values of one row, addressed by those indices.

    Returns:
        A new nested dict of the same shape; *header* itself is not modified.
    """
    # Deep copy so the template can be reused for every row.
    struct = copy.deepcopy(header)
    for k, v in struct.items():
        _rparse(struct, k, v, row)
    return struct
def main(*args):
    """Parse the CSV file named by ``args[0]`` and pretty-print one dict per row.

    The first row is parsed as the header; the first column of every later
    row is dropped before the row is mapped onto the parsed header.
    """
    path = args[0]
    # newline='' lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(path, newline='') as f:
        reader = csv.reader(f)
        header = parse_header(next(reader))
        results = [parse_row(header, row[1:]) for row in reader]
    pprint.pprint(results)


if __name__ == "__main__":
    main(*sys.argv[1:])
Another solution (that might actually be faster) would be to build a reverse mapping with row indices as keys and dict "path" as values ie:
{0: ("cpus", "0", "compatible"),
1: ("clocks", "HSE", "compatible"),
2: ("clocks", "HSE", "frequency"),
# etc
}
and then:
def parse_row(template, map, row):
    """Fill a copy of *template* with the values of *row*.

    Args:
        template: The parsed header dict (nested dicts); it is not modified.
        map: Reverse mapping ``{row_index: (key, key, ..., leaf_key)}`` giving
            the path inside the template for each row index.
        row: The values of one row, addressed by the indices in *map*.

    Returns:
        A new nested dict shaped like *template* with each mapped leaf set to
        ``row[index]``.
    """
    # 'template' is your parsed header dict
    struct = copy.deepcopy(template)
    for index, path in map.items():
        # Start from the root again for every path.
        target = struct
        for key in path[:-1]:
            target = target[key]
        # The last path element names the leaf to overwrite.
        target[path[-1]] = row[index]
    return struct
Oh and yes, as an added bonus, you may want to use ast.literal_eval() to turn your values into proper python types:
>>> import ast
>>> ast.literal_eval("False")
False
>>> ast.literal_eval('["on-chip-flash"]')
['on-chip-flash']
>>> ast.literal_eval('0x8000000')
134217728
>>> ast.literal_eval('["ARM,Cortex-M4", "ARM,ARMv7-M"]')
['ARM,Cortex-M4', 'ARM,ARMv7-M']
>>> ast.literal_eval("this should fail")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/ast.py", line 49, in literal_eval
node_or_string = parse(node_or_string, mode='eval')
File "/usr/lib/python2.7/ast.py", line 37, in parse
return compile(source, filename, mode, PyCF_ONLY_AST)
File "<unknown>", line 1
this should fail
^
SyntaxError: invalid syntax
>>> def to_python(value):
... try:
... return ast.literal_eval(value)
... except Exception as e:
... return value
...
>>> to_python('["on-chip-flash"]')
['on-chip-flash']
>>> to_python('wtf')
'wtf'
>>>