Python version 2.7.10
I have this script (which grabs AWS EBS volume meta data) which currently generates a key=value pair data which is CSV (comma separated value) output per line.
Python script:
#!/usr/bin/python
#Do `sudo pip install boto3` first
import boto3
import json
def generate(key, value):
    """
    Render a single ``key=value`` field for the comma-separated output line.

    Both arguments are interpolated with ``str.format``, so non-string
    values (ints, None, ...) appear in their default string form.
    """
    pair_template = '{}={}'
    return pair_template.format(key, value)
    # Earlier attempt at emitting JSON-style pairs, kept for reference:
    #if isinstance(value,int):
    #    return '\"{}\": {}'.format(key, value)
    #else:
    #    return '\"{}\": \"{}\"'.format(key, value)
def main():
    """
    Print one comma-separated ``key=value`` line per EBS volume in us-west-2.

    Each line holds the fixed volume attributes, then one field per tag on
    the volume (if any), then a trailing ``state`` field that repeats the
    numeric volume state.
    """
    ec2 = boto3.resource('ec2', region_name="us-west-2")
    for vol in ec2.volumes.all():
        # Numeric volume state: 1 (available/unattached), 0 otherwise (e.g. in-use).
        vol_state_num_value = 1 if vol.state == "available" else 0
        # An empty snapshot id is reported as the literal text "None".
        vol_snapshot_id = vol.snapshot_id or "None"

        output_parts = [
            # Volume level details
            generate('vol_id', vol.volume_id),
            generate('az', vol.availability_zone),
            generate('vol_type', vol.volume_type),
            generate('size', vol.size),
            generate('iops', vol.iops),
            generate('snapshot_id', vol_snapshot_id),
            generate('vol_state', vol_state_num_value),
        ]

        # Only process tags when there are tags to process (vol.tags may be falsy).
        for tag in vol.tags or []:
            output_parts.append(generate(tag.get('Key'), tag.get('Value')))

        # At last put the volume state numeric value,
        # i.e. 0 (in-use) and 1 (available/unattached) volume.
        output_parts.append(generate('state', vol_state_num_value))

        # Output everything at once. The parenthesised single-argument form
        # prints the same text on Python 2 and Python 3.
        print(','.join(output_parts))
        #print '{}{}{}'.format('{',output_parts,'}')
if __name__ == '__main__':
main()
Currently the output it generates looks like this:
vol_id=vol-0abcdab1b68111f8b,az=us-west-2b,vol_type=gp2,size=5,iops=100,snapshot_id=snap-0abcdab1b68111f8b,vol_state=0,mirror=primary,autoscale=true,cluster=customer,Name=[customer-2b-app41] primary,role=app,hostname=customer-2b-app41-i-0abcdab1b68111f8b,state=0
vol_id=vol-0abcdab1b68111f8c,az=us-west-2b,vol_type=gp2,size=12,iops=100,snapshot_id=snap-0abcdab1b68111f9c,vol_state=0,state=0
I'm trying to convert the script so that instead of generating a key=value pair CSV row per line, it'll generate a JSON object.
I tried to tweak the script by using an IF statement, as shown in the script, i.e. isinstance() on the value part (to wrap the value in double quotes, or leave it bare if it's a number), using the following lines:
#if isinstance(value,int):
# return '\"{}\": {}'.format(key, value)
#else:
# return '\"{}\": \"{}\"'.format(key, value)
and
#print '{}{}{}'.format('{',output_parts,'}')
But, that's not giving me the desired result.
Expecting the desired result as something like:
{
{
"vol_id": "vol-0abcdab1b68111f8b",
"az": "us-west-2b",
"vol_type": "gp2",
"size": 5,
"iops": 100,
"snapshot_id":"snap-0abcdab1b68111f8b",
...,
.....,
},
{
"vol_id": "vol-0abcdab1b68111f8c",
"az": "us-west-2b",
"vol_type": "gp2",
"size": 12,
"iops": 100,
"snapshot_id": "snap-0abcdab1b68111f9c",
...,
.....
}
}
I tried to use json.dumps(output_parts) but that didn't help to get the desired output. I don't need the CSV output.
To convert your csv lines into a dict can be done with:
Code:
def my_csv_to_dict(csv_line):
    """Convert one ``key=value,key=value,...`` line into a dict of strings.

    Each comma-separated field is split on its first ``=`` only, so values
    that themselves contain ``=`` are kept intact. An empty line yields an
    empty dict.

    Raises:
        ValueError: if a non-empty line contains a field without ``=``.
    """
    if not csv_line:
        return {}
    # maxsplit=1 keeps any further '=' characters inside the value.
    return dict(field.split('=', 1) for field in csv_line.split(','))
Test Code:
To convert those dictionaries into json you can use the json lib.
import json

# Two sample output lines of the original script, one per volume.
raw_rows = """
vol_id=vol-0abcdab1b68111f8b,az=us-west-2b,vol_type=gp2,size=5,iops=100,snapshot_id=snap-0abcdab1b68111f8b,vol_state=0,mirror=primary,autoscale=true,cluster=customer,Name=[customer-2b-app41] primary,role=app,hostname=customer-2b-app41-i-0abcdab1b68111f8b,state=0
vol_id=vol-0abcdab1b68111f8c,az=us-west-2b,vol_type=gp2,size=12,iops=100,snapshot_id=snap-0abcdab1b68111f9c,vol_state=0,state=0
"""
# The leading and trailing newline produce an empty first and last piece;
# the [1:-1] slice drops them.
test_data = [row.strip() for row in raw_rows.split('\n')[1:-1]]

# Convert every line to a dict and pretty-print the resulting list as JSON.
converted = [my_csv_to_dict(row) for row in test_data]
print(json.dumps(converted, indent=2))
Results:
[
{
"az": "us-west-2b",
"autoscale": "true",
"Name": "[customer-2b-app41] primary",
"mirror": "primary",
"cluster": "customer",
"state": "0",
"iops": "100",
"role": "app",
"vol_type": "gp2",
"snapshot_id": "snap-0abcdab1b68111f8b",
"vol_id": "vol-0abcdab1b68111f8b",
"vol_state": "0",
"hostname": "customer-2b-app41-i-0abcdab1b68111f8b",
"size": "5"
},
{
"az": "us-west-2b",
"state": "0",
"iops": "100",
"vol_type": "gp2",
"snapshot_id": "snap-0abcdab1b68111f9c",
"vol_id": "vol-0abcdab1b68111f8c",
"vol_state": "0",
"size": "12"
}
]
Related
I need to sort and create a new array based on the value of the JSON. I need to filter repositories under each team and store repositories into a different array.
Input array:
{
"repo_list": [
{
"repo_name": "MaticCorporation/Sample-Repo-1",
"team_name": "AFIN",
"tlt_member": "Sample-TLT-Member-1",
"matix.properties": "Valid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-2",
"team_name": "AFIN",
"tlt_member": "Sample-TLT-Member-1",
"matix.properties": "Valid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-3",
"team_name": "-",
"tlt_member": "Sample-TLT-Member-2",
"matix.properties": "Invalid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-4",
"team_name": "RETIX",
"tlt_member": "-",
"matix.properties": "Invalid"
},
{
"repo_name": "MaticCorporation/Sample-Repo-5",
"team_name": "-",
"tlt_member": "-",
"matix.properties": "No"
}
]
}
Output:
{
"repo_by_team": [
{
"team": "AFIN",
"repo_count": 2,
"repo_list": [
"MaticCorporation/Sample-Repo-1",
"MaticCorporation/Sample-Repo-2"
]
},
{
"team": "RETIX",
"repo_count": 1,
"repo_list": [
"MaticCorporation/Sample-Repo-4"
]
}
]
}
I've implemented the solution to filter and store all team names into an array, but I'm having difficulty how to get the result like output array.
Here is my code for extracting team names:
def get_team_names(repo_list):
    """Return the ``'team'`` entry of every repo kept by ``valid_repos``.

    Entries without a ``'team'`` key contribute ``None``.
    """
    # NOTE(review): the sample input uses the key "team_name", not "team" -
    # confirm that valid_repos() renames it, otherwise every entry is None.
    names = []
    for entry in valid_repos(repo_list):
        names.append(entry.get('team'))
    return names
You can use a dict[str, list[str]] to map between a team and its repositories, and you can use the json module to transform data between Python dictionaries and a JSON representation.
import json
# Read and parse the input completely before opening the output file, so a
# missing or malformed input.json cannot leave a truncated output.json behind.
with open('input.json') as input_file:
    repo_data = json.load(input_file)['repo_list']

# Map each team name to the list of its repository names.
team_repos = {}
for repo in repo_data:
    # Repositories whose team_name is '-' are left out of the grouping.
    if repo['team_name'] != '-':
        team_repos.setdefault(repo['team_name'], []).append(repo['repo_name'])

# One summary record per team, in the dict's iteration order.
result = [
    {
        "team": team,
        "repo_count": len(repo_list),
        "repo_list": repo_list,
    }
    for team, repo_list in team_repos.items()
]

with open('output.json', 'w') as output_file:
    json.dump({'repo_by_team': result}, output_file, indent=4)
The following is functional. The function may perform slowly on large input, but it uses no more than the necessary amount of space. It does, however, accept and return a Python dictionary. To convert to and from a dictionary use the Python json module.
def sort_by_team(repo_list: list) -> dict:
    """Group repositories by team.

    Args:
        repo_list: iterable of repo records (dicts) that each carry a
            ``"team_name"`` and a ``"repo_name"`` entry.

    Returns:
        ``{"repo_by_team": [...]}`` where each element has the keys
        ``"team"``, ``"repo_count"`` and ``"repo_list"``. Teams appear in the
        order they are first seen; records whose team is ``"-"`` are skipped.
    """
    # Index the per-team records by name so each repo is handled with a
    # single dict lookup instead of rescanning the result list.
    by_team = {}
    for repo in repo_list:
        team = repo["team_name"]
        if team == "-":
            continue
        entry = by_team.get(team)
        if entry is None:
            entry = by_team[team] = {"team": team, "repo_count": 0, "repo_list": []}
        entry["repo_count"] += 1
        entry["repo_list"].append(repo["repo_name"])
    return {"repo_by_team": list(by_team.values())}
I have json file which has a list of ids and date. How to write a python program to print all the ids for a particular month from the json file
Below is the sample json data
{
"entities": [
{
"Fields": [
{
"Name": "version",
"values": [
{
"value": "Cycle 1"
}
]
},
{
"Name": "subject",
"values": [
{
"value": "1008"
}
]
},
{
"Name": "project",
"values": [
{}
]
},
{
"Name": "linkage",
"values": [
{
"value": "N"
}
]
},
{
"Name": "cycle-id",
"values": []
},
{
"Name": "creation-time",
"values": [
{
"value": "2016-07-12"
}
]
},
{
"Name": "id",
"values": [
{
"value": "1"
}
]
}]}]}
I have just tried to load the json file from below code.
import json

# A context manager closes the file handle even if parsing fails.
with open('defects-export-0-100.json') as f:
    data = json.load(f)
print(data)

# month = str("MM")
month = '09'
defect_items = []
defectIDs = []


def _first_value(container):
    """Return the first ``value`` of a field container, or None if it has none."""
    # The sample data contains both `"values": []` and `"values": [{}]`.
    values = container.get("values") or [{}]
    return values[0].get("value")


# Collect every entity whose "creation-time" falls in the requested month.
for item in data["entities"]:
    for container in item["Fields"]:
        if container["Name"] == "creation-time":
            created = _first_value(container)
            # The second "-"-separated component is compared with `month`
            # (sample value: "2016-07-12"); the slice avoids an IndexError
            # on values that contain no "-".
            if created and created.split("-")[1:2] == [month]:
                defect_items.append(item)

# Pull the "id" field out of each matching entity.
for item in defect_items:
    for container in item["Fields"]:
        if container["Name"] == "id":
            defect_id = _first_value(container)
            if defect_id is not None:
                defectIDs.append(defect_id)
My desired output: All the IDs from the one particular month of creation date.
The biggest issue is how you're referencing keys in a dictionary. You can get the value at a particular key with:
x = {"key": value}
x["key"]
# value
I've made some assumptions about your data set, but this code works with the sample you gave.
import json
with open("data.txt", "r") as f:
    data = json.load(f)

#month = str("MM")
month = "07"
defect_items = []
defectIDs = []

# Pass 1: keep every entity that has a "creation-time" field in the wanted
# month. Only the first entry of "values" is inspected, and the month is
# taken as the second "-"-separated component of its value.
for entity in data["entities"]:
    for field in entity["Fields"]:
        if (field["Name"] == "creation-time"
                and field["values"][0]["value"].split("-")[1] == month):
            defect_items.append(entity)

# Pass 2: collect the first value of each "id" field of the kept entities.
for entity in defect_items:
    defectIDs.extend(
        field["values"][0]["value"]
        for field in entity["Fields"]
        if field["Name"] == "id"
    )
Once the data is loaded, you can interact with it as you would any Python object. Get all the items with:
items = data['entities']
For the code below to work, create a variable month and set it to a string with the format MM (where M is a digit of the month: e.g. month='01' for January) so it exactly matches the correct month format of the data.
Then, run the following loop to collect the IDs:
# `items` is the list stored under data['entities'] and each entity's
# "Fields" is a list too, so both are iterated directly (lists have no
# .keys()). Each field's "values" is a list whose first element holds "value".
ids = []
for entity in items:
    entity_id = None
    in_month = False
    for field in entity['Fields']:
        # "values" may be empty or hold an empty object in the sample data.
        values = field.get('values') or [{}]
        value = values[0].get('value')
        if value is None:
            continue
        if field['Name'] == 'creation-time':
            # The second "-"-separated component is compared with `month`.
            if value.split('-')[1] == month:
                in_month = True
        if field['Name'] == 'id':
            entity_id = value
    # Keep the id only when the entity was created in the requested month.
    if in_month and entity_id:
        ids.append(entity_id)
I have functions which generate data that I add to a dict. The thing is, I want my JSON file to look like this, i.e. to hold multiple entries, not only one key/value pair as in my code:
{"data":[
{"key":"Shyam", "value":10.4},
{"key":"Bob", "value":12.5},
{"key":"Jai", "value":24.2}
]}
This is how is look like the moment only one key value pair is added:
{
"key": "Amadm",
"value": 14.5
}
This is my code to assign to dict before json dumps.
:
def gen_dict(key, value, ts=None):
    """Build one record dict ready for ``json.dumps``.

    Args:
        key: stored under ``"name"``.
        value: stored under ``"value"``.
        ts: optional timestamp stored under ``"ts"``. When omitted (None)
            the ``"ts"`` entry is left out, so the function can also be
            called with just a key and a value.

    Returns:
        dict with ``"name"``, ``"value"`` and, if given, ``"ts"``.
    """
    data = {
        "name": key,
        "value": value,
    }
    if ts is not None:
        data["ts"] = ts
    return data
# Serialize a single generated record as pretty-printed JSON (4-space indent).
# NOTE(review): only two arguments are passed to gen_dict here - confirm this
# matches gen_dict's parameter list (key, value, ts).
json_object = json.dumps(gen_dict(gen_key(), gen_value()), indent = 4)
So my question is: how do I put more than one key/value record into the dict and then convert it to a JSON object like the one shown in the example above?
You are creating a single dictionary, what you want is a list of dictionaries:
Assuming that each call to gen_key() and gen_value() generates a single instance of the data, you can use:
# Some random key
def gen_key():
    """Return a string of five randomly chosen lowercase ASCII letters."""
    letters = [random.choice(string.ascii_lowercase) for _ in range(5)]
    return ''.join(letters)
# Some random value
def gen_value():
    """Return a randomly chosen integer from ``range(1000)`` (0 to 999)."""
    candidates = range(1000)
    return random.choice(candidates)
# Build three independent records, then wrap them in a top-level "data" list
# and serialize with a 4-space indent.
records = [{"name": gen_key(), "value": gen_value()} for _ in range(3)]
s = json.dumps({"data": records}, indent=4)
output:
{
"data": [
{
"name": "rrqct",
"value": 162
},
{
"name": "vbuyq",
"value": 422
},
{
"name": "kfyqt",
"value": 7
}
]
}
I have the following data in my JSON file:
{
"first": {
"name": "James",
"age": 30
},
"second": {
"name": "Max",
"age": 30
},
"third": {
"name": "Norah",
"age": 30
},
"fourth": {
"name": "Sam",
"age": 30
}
}
I want to print the top-level key and object as follows:
import json
import ijson
fname = "data.json"
# Read the whole file into memory and parse it in one go.
with open(fname) as f:
    raw_data = f.read()
data = json.loads(raw_data)
# Print each top-level key followed by its object. items() avoids a second
# lookup per key, and the single-argument print(...) form produces the same
# "key value" line on both Python 2 and Python 3.
for k, v in data.items():
    print("%s %s" % (k, v))
OUTPUT:
second {u'age': 30, u'name': u'Max'}
fourth {u'age': 30, u'name': u'Sam'}
third {u'age': 30, u'name': u'Norah'}
first {u'age': 30, u'name': u'James'}
So far, so good. However, if I want to do this same thing for a huge file, I would have to read it all into memory. This is very slow and requires lots of memory.
I want use an incremental JSON parser ( ijson in this case ) to achieve what I described earlier:
The above code was taken from: No access to top level elements with ijson?
with open(fname) as f:
    # '' loads everything as only one object. The builtin next() works on
    # both Python 2 and Python 3, unlike the Python-2-only .next() method.
    json_obj = next(ijson.items(f, ''))
    for (key, value) in json_obj.items():
        print(key + " -> " + str(value))
This is not suitable either, because it also reads the whole file into memory. This is not truly incremental.
How can I do incremental parsing of top-level keys and corresponding objects, of a JSON file in Python?
Since essentially json files are text files, consider stripping the top level as string. Basically, use a read file iterable approach where you concatenate a string with each line and then break out of the loop once the string contains the double braces }} signaling the end of the top level. Of course the double brace condition must strip out spaces and line breaks.
import re

# Read line by line until two closing braces appear back to back once all
# whitespace is ignored, which is taken as the end of the first top-level
# object. str.replace('\s+', '') only removes the literal text "\s+", so a
# real regex is used to strip spaces, tabs and line breaks.
# NOTE: this is a textual heuristic; "}}" inside deeper nesting or inside a
# string value also ends the scan.
collected_lines = []
last_char = ''  # last non-whitespace character seen so far
with open('data.json') as f:
    for line in f:
        collected_lines.append(line)
        compact = re.sub(r'\s+', '', line)
        # Only the previous non-blank character plus this line can complete a
        # new "}}", so the accumulated text never needs rescanning.
        if '}}' in last_char + compact:
            break
        last_char = (last_char + compact)[-1:]
toplevelstring = ''.join(collected_lines)
data = json.loads(toplevelstring)
Now if your larger json is wrapped in square brackets or other braces, still run above routine but add the below line to slice out first character, [, and last two characters for comma and line break after top level's final brace:
[{
"first": {
"name": "James",
"age": 30
},
"second": {
"name": "Max",
"age": 30
},
"third": {
"name": "Norah",
"age": 30
},
"fourth": {
"name": "Sam",
"age": 30
}
},
{
"data1": {
"id": "AAA",
"type": 55
},
"data2": {
"id": "BBB",
"type": 1601
},
"data3": {
"id": "CCC",
"type": 817
}
}]
...
# Slice off the first character (the opening "[") and the last two characters
# (the comma and line break after the top-level object's final brace).
toplevelstring = toplevelstring[1:-2]
# Parse what remains as a single JSON document.
data = json.loads(toplevelstring)
Since version 2.6 ijson comes with a kvitems function that achieves exactly this.
Answer from github issue [file name changed]
import ijson
from ijson.common import ObjectBuilder
def objects(file):
    """Incrementally yield ``(key, value)`` for each member of the root object.

    The file is consumed through ``ijson.parse`` events; only the value of
    the root-level key currently being read is built in memory.

    Args:
        file: file-like object containing a JSON document whose root is an
            object.

    Yields:
        ``(key, value)`` once the value for a root-level key is complete,
        whether it is an object, an array or a scalar.
    """
    key = None
    builder = None
    for prefix, event, value in ijson.parse(file):
        if prefix == '' and event == 'map_key':  # found new member at the root
            key = value  # mark the key value
            builder = ObjectBuilder()
        elif builder is not None and prefix.startswith(key):
            # while at this key, build the value
            builder.event(event, value)
            # The value is complete only when an event at exactly this key's
            # prefix closes a container or delivers a scalar. Nested
            # end_map/end_array events carry a longer prefix and must not
            # trigger a yield of the half-built value.
            if prefix == key and event not in ('start_map', 'start_array', 'map_key'):
                yield key, builder.value
                builder = None
# Open the file in a context manager so the handle is closed once iteration
# finishes or fails.
with open('data.json', 'rb') as json_file:
    for key, value in objects(json_file):
        print(key, value)
I'm in over my head, trying to parse JSON for my first time and dealing with a multi dimensional array.
{
"secret": "[Hidden]",
"minutes": 20,
"link": "http:\/\/www.1.com",
"bookmark_collection": {
"free_link": {
"name": "#free_link#",
"bookmarks": [
{
"name": "1",
"link": "http:\/\/www.1.com"
},
{
"name": "2",
"link": "http:\/\/2.dk"
},
{
"name": "3",
"link": "http:\/\/www.3.in"
}
]
},
"boarding_pass": {
"name": "Boarding Pass",
"bookmarks": [
{
"name": "1",
"link": "http:\/\/www.1.com\/"
},
{
"name": "2",
"link": "http:\/\/www.2.com\/"
},
{
"name": "3",
"link": "http:\/\/www.3.hk"
}
]
},
"sublinks": {
"name": "sublinks",
"link": [
"http:\/\/www.1.com",
"http:\/\/www.2.com",
"http:\/\/www.3.com"
]
}
}
}
This is divided into 3 parts: the static data on the first dimension (secret, minutes, link), which I need to get as separate strings.
Then I need a dictionary per "bookmark collection" which does not have fixed names, so I need the name of them and the links/names of each bookmark.
Then there is the separate sublinks entry, which is always the same, where I need all the links in a separate dictionary.
I'm reading about parsing JSON but most of the stuff I find is a simple array put into 1 dictionary.
Does anyone have any good techniques to do this ?
After you parse the JSON, you will end up with a Python dict. So, suppose the above JSON is in a string named input_data:
import json
# This converts from JSON to a python dict
parsed_input = json.loads(input_data)

# Now, all of your static variables are referenceable as keys:
secret = parsed_input['secret']
minutes = parsed_input['minutes']
link = parsed_input['link']

# Plus, you can get your bookmark collection as:
bookmark_collection = parsed_input['bookmark_collection']

# Print a list of names of the bookmark collections...
# (list(...) gives the same list output on Python 2 and Python 3.)
print(list(bookmark_collection.keys()))  # Note this contains sublinks, so remove it if needed

# Get the name of the Boarding Pass bookmark:
print(bookmark_collection['boarding_pass']['name'])

# Print out a list of all bookmark links as:
#   Boarding Pass
#    * 1: http://www.1.com/
#    * 2: http://www.2.com/
#   ...
for bookmark_definition in bookmark_collection.values():
    # Skip sublinks...
    if bookmark_definition['name'] == 'sublinks':
        continue
    print(bookmark_definition['name'])
    for bookmark in bookmark_definition['bookmarks']:
        print(" * %(name)s: %(link)s" % bookmark)

# Get the sublink definition:
sublinks = parsed_input['bookmark_collection']['sublinks']

# .. and print them
print(sublinks['name'])
# A distinct loop name keeps the top-level `link` value assigned earlier
# from being overwritten by the last sublink.
for sublink in sublinks['link']:
    print(' * %s' % (sublink,))
Hmm, doesn't json.loads do the trick?
For example, if your data is in a file,
import json
# Read the file inside a context manager so the handle is closed promptly.
with open('/tmp/mydata.json') as f:
    text = f.read()
d = json.loads(text)

# first level fields
print(d['minutes'])  # or 'secret' or 'link'

# the names of each of bookmark_collections's items
# (list(...) prints a plain list on both Python 2 and Python 3)
print(list(d['bookmark_collection'].keys()))

# the sublinks section, as a dict
print(d['bookmark_collection']['sublinks'])
The output of this code (given your sample input above) is:
20
[u'sublinks', u'free_link', u'boarding_pass']
{u'link': [u'http://www.1.com', u'http://www.2.com', u'http://www.3.com'], u'name': u'sublinks'}
Which, I think, gets you what you need?