Looping throuh json - python

I'am trying to add json data to the lists.
Json looks like this:
[{'genus': 'Musa', 'name': 'Banana', 'id': 1, 'family': 'Musaceae', 'order': 'Zingiberales', 'nutritions': {'carbohydrates': 22, 'protein': 1, 'fat': 0.2, 'calories': 96, 'sugar': 17.2}}]
But with my function, i can append to lists only those objects:
genus': 'Musa', 'name': 'Banana', 'id': 1, 'family': 'Musaceae', 'order': 'Zingiberales'
Can't get anything from 'nutritions'.
Adding code:
import requests
import json
name = []
id = []
family = []
genus = []
order = []
carbohydrates = []
protein = []
fat = []
calories = []
sugar = []
def scrape_all_fruits():
data_list = []
try:
for ID in range(1, 10):
url = f'https://www.fruityvice.com/api/fruit/{ID}'
response = requests.get(url)
data = response.json()
data_list.append(data)
except:
pass
return data_list
def listify(fruit_stats):
alist = json.dumps(scrape_all_fruits())
jsonSTr = json.loads(alist)
for i in jsonSTr:
try:
name.append(i['name'])
id.append(i['id'])
family.append(i['family'])
genus.append(i['genus'])
order.append(i['order'])
carbohydrates.append(i['carbohydrates'])
protein.append(i['protein'])
# fat.append(i['fat'])
calories.append(i['calories'])
sugar.append(i['sugar'])
for nutrs in i:
fat.append(nutrs.a['fat'])
except:
pass
return fruit_stats
print(listify(fat))
Can anyone explain to me what iam doing wrong ? Thank You in advance.

jsonSTr in your code is a dictionary. By default looping over a dictionary returns its keys.
You can fix this by either looking up via the key you receive:
name.append(jsonSTr[i]["name"])
or by looping over the values:
for i in jsonSTr.values():
if you need both the key and the value you can use the items() method.

IMO, your code is too complicated. If you have a nested dictionary, you can flatten it with a special function (example from here).
from collections.abc import MutableMapping
import pandas as pd
import json
def flatten_dict(d: MutableMapping, sep: str= ".") -> MutableMapping:
[flat_dict] = pd.json_normalize(d, sep=sep).to_dict(orient="records")
return flat_dict
Data = []
test_response = '[{"genus": "Musa", "name": "Banana", "id": 1, "family": "Musaceae", "order": "Zingiberales", "nutritions": {"carbohydrates": 22, "protein": 1, "fat": 0.2, "calories": 96, "sugar": 17.2}}]'
ResponseJSON = json.loads(test_response)
Data.append(flatten_dict(ResponseJSON[0]))
print(json.dumps(Data, indent=4))
Output:
[
{
"genus": "Musa",
"name": "Banana",
"id": 1,
"family": "Musaceae",
"order": "Zingiberales",
"nutritions.carbohydrates": 22,
"nutritions.protein": 1,
"nutritions.fat": 0.2,
"nutritions.calories": 96,
"nutritions.sugar": 17.2
}
]
What to do further, is up to you, but the output structure is good for pandas tables, which can be easily manipulated:
Table = pd.DataFrame(Data)
Hope that helps!

Related

copy json objects and add to main json object, if keys has more than one value

I am having JSON data, where some key has more than one value. I want to make a copy of json body and add it to main json.
For example, in my given.json, number has two values 156 and 158 and respectively. I want to make copy and add of it, please see my expected.json result.
given.json
[{"fields": {"Start": "yes1",
"number": [156, 158],
"time": 1600,
"total": 8}}]
expected.json
[{"fields": {"Start": "yes1",
"number": [156],
"time": 1600,
"total": 8}},
{"fields": {"Start": "yes1",
"number": [158],
"time": 1600,
"total": 8}}]
python
import json
# Opening JSON file
f = open('given.json',)
# returns JSON object as
# a dictionary
main_data = json.load(f)
my_dict = []
main_data[0]["fields"]["number"] = 156
main_data_1 = []
main_data_1.append(main_data)
my_dict.append(main_data_1)
main_data[0]["fields"]["number"] = 158
main_data_2 = []
main_data_2.append(main_data)
my_dict.append(main_data_2)
print(my_dict)
You seem to be confused between dictionaries and lists. json.load() can return either and in this case it will be a list (with only one element in it, which is a dictionary).
In the code below, a second element which is a copy of the first made, and then each element is modified according. The copy is a "deep copy" so the values in the dictionary copies are all independent.
from copy import deepcopy
import json
from pprint import pprint
with open('given.json') as f:
main_data = json.load(f)
numbers = main_data[0]["fields"]["number"]
main_data = [deepcopy(main_data[0]) for _ in numbers]
for i, number in enumerate(numbers):
main_data[i]["fields"]["number"] = [number]
pprint(main_data, sort_dicts=False)
Output:
[{'fields': {'Start': 'yes1', 'number': [156], 'time': 1600, 'total': 8}},
{'fields': {'Start': 'yes1', 'number': [158], 'time': 1600, 'total': 8}}]

How to print out values of dictionary in a list of another dictionary

I'd like to do is render/display isbn and work_ratings_count values on bookdetail.html
res output is:
{'books': [{'id': 29207858, 'isbn': '1632168146', 'isbn13': '9781632168146', 'ratings_count': 0, 'reviews_count': 2, 'text_reviews_count': 0, 'work_ratings_count': 28, 'work_reviews_count': 123, 'work_text_reviews_count': 10, 'average_rating': '4.07'}]}
code:
import requests, json
res= requests.get("https://www.goodreads.com/book/review_counts.json", params={"key": "xxx", "isbns": "9781632168146"})
JS = res.json()
bs = json.loads(JS)
print(bs.get['isbn'])
You might make things easier on yourself if you set up your data differently. Why not use nested dictionaries?
nested_dict = {"book": {"ISBN": 0000, "Num": 0001}, "book2": {"ISBN": 0000, "Num": 0001}}
Then to print you can call the key/value as:
nested_dict["book2"]["ISBN"]
Hope this helped, if I misinterpreted your question let me know.

Use list comprehension to return a new list

I have a dict array that I want to get a specific attribute and create a new dict list based on those attribute.
I'm trying to use list comprehension to parse every row instead of use a traditional loop. Is possible to do it?
from datetime import datetime
from dateutil.parser import parse
def _format_string_to_timestamp(dt, output):
if dt is None or type(dt) == float:
return ""
origin_dt = parse(dt)
return origin_dt.strftime(output)
def extract_tickets_tags_history(audit):
tag_history = []
sync = "1234"
tags = [d for d in audit['events'] if d.get('field_name', '') == 'tags']
if len(tags) > 0:
return [
{
'tag': tag,
'updated': _format_string_to_timestamp(audit['created_at'], "%Y-%m-%d %H:%M:%S"),
'ticket_id': audit['ticket_id'],
'tagged': False,
'user_id': audit['author_id'],
'sync': sync
}
for tag in tags[-1]['value']]
return None
audit = {
'ticket_id': 123,
'author_id': 654,
'created_at': '2019-04-07T01:09:40Z',
'events': [
{
'field_name': 'tags',
'value': ['taga', 'tagb']
}
]
}
example = [
{
'id': 123,
'data': [audit]
}
]
result = [extract_tickets_tags_history(data) for data in x['data'] for x in example]
I'm getting an error NameError: name 'x' is not defined
...
And the result should be something like [{"tag": "...", "updated": "...", ...}]
You swapped the two for loops in your list comprehension
result = [extract_tickets_tags_history(data) for x in example for data in x['data'] ]
which is equivalent to
result = []
for x in example:
for data in x['data']:
result.append(extract_tickets_tags_history(data))

Make List and Dictionaries dynamically in Python. I am trying to create the array in Python

I am new in Python and getting the problem in creating the dynamically list and dictionaries.
I want the result like:
{
'page1':
[{'email':
[{'emailAddress':'abc#gmail.com'}],[{'emailAddress1':'abc#gmail.com1'}],...
}],[{'url':
[{'url':'abc.com','cordinates','10,10,100,45'}],[{'url':'abc.com','cordinates','10,10,100,45'}]...
}]
}
{
'page1':
[{'email':
[{'emailAddress':'abc#gmail.com'}],[{'emailAddress1':'abc#gmail.com1'}],...
}],[{'url':
[{'url':'abc.com','cordinates','10,10,100,45'}],[{'url':'abc.com','cordinates','10,10,100,45'}]...
}]
}
{
'page1':
[{'email':
[{'emailAddress':'abc#gmail.com'}],[{'emailAddress1':'abc#gmail.com1'}],...
}],[{'url':
[{'url':'abc.com','cordinates','10,10,100,45'}],[{'url':'abc.com','cordinates','10,10,100,45'}]...
}]
}.....
I cannot give you the original data for review but I am going to create this type of the data using the code below:
from pprint import pprint
dict1 = {}
dict2 = {}
dict3 = {}
newObj = {'name'}
finalVar = []
finalVars = []
newVal = {}
for i in range(10):
temp = {'page'+str(i)}
for k in range(10):
newtemp = {'page'+str(i):{'email':[{
'emailId':str(i),
'test':str(k)
}]}}
finalVar.append(newtemp)
for i in range(10):
temp = {'page'+str(i)}
for k in range(10):
newtemp = {'page'+str(i):{'email':[{
'url':str(i),
'testing':str(k)
}]}}
finalVars.append(newtemp)
pprint(finalVars)
I am new in Python so, even I don't know that I am creating the valid list and array dictionary. So, please sorry if there is any thing wrong.
From the question I am assuming that a dictionary is needed whose keys are page0, page1 and so on, and value of each key is another dictionary with keys as emails, urls and value of emails is an array of dicts like {'email': '0#mail.com'} and value of urls is an array of dicts like {'coordinates': [10, 10, 100, 45], 'url': '0.com'}. Here is a sample code which produces the following output (as explained above)
{'page0': {'emails': [{'email': '0#mail.com'}, {'email': '1#mail.com'}],
'urls': [{'coordinates': [10, 10, 100, 45], 'url': '0.com'},
{'coordinates': [10, 10, 100, 45], 'url': '1.com'}]},
'page1': {'emails': [{'email': '0#mail.com'}, {'email': '1#mail.com'}],
'urls': [{'coordinates': [10, 10, 100, 45], 'url': '0.com'},
{'coordinates': [10, 10, 100, 45], 'url': '1.com'}]}}
Corresponding python code:
import pprint
result = {}
emails_key = "emails"
urls_key = "urls"
for i in range(2):
page_key = "page" + str(i)
result[page_key] = {emails_key: [], urls_key: []}
for j in range(2):
result[page_key][emails_key].append({'email': str(j) + '#mail.com'})
result[page_key][urls_key].append({'url': str(j) + '.com', 'coordinates': [10,10,100,45]})
pprint.pprint(result)
Hope this helps.
I believe something similar to the following code will generate the type of array you desire.
email_entries = []
for i in range(num_email_entries):
email_entries.append([{'emailAddress' + str(i)*(i>0):'abc#gmail.com'}])
email_container = [{'email':tuple(email_entries)}]
url_entries = []
for i in range(num_url_entries):
url_entries.append([{'url' + str(i)*(i>0):'abc.com',
'coordinates' + str(i)*(i>0):'10,10,100,45'}])
url_container = [{'url':tuple(url_entries)}]
array = {'page': (email_container,url_container)}
The output when num_email_entries = num_url_entries = 2 is
{'page':
([{'email':
([{'emailAddress': 'abc#gmail.com'}],
[{'emailAddress1': 'abc#gmail.com'}])
}],
[{'url':
([{'url': 'abc.com',
'coordinates': '10,10,100,45'}],
[{'url1': 'abc.com',
'coordinates1': '10,10,100,45'}])
}]
)
}

Formatting a string in required format in Python

I have a data in format:
id1 id2 value
Something like
1 234 0.2
1 235 0.1
and so on.
I want to convert it in json format:
{
"nodes": [ {"name":"1"}, #first element
{"name":"234"}, #second element
{"name":"235"} #third element
] ,
"links":[{"source":1,"target":2,"value":0.2},
{"source":1,"target":3,"value":0.1}
]
}
So, from the original data to above format.. the nodes contain all the set of (distinct) names present in the original data and the links are basically the line number of source and target in the values list returned by nodes.
For example:
1 234 0.2
1 is in the first element in the list of values holded by the key "nodes"
234 is the second element in the list of values holded by the key "nodes"
Hence the link dictionary is {"source":1,"target":2,"value":0.2}
How do i do this efficiently in python.. I am sure there should be better way than what I am doing which is so messy :(
Here is what I am doing
from collections import defaultdict
def open_file(filename,output=None):
f = open(filename,"r")
offset = 3429
data_dict = {}
node_list = []
node_dict = {}
link_list = []
num_lines = 0
line_ids = []
for line in f:
line = line.strip()
tokens = line.split()
mod_wid = int(tokens[1]) + offset
if not node_dict.has_key(tokens[0]):
d = {"name": tokens[0],"group":1}
node_list.append(d)
node_dict[tokens[0]] = True
line_ids.append(tokens[0])
if not node_dict.has_key(mod_wid):
d = {"name": str(mod_wid),"group":1}
node_list.append(d)
node_dict[mod_wid] = True
line_ids.append(mod_wid)
link_d = {"source": line_ids.index(tokens[0]),"target":line_ids.index(mod_wid),"value":tokens[2]}
link_list.append(link_d)
if num_lines > 10000:
break
num_lines +=1
data_dict = {"nodes":node_list, "links":link_list}
print "{\n"
for k,v in data_dict.items():
print '"'+k +'"' +":\n [ \n "
for each_v in v:
print each_v ,","
print "\n],"
print "}"
open_file("lda_input.tsv")
I'm assuming by "efficiently" you're talking about programmer efficiency—how easy it is to read, maintain, and code the logic—rather than runtime speed efficiency. If you're worried about the latter, you're probably worried for no reason. (But the code below will probably be faster anyway.)
The key to coming up with a better solution is to think more abstractly. Think about rows in a CSV file, not lines in a text file; create a dict that can be rendered in JSON rather than trying to generate JSON via string processing; wrap things up in functions if you want to do them repeatedly; etc. Something like this:
import csv
import json
import sys
def parse(inpath, namedict):
lastname = [0]
def lookup_name(name):
try:
print('Looking up {} in {}'.format(name, names))
return namedict[name]
except KeyError:
lastname[0] += 1
print('Adding {} as {}'.format(name, lastname[0]))
namedict[name] = lastname[0]
return lastname[0]
with open(inpath) as f:
reader = csv.reader(f, delimiter=' ', skipinitialspace=True)
for id1, id2, value in reader:
yield {'source': lookup_name(id1),
'target': lookup_name(id2),
'value': value}
for inpath in sys.argv[1:]:
names = {}
links = list(parse(inpath, names))
nodes = [{'name': name} for name in names]
outpath = inpath + '.json'
with open(outpath, 'w') as f:
json.dump({'nodes': nodes, 'links': links}, f, indent=4)
Don't construct the JSON manually. Make it out of an existing Python object with the json module:
def parse(data):
nodes = set()
links = set()
for line in data.split('\n'):
fields = line.split()
id1, id2 = map(int, fields[:2])
value = float(fields[2])
nodes.update((id1, id2))
links.add((id1, id2, value))
return {
'nodes': [{
'name': node
} for node in nodes],
'links': [{
'source': link[0],
'target': link[1],
'value': link[2]
} for link in links]
}
Now, you can use json.dumps to get a string:
>>> import json
>>> data = '1 234 0.2\n1 235 0.1'
>>> parsed = parse(data)
>>> parsed
{'links': [{'source': 1, 'target': 235, 'value': 0.1},
{'source': 1, 'target': 234, 'value': 0.2}],
'nodes': [{'name': 1}, {'name': 234}, {'name': 235}]}
>>> json.dumps(parsed)
'{"nodes": [{"name": 1}, {"name": 234}, {"name": 235}], "links": [{"source": 1, "target": 235, "value": 0.1}, {"source": 1, "target": 234, "value": 0.2}]}'

Categories