Create json with a query on mongodb by python - python

I created an API with Python using this code:
def findusers():
    """Return the result of an unfiltered find() on the users collection."""
    # The value returned is a pymongo cursor object, not a list of documents.
    return mycollection_users.find()
@app.route('/api/users', methods=['GET'])
def users():
    """Serve the users returned by findusers() as a JSON response."""
    db = findusers()
    # jsonpickle encodes the cursor object itself, which is why the payload
    # carries the "py/object" / "py/iterator" wrapper keys.
    js = jsonpickle.encode(db)
    return Response(response=js, status=200, mimetype="application/json")
app.run(host="0.0.0.0", port=4000)
then json is:
{
"py/iterator": [
{
"City": "Us",
"Phone": "02",
"_id": 1,
"name": "Tom"
},
{
"City": "EN",
"Phone": "11",
"_id": 2,
"name": "Jack"
},
],
"py/object": "pymongo.cursor.Cursor"
}
How can I remove the "py/iterator" and "py/object": "pymongo.cursor.Cursor" keys?
I want this format: [{},{}]

How does this look? If you don't want those two keys in the dictionary, you can simply access the portion you do care about.
>>> a = {
... "py/iterator": [
... {
... "City": "Us",
... "Phone": "02",
... "_id": 1,
... "name": "Tom"
... },
... {
... "City": "EN",
... "Phone": "11",
... "_id": 2,
... "name": "Jack"
... },
... ],
... "py/object": "pymongo.cursor.Cursor"
... }
...
>>> b = a['py/iterator']
>>> print(b)
[{'City': 'Us', '_id': 1, 'name': 'Tom', 'Phone': '02'}, {'City': 'EN', '_id': 2, 'name': 'Jack', 'Phone': '11'}]

Related

How to group a json by a nested key using Python?

Lets say we have a json object in Python:
myJson = [
{
"id": "123",
"name": "alex",
"meta": {
"city": "boston"
}
},
{
"id": "234",
"name": "mike",
"meta": {
"city": "seattle"
}
},
{
"id": "345",
"name": "jess",
"meta": {
"city": "boston"
}
}
]
What is the most efficient way to group this data by city, so that we end up with a JSON structure like this:
myNewJson = [
{
"city": "boston",
"people": [ ... ... ]
},
{
"city": "seattle",
"people": [ ... ]
}
]
... in which the entries for each city are collected under the "people" key.
Thanks!
Try:
myJson = [
    {"id": "123", "name": "alex", "meta": {"city": "boston"}},
    {"id": "234", "name": "mike", "meta": {"city": "seattle"}},
    {"id": "345", "name": "jess", "meta": {"city": "boston"}},
]

# Group the names by city. Dicts keep insertion order (Python 3.7+), so the
# cities come out in first-seen order.
out = {}
for d in myJson:
    out.setdefault(d["meta"]["city"], []).append(d["name"])

# Reshape {city: [names]} into a list of {"city": ..., "people": ...} records.
out = [{"city": k, "people": v} for k, v in out.items()]
print(out)
Prints:
[
{"city": "boston", "people": ["alex", "jess"]},
{"city": "seattle", "people": ["mike"]},
]
Seems like a dictionary could work. Use city names as the keys, and a list as the value. Then at the end, go through the dictionary and convert it to a list.
myJson = [
    {
        "id": "123",
        "name": "alex",
        "meta": {
            "city": "boston"
        }
    },
    {
        "id": "234",
        "name": "mike",
        "meta": {
            "city": "seattle"
        }
    },
    {
        "id": "345",
        "name": "jess",
        "meta": {
            "city": "boston"
        }
    }
]

d = {}  # dictionary of {city: list of people}
for e in myJson:
    city = e['meta']['city']
    # Create the city's list the first time the city is seen.
    if city not in d:
        d[city] = []
    d[city].append(e['name'])

# convert dictionary to list of json
result = [{'city': key, 'people': val} for key, val in d.items()]
print(result)

Merge Json with same key value pairs

I got a resultant json from an API in the following format
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
Since the Uid and Id are the same for multiple entries, can I club them together, with the Details key holding the combined key/value pairs? Something like the example below:
[{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}]
Please Guide me on this for the approach to be followed. Thanks
What you need is the dictionary function update(). Here's an example:
A = [{
    "Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
    "Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
    "Details": {
        "Name": "Kiran"
    }
}, {
    "Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
    "Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
    "Details": {
        "Age": "24"
    }
},
    {
    "Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
    "Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
    "Details": {
        "Name": "Shreyas"
    }
}
]

# Merged records, one per distinct (Uid, Id) pair, in first-seen order.
B = []


def find(uid, id_):
    """Return the index in B of the record matching (uid, id_), or -1."""
    for i, d in enumerate(B):
        if d['Uid'] == uid and d['Id'] == id_:
            return i
    return -1


for d in A:
    if (i := find(d['Uid'], d['Id'])) < 0:
        # Copy the record and its Details so that the update() below never
        # modifies the dictionaries that belong to A.
        B.append({**d, 'Details': dict(d['Details'])})
    else:
        B[i]['Details'].update(d['Details'])
print(B)
Prettyfied output:
[
{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran",
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
Note:
This could be very inefficient if your API response contains very large numbers of dictionaries. You might need a completely different approach
You should iterate over the list and merge with accumulator with (Uid, Id) as key:
from typing import Dict, List
l = [{
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Name": "Kiran"
}
}, {
"Uid": "40cc6103-1cf0-4735-b882-d14d32018e58",
"Id": "9e1a0057-4570-4a6e-8ff5-88b2facbaf4e",
"Details": {
"Age": "24"
}
},
{
"Uid": "196f5865-e9fe-4847-86ae-97d0bf57b816",
"Id": "84909ecb-c92e-48a7-bcaa-d478bf3a9220",
"Details": {
"Name": "Shreyas"
}
}
]
def mergeItem(it: Dict, acc: Dict) -> Dict:
    """Fold one record into the accumulator, keyed by its (Uid, Id) pair.

    Args:
        it: A record with "Uid", "Id" and "Details" keys.
        acc: Accumulator mapping (Uid, Id) tuples to merged records; it is
            updated in place.

    Returns:
        The same ``acc`` object, for convenient chaining.
    """
    key = (it["Uid"], it["Id"])
    if key in acc:
        # Build a fresh Details dict; on duplicate detail keys the incoming
        # record's value wins.
        details = {**acc[key]["Details"], **it["Details"]}
    else:
        details = it["Details"]
    acc[key] = {"Uid": key[0], "Id": key[1], "Details": details}
    return acc
def mergeList(a: List) -> Dict:
    """Merge every record of ``a`` that shares the same (Uid, Id) pair.

    Args:
        a: Records, each with "Uid", "Id" and "Details" keys.

    Returns:
        A dict mapping (Uid, Id) tuples to the merged record; an empty dict
        when ``a`` is empty.
    """
    acc = {}
    for v in a:
        acc = mergeItem(v, acc)
    return acc
print(list(mergeList(l).values()))
# [
# {
# 'Uid': '40cc6103-1cf0-4735-b882-d14d32018e58',
# 'Id': '9e1a0057-4570-4a6e-8ff5-88b2facbaf4e',
# 'Details': {'Name': 'Kiran', 'Age': '24'}},
# {
# 'Uid': '196f5865-e9fe-4847-86ae-97d0bf57b816',
# 'Id': '84909ecb-c92e-48a7-bcaa-d478bf3a9220',
# 'Details': {'Name': 'Shreyas'}
# }
# ]

Is there a more efficient way to get the result (O(n+m) rather than O(n*m))?

Origin data as below show, every item has a type mark, such as interests, family, behaviors, etc and I want to group by this type field.
return_data = [
{
"id": "112",
"name": "name_112",
"type": "interests",
},
{
"id": "113",
"name": "name_113",
"type": "interests",
},
{
"id": "114",
"name": "name_114",
"type": "interests",
},
{
"id": "115",
"name": "name_115",
"type": "behaviors",
},
{
"id": "116",
"name": "name_116",
"type": "family",
},
{
"id": "117",
"name": "name_117",
"type": "interests",
},
...
]
And the expected output data format is like:
output_data = [
{"interests":[
{
"id": "112",
"name": "name_112"
},
{
"id": "113",
"name": "name_113"
},
...
]
},
{
"behaviors": [
{
"id": "115",
"name": "name_115"
},
...
]
},
{
"family": [
{
"id": "116",
"name": "name_116"
},
...
]
},
...
]
And here is my trial:
# Collect the distinct types in first-seen order.
type_list = []
for item in return_data:
    if item['type'] not in type_list:
        type_list.append(item['type'])

# For each type, rescan all of return_data; this rescan is what makes the
# approach O(n*m).
interests_list = []
for type_name in type_list:
    temp_list = []
    for item in return_data:
        if item['type'] == type_name:
            temp_list.append({"id": item['id'], "name": item['name']})
    interests_list.append({type_name: temp_list})
Obviously my attempt is inefficient, since it is O(n*m), but I cannot find a more efficient way to solve the problem.
Is there a more efficient way to get the result? Any comments are very welcome, thanks.
Use a defaultdict to store a list of items for each type:
from collections import defaultdict
# group by type
temp_dict = defaultdict(list)
for item in return_data:
    # A missing type key is created as an empty list on first access.
    temp_dict[item["type"]].append({"id": item["id"], "name": item["name"]})

# convert back into a list with the desired format
output_data = [{k: v} for k, v in temp_dict.items()]
Output:
[
{
'behaviors': [
{'name': 'name_115', 'id': '115'}
]
},
{
'family': [
{'name': 'name_116', 'id': '116'}
]
},
{
'interests': [
{'name': 'name_112', 'id': '112'},
{'name': 'name_113', 'id': '113'},
{'name': 'name_114', 'id': '114'},
{'name': 'name_117', 'id': '117'}
]
},
...
]
If you don't want to import defaultdict, you could use a vanilla dictionary with setdefault:
# temp_dict = {}
temp_dict.setdefault(item["type"], []).append(...)
Behaves in exactly the same way, if a little less efficient.
please see Python dictionary for map.
# NOTE(review): assumes typeMap is a dict and delimiter is a str, both defined
# beforehand; neither is shown in this snippet.
for item in return_data:
    joined = typeMap.get(item['type'])
    # Start a new entry on the first name of a type; indexing the missing key
    # directly would raise KeyError on a plain dict.
    typeMap[item['type']] = (
        item['name'] if joined is None else joined + delimiter + item['name']
    )

Json format for python

I'm rewriting a view based on what I know the final JSON output should be, but it's returning each dictionary as a string.
new output
{
"results":
["
{
'plot': u'',
'runtime': u'N/A',
'description': u'x',
'videos': [
{
'id': 823,
'name': u'x',
'youtube_id': u'FtcubOnXgZk'
}
],
'country': u'India',
'writer': u'Neetu Varma, Ranjeev Verma',
'name': u'Chalk N Duster',
'id': 940,
'director': u'Jayant Gilatar',
'hot': True,
'content': u'x',
'actors': u'Shabana Azmi, Arya Babbar, Gavie Chahal, Juhi Chawla',
'year': 2015,
'images': [
{'small': '/media/cache/62/fd/62fd5158d281c042e3cf1f919183e94e.jpg', 'medium': '/media/cache/5e/32/5e32ebb1a4d25bba0d0c70b4b448e948.jpg'}],
'trailer_youtube_id': u'FtcubOnXgZk',
'type': 'movie',
'slug': u'chalk-n-duster',
'categories': [{'parent_id': 2, 'id': 226, 'name': u'Drama'}],
'shows': {
'starts': '2016-01-16',
'booking_url': u'',
'venue': {
'address': u'',
'id': 854,
'name': u'Nyali Cinemax',
'area': {
'id': 52,
'parent': {
'id': 48,
'name': u'Mombasa'
},
'name': u'Nyali'
}
},
'starts_time': '18:30:00'
}
}", "{'plot': u'' ....
old output
"results": [
{
"actors": "x",
"categories": [
{
"id": 299,
"name": "Biography",
"parent_id": 2
},
],
"content": "x",
"country": "x",
"description": "x",
"director": "x",
"hot": true,
"id": 912,
"images": [
{
"medium": "/media/cache/d2/b3/d2b3a7885e7c39bfc5c2b297b66619c5.jpg",
"small": "/media/cache/e2/d0/e2d01b2c7c77d3590536666de4a7fd7d.jpg"
}
],
"name": "Bridge of Spies",
"plot": "x",
"runtime": "141 min",
"shows": [
{
"booking_url": "",
"starts": "2015-11-27",
"starts_time": "16:30:00",
"venue": {
"address": "The Junction Shopping Mall",
"area": {
"id": 68,
"name": "Ngong Road",
"parent": {
"id": 2,
"name": "Nairobi"
}
},
"id": 1631,
"name": "Century Cinemax Junction"
}
},
],
"slug": "bridge-of-spies",
"trailer_youtube_id": "",
"type": "movie",
"videos": [
{
"id": "795",
"name": "Bridge of Spies",
"youtube_id": "2-2x3r1m2I4"
}
],
"writer": "Matt Charman, Ethan Coen, Joel Coen",
"year": 2015
}, ...
]
Here's the view. I know the shows should also be a list, but in order to start testing I need the data to come back in the right format. If it involves too much rewriting, I'm okay with links and an explanation.
#memoize(timeout=60*60)
def movies_json():
    """Build a list of plain dicts describing movies with shows from today on.

    Returns:
        A list with one dict per movie returned by the query, holding the
        movie's own fields plus its ``images``, ``trailer_youtube_id``,
        ``videos``, ``shows`` (a list, one entry per show) and ``categories``.

    Raises:
        IndexError: if a movie has no video with a non-null ``youtube_id``.
    """
    today = datetime.date.today()
    # NOTE(review): filtering across the shows relation may yield the same
    # movie once per matching show; confirm whether .distinct() is needed.
    movies = Movie.objects.filter(shows__starts__gte=today)
    results = []
    number = len(movies)
    for movie in movies:
        # Progress output only; the single-argument call form also works as
        # the Python 2 print statement.
        print("Now Remaining: {0}".format(number))
        number -= 1
        medium = get_thumbnail(movie.picture(), '185x274', crop='center', quality=99).url
        small = get_thumbnail(movie.picture(), '50x74', crop='center', quality=99).url
        movie_details = {
            'director': movie.director,
            'plot': movie.plot,
            'actors': movie.actors,
            'content': movie.content,
            'country': movie.country,
            'description': movie.description,
            'hot': movie.hot,
            'id': movie.id,
            'images': [{'medium': medium, 'small': small}],
            'name': movie.name,
            'runtime': movie.runtime,
            'slug': movie.slug,
            'type': 'movie',
            'writer': movie.writer,
            'year': movie.year,
        }

        # Only the first video that has a youtube_id is used.
        youtube_details = movie.videos.filter(youtube_id__isnull=False)[0]
        movie_details['trailer_youtube_id'] = youtube_details.youtube_id or ""
        movie_details['videos'] = [
            {
                'id': youtube_details.id,
                'name': movie.name,
                'youtube_id': youtube_details.youtube_id,
            }
        ]

        shows = []
        for show in movie.shows.all():
            venue = show.venue
            area = venue.area
            shows.append({
                'booking_url': show.booking_url,
                'starts': show.starts.isoformat(),
                'starts_time': show.starts_time.isoformat(),
                'venue': {
                    'address': venue.address,
                    'area': {
                        'id': area.id,
                        'name': area.name,
                        'parent': {
                            'id': area.parent.id,
                            'name': area.parent.name,
                        }
                    },
                    'id': venue.id,
                    'name': venue.name,
                }
            })
        # Store the whole list: assigning the last per-show dict instead
        # drops every other show and breaks when a movie has no shows.
        movie_details['shows'] = shows

        movie_details['categories'] = [
            {
                'id': category.id,
                'name': category.name,
                'parent_id': category.parent.id,
            }
            for category in movie.categories.all()
        ]
        results.append(movie_details)
    return results
The data is returned by django rest framework 0.4.0
import json
# json.loads() parses a string; json.load() expects a file-like object.
json_obj = json.loads(json_string)

Scrapy data selection

I would like to scrape data from this plain text:
"data": [
{
"id": "10150635906994798_21377910",
"from": {
"id": "100001249878256",
"location" : "Stockholm"
"name": "Mouhamadoul Moussa"
},
"message": "#Yeaaaahh!!! \u2665",
},
{
"id": "10150635906994798_21392047",
"from": {
"id": "100000648164454",
"location" : "Malmo"
"name": "mallow ty"
},
"message": "droit au butttttttttttttttttt",
},
]
but I would like to retrieve only the second id of each entry (the one nested under "from"). The XPath I use for id selection is:
response.selector.xpath ('//*[contains(text(), "id")]')
Output should be :
100000648164454
100001249878256
That's not plain text! That's JSON. However, you can store it as a dictionary:
>>> a = {'data': [{'from': {'id': '100001249878256',
... 'location': 'Stockholm',
... 'name': 'Mouhamadoul Moussa'},
... 'id': '10150635906994798_21377910',
... 'message': '#Yeaaaahh!!! \\u2665'},
... {'from': {'id': '100000648164454', 'location': 'Malmo', 'name': 'mallow ty'},
... 'id': '10150635906994798_21392047',
... 'message': 'droit au butttttttttttttttttt'}]}
>>> for data in a['data']:
... print data['from']['id']
...
100001249878256
100000648164454

Categories