How to get the particular values from response using python - python

I am trying to get the values from JSON which is nested.
response = {
"Instance":[
{
"id":"id-1",
"Tags":[],
},
{
"id":"id-2",
"Tags":[],
},
{
"id":"id-3",
"Tags":[
{
"Key":"test",
"Value":"test"
}
],
}
]
}
and the python code i tried is
if response["Instance"]:
print("1-->",response["Instance"])
for identifier in response["Instance"]:
print("2-->", identifier)
if identifier["id"]:
print("3-->", identifier["id"])
if identifier["Tags"]: #ERROR Thrown here as 'Tags' in exception
print("4-->", identifier["Tags"])
for tag in identifier["Tags"]:
print("5-->", identifier["Tags"])
if tag['Key'] == 'test' and tag['Value'] == 'test':
print("6--> test present ")
I am trying to iterate over all the ids and get the tags that contain the test key, but I get an error in the third loop when Tags has some values. The exception message is just 'Tags'.
How can I modify the code?

You are trying to test for the presence of the key "Tags" in the identifier dict as follows, but subscripting a dict looks the key up instead of testing for it:
if identifier["Tags"]:
# iterate over identifier["Tags"]
If "Tags" is not present in the identifier dict, this will result in:
KeyError: 'Tags'
Instead, you should use:
if "Tags" in identifier:
# iterate over identifier["Tags"]

Related

An expression attribute name used in the document path is not defined; attribute name: #buyer

I am using reserved DynamoDB keywords like value, users, name. I have created an entry in DynamoDB with
{
"id":1,
"poc_name": "ABC"
}
I want to update the existing record with
{
"id": 1,
"poc_name": "ABC",
"buyer": {
"value": "id1",
"label": "Test"
}
}
I am using the reserved keyword "value". When I try to update the record I get this error:
An expression attribute name used in the document path is not defined; attribute name: #buyer
The update does not work because the buyer map does not exist in DynamoDB; that is why the document path is not found. I am using the following code snippet to handle the reserved keywords. It generates the following updateValues, updateExpression and expression_attributes_names:
updateValues: {':poc_name': ABC, ':buyer_value': 'id1', ':buyer_label': 'Test'}
updateExpression: ['set ','poc_name = :poc_name,','#buyer.#value = :buyer_value,', 'buyer.label = :buyer_label,']
expression_attributes_names: {'#demand_poc_action': 'demand_poc_action', '#value': 'value', '#buyer': 'buyer'}
Code snippet:
for key, value in dictData.items():
if key in RESERVER_DDB_KEYWORDS or (
"." in key and key.split(".")[1] in RESERVER_DDB_KEYWORDS
):
key1 = key.replace(".", ".#")
updateExpression.append(f"#{key1} = :{key.replace('.', '_')},")
updateValues[f":{key.replace('.', '_')}"] = value
if "." in key:
expression_attributes_names[f"#{key.split('.')[0]}"] = key.split(".")[0]
expression_attributes_names[f"#{key.split('.')[1]}"] = key.split(".")[1]
else:
expression_attributes_names[f"#{key}"] = key
else:
updateExpression.append(f"{key} = :{key.replace('.', '_')},")
updateValues[f":{key.replace('.', '_')}"] = value
UpdateExpression="".join(updateExpression)[:-1],
ExpressionAttributeValues=updateValues,
ReturnValues="UPDATED_NEW",
ExpressionAttributeNames=expression_attributes_names,
The problem is that if buyer already exists in DynamoDB then I am able to update the buyer record; however, for a record that doesn't have buyer in DynamoDB, I get the document path error. So my approach is to create the buyer entry every time I do the update. However, I am not able to fix the above code.
This happens because you are trying to update nested properties of the top-level property buyer, which does not exist yet (or is not of map type).
So before running this update you have to ensure that the top-level attribute buyer already exists.
Or you can just catch the exception if it is thrown, and perform another update that creates the buyer attribute, as shown below:
import boto3
from botocore.exceptions import ClientError
table = boto3.resource('dynamodb', region_name='eu-west-1').Table('test1')
try:
table.update_item(
Key={
'pk': '1'
},
UpdateExpression="SET #buyer.#value = :val1, #buyer.#label = :val2",
ExpressionAttributeNames={
'#label': 'label',
'#value': 'value',
'#buyer': 'buyer',
},
ExpressionAttributeValues={
':val1': 'id1',
':val2': 'Test'
}
)
except ClientError as e:
if e.response['Error']['Code'] == 'ValidationException':
# Creating new top level attribute `buyer` (with nested props)
# if the previous query failed
response = table.update_item(
Key={
'pk': '1'
},
UpdateExpression="set #buyer = :val1",
ExpressionAttributeNames={
'#buyer': 'buyer'
},
ExpressionAttributeValues={
':val1': {
'value': 'id1',
'label': 'Test'
}
}
)
else:
raise
An alternative approach is to simply set buyer to an empty map when creating the item:
{
"id":1,
"poc_name": "ABC",
"buyer": {}
}
This would allow you to update nested attributes as buyer would exist.

Extracting certain value from MongoDB using Python

I have a mongo database including the following collection:
{
"_id": {
"$oid": "12345"
},
"id": "333555",
"token": [
{
"access_token": "ac_33bc",
"expires_in": 3737,
"token_type": "bearer",
"expires_at": {
"$date": "2021-07-02T13:37:28.123Z"
}
}
]
}
In the following Python script I'm trying to return and print only the access_token but can't figure out how to do so. I've tried various methods, none of which worked. I've given the "id" as a parameter.
def con_mongo():
try:
client = pymongo.MongoClient("mongodb:localhost")
#DB name
db = client["db1"]
#Collection
coll = db["coll1"]
#1st method
x = coll.find({"id":"333555"},{"token":"access_token"})
for data in x:
print(x)
#2nd method
x= coll.find({"id":"333555"})
tok=x.distinct("access_token")
#print(x[0])
for data in tok:
print(data)
except Exception:
logging.info(Exception)
It doesn't work this way, although if I replace "access_token" with simply "token" (or remove it) it works, but then I get back all the information included in the field "token", whereas I only need the value of "access_token".
Since access_token is a field of an array element, you need to qualify its name with the name of the array to properly access its value.
Actually you can first extract the whole document and get the desired value through simple list and dict indexing.
So, assuming you are retrieving many documents with that same id:
x = [doc["token"][0]["access_token"] for doc in coll.find({"id":"333555"})]
The above list comprehension creates a list with the access_tokens of all the documents matching the given id.
If you just need the first (and maybe only) occurrence of a document with that id, you can use find_one() instead:
x = coll.find_one({"id":"333555"})["token"][0]["access_token"]
# returns ac_33bc
token is a list so you have to reference the list element, e.g.
x = coll.find({"id":"333555"},{"token.access_token"})
for data in x:
print(data.get('token')[0].get('access_token'))
prints:
ac_33bc

How to extract specific data from JSON object using python?

I'm trying to scrape a website and get the items list from it using Python. I parsed the HTML using BeautifulSoup and built a JSON object using json.loads(data). The JSON object looks like this:
{ ".1768j8gv7e8__0":{
"context":{
//some info
},
"pathname":"abc",
"showPhoneLoginDialog":false,
"showLoginDialog":false,
"showForgotPasswordDialog":false,
"isMobileMenuExpanded":false,
"showFbLoginEmailDialog":false,
"showRequestProductDialog":false,
"isContinueWithSite":true,
"hideCoreHeader":false,
"hideVerticalMenu":false,
"sequenceSeed":"web-157215950176521",
"theme":"default",
"offerCount":null
},
".1768j8gv7e8.6.2.0.0__6":{
"categories":[
],
"products":{
"count":12,
"items":[
{
//item info
},
{
//item info
},
{
//item info
}
],
"pageSize":50,
"nextSkip":100,
"hasMore":false
},
"featuredProductsForCategory":{
},
"currentCategory":null,
"currentManufacturer":null,
"type":"Search",
"showProductDetail":false,
"updating":false,
"notFound":false
}
}
I need the items list from product section. How can I extract that?
Just do:
products = jsonObject[list(jsonObject.keys())[1]]["products"]["items"]
Import the json package and map every entry to its list of items, if it has any:
This solution is more universal: it checks every entry in your JSON and finds all the items without hardcoding the index of an element.
import json

data = '{"p1": { "pathname":"abc" }, "p2": { "pathname":"abcd", "products": { "items" : [1,2,3]} }}'

# Use the json package to convert the JSON string into a dictionary.
jsonData = json.loads(data)

# Use a list comprehension to visit every top-level entry of the JSON data:
#   itemData["products"]["items"] - select the items list of that entry
#   if "products" in itemData     - keep only entries that have products
# Only the values are needed, so iterate over .values() rather than .items().
items = [
    itemData["products"]["items"]
    for itemData in jsonData.values()
    if "products" in itemData
]

# Keep the result in a name and print it so the script shows its output.
print(items)  # [[1, 2, 3]]
Edit: added comments to code
I'll just call the URL of the JSON file you got from BeautifulSoup "response" and then put in a sample key in the items array, like itemId:
import json

# `response` is assumed to be the file-like object holding the JSON document;
# it must be defined by the surrounding code before this snippet runs.
json_obj = json.load(response)

# Collect the itemId of every entry in the items array. A list cannot be
# indexed by the entry dict itself (TypeError), so build the list directly.
array = [item['itemId'] for item in json_obj['items']]
print(array)

How to use find() nested documents for two levels or more?

Here is my sample mongodb database
database image for one object
The above is a database with an array of articles. I fetched only one object for simplicity purposes.
database image for multiple objects ( max 20 as it's the size limit )
I have about 18k such entries.
I have to extract the description and title tags present inside the (articles and 0) subsections.
The find() method is the question here.. i have tried this :
for i in db.ncollec.find({'status':"ok"}, { 'articles.0.title' : 1 , 'articles.0.description' : 1}):
for j in i:
save.write(j)
After executing the code, the file save has this :
_id
articles
_id
articles
and it goes on and on..
Any help on how to print what i stated above?
My entire code for reference :
import json
import newsapi
from newsapi import NewsApiClient
import pymongo
from pymongo import MongoClient
client = MongoClient()
db = client.dbasenews
ncollec = db.ncollec
newsapi = NewsApiClient(api_key='**********')
source = open('TextsExtractedTemp.txt', 'r')
destination = open('NewsExtracteddict.txt', "w")
for word in source:
if word == '\n':
continue
all_articles = newsapi.get_everything(q=word, language='en', page_size=1)
print(all_articles)
json.dump(all_articles, destination)
destination.write("\n")
try:
ncollec.insert(all_articles)
except:
pass
Okay, so I checked a little to update my rusty memory of pymongo, and here is what I found.
The correct query should be :
db.ncollec.find({ 'status':"ok",
'articles.title' : { '$exists' : 'True' },
'articles.description' : { '$exists' : 'True' } })
Now, if you do this :
# Match documents whose status is "ok" and whose articles carry both a title
# and a description. $exists takes a boolean, not the string 'True'.
query = {
    'status': "ok",
    'articles.title': {'$exists': True},
    'articles.description': {'$exists': True},
}

# The collection used in the question is db.ncollec.
for item in db.ncollec.find(query):
    print(item)
And that it doesn't show anything, the query is correct, but you don't have the right database, or the right tree, or whatever.
But I assure you, that with the database you showed me, that if you do...
# Match documents whose status is "ok" and whose articles carry both a title
# and a description. $exists takes a boolean, not the string 'True'.
query = {
    'status': "ok",
    'articles.title': {'$exists': True},
    'articles.description': {'$exists': True},
}

# The collection used in the question is db.ncollec.
for item in db.ncollec.find(query):
    # Each result is a document dict, so a bare item[0] raises KeyError;
    # the first article lives under the document's 'articles' array.
    first_article = item['articles'][0]
    save.write(first_article['title'])
    save.write(first_article['description'])
It'll do what you wished to do in the first place.
Now, the key item[0] might not be right, but I can't really help with that, since all I have to go on is what you are showing on the screen. :)
Okay, now. I have found something for you that is a bit more complicated, but is cool :)
But I'm not sure if it'll work for you. I suspect you're giving us a wrong tree, since when you do .find( {'status' : 'ok'} ), it doesn't return anything, and it should return all the documents with a 'status' : 'ok', and since you have lots...
Anyways, here is the query, that you should use with .aggregate() method, instead of .find() :
elem = { '$match' : { 'status' : 'ok', 'articles.title' : { '$exists' : 'True'}, 'articles.description' : { '$exists' : 'True'}} }
[ elem, { '$unwind' : '$articles' }, elem ]
If you want an explanation as to how this works, I invite you to read this page.
This query will return ONLY the elements in your array that have a title, and a description, with a status OK. If an element doesn't have a title, or a description, it will be ignored.

How to Filter EmbeddedDocument in mongoengine and get value of a field?

I am working on a Credits model with transactions as embedded documents. Following is the structure in which it is stored.
{
"_id" : ObjectId("546dae8cc09e5f0d9602e632"),
"user" : ObjectId("53e7fdaac09e5f12a1230c14"),
"transaction" : [
{
"date" : ISODate("2014-11-20T12:34:12.878Z"),
"amount" : 100,
"follow_num" : "d5571d91-e434-4b10-bbd8-2a6511e78011",
"memo" : "test1",
"trans_type" : "deposit",
"status" : "success"
},
{
"date" : ISODate("2014-11-20T13:03:49.851Z"),
"amount" : 500,
"follow_num" : "2fd57cf4-eb5d-4751-9c88-6158adda6572",
"memo" : "test2",
"trans_type" : "withdraw",
"status" : "failed"
},
{
"date" : ISODate("2014-11-20T22:54:19.892Z"),
"amount" : 20,
"follow_num" : "c2bd7dd2-3b17-41c2-9513-60a058a5622a",
"memo" : "test3",
"trans_type" : "deposit",
"status" : "success"
}
]
}
I want to retrieve amount of the last successful deposit transaction
(i.e. transaction.trans_type="deposit" and transaction.status = "success").
#property
def last_deposit(self):
credit_obj = Credits.objects.get(user=self,
transaction__match={"trans_type":"deposit","status":"success"})
If I understood you correctly, you're having trouble using the fields within your embedded document for querying. You can use a double underscore to query nested fields. Also, you should use filter() instead of get(), as get() is intended for queries which will only match a single document (and will actually raise an error if more than one document is found).
credit_obj = Credits.objects.filter(
user=user_id,
transaction__trans_type="deposit",
transaction__status="success"
).order_by('-transaction__date').first()
Also, if you're using get() you should catch possible exceptions.
from mongoengine.errors import DoesNotExist, MultipleObjectsReturned
from bson.errors import InvalidId

# get() expects exactly one match, so handle each way the lookup can fail.
try:
    credit_obj = Credits.objects.get(
        user=user_id,
    )
except InvalidId:
    # user_id is not a valid ObjectId
    print("Not a valid ObjectId: '%s'." % str(user_id))
    # code to handle error
except DoesNotExist as e:
    # no document matched the query
    print("Could not get '%s'. Error: %s" % (user_id, e))
    # code to handle error
except MultipleObjectsReturned:
    # more than one document matched the query
    print("Multiple objects matched query.")
    # code to handle error
Since you wanted only the amount of the last successful deposit transaction, this would do the job:
import pymongo

# pymongo.Connection was removed in PyMongo 3; MongoClient is its replacement.
client = pymongo.MongoClient(host="localhost", port=27017)
db = client["family"]

pipeline = [
    # Emit one document per embedded transaction.
    {"$unwind": "$transaction"},
    # Keep only successful deposits.
    {"$match": {"transaction.status": "success",
                "transaction.trans_type": "deposit"}},
    # After $unwind the date lives at transaction.date; sort newest first.
    {"$sort": {"transaction.date": -1}},
    {"$limit": 1},
]

# aggregate() returns a cursor; take its first document, or None when no
# transaction matched.
latest = next(db.tran.aggregate(pipeline), None)
if latest is not None:
    print(latest["transaction"]["amount"])
What i have done :
I connected to the local MongoDB family database and ran an aggregate on the tran collection to get the matching embedded document, then read the value of the amount key from the dictionary returned by the query.

Categories