Does mongoengine support $lookup in the aggregate method? - python

I'm currently using MongoDB v3.0. This is my code:
{'$lookup': {
    'from': 'Matrix',
    'localField': 'account_id',
    'foreignField': 'account_id',
    'as': 'Matrix'
}}
I'm getting this error:
Exception calling application: exception: Unrecognized pipeline stage name: '$lookup'

The $lookup stage was only introduced in MongoDB 3.2, so a 3.0 server rejects it with exactly this error; the server has to be upgraded before any client, MongoEngine included, can use it. On the client side you can query using the aggregation framework with PyMongo, but that normally requires two connections to MongoDB (one for PyMongo to perform the aggregation query, and a second for regular queries, inserts, and updates via MongoEngine). MongoEngine's _get_collection() resolves this problem by exposing the underlying PyMongo collection on the existing connection.
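Before reworking queries, it is worth confirming the server version from Python; a minimal sketch, assuming a mongod on the default localhost port:

from pymongo import MongoClient

client = MongoClient()  # assumes localhost:27017; adjust as needed
print(client.server_info()["version"])  # $lookup needs MongoDB >= 3.2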
In the example below we use two models, Plans and Addons, which are related through a quote_id field:
collection = Plans._get_collection()
pipeline = [
    {
        # localField and foreignField each name a field in that
        # collection's own documents, so the bare field name is
        # used on both sides.
        "$lookup": {
            "from": "addons",
            "localField": "quote_id",
            "foreignField": "quote_id",
            "as": "addons_docs"
        }
    },
    {
        "$match": {
            "addons_docs": { "$ne": [] }
        }
    },
    {
        "$addFields": {
            "addons_docs": { "$arrayElemAt": ["$addons_docs", 0] }
        }
    },
    {
        "$replaceRoot": {
            "newRoot": { "$mergeObjects": ["$addons_docs", "$$ROOT"] }
        }
    },
    {
        "$project": { "addons_docs": 0 }
    },
    {
        "$sort": { "_id": -1 }
    },
    {
        "$limit": 100
    }
]
cursor = collection.aggregate(pipeline)
try:
    for doc in cursor:
        print(doc)
finally:
    cursor.close()

Related

MongoDB: Update element in an array where the index of the element is saved in the document

I have the following document structure.
{
    _id: ...,
    unique_id: 1234,
    config_no: 1,
    configs: [
        { data: "qwertyuiop" },  // random string
        { data: "asdfghjkl" }    // random string
    ]
}
I want to update the value of data in one of the configs. The index of the config that needs updating is stored in the config_no key. Is there any way to update the value without first querying the document?
This is what I am currently doing:
doc = db.collection.findOne({"unique_id": 1234})
config_no = doc.config_no
db.collection.updateOne(
    {"unique_id": 1234},
    {"$set": {"configs." + config_no + ".data": "zxcvbnm"}} // "configs.1.data"
)
The following is what I would like to achieve:
db.collection.updateOne(
    {"unique_id": 1234},
    {"$set": {"configs.${config_no}.data": "zxcvbnm"}}
)
You can $unwind with the includeArrayIndex option, use the index to perform a conditional update, and $merge the result back into the collection.
db.collection.aggregate([
    { $match: { unique_id: 1234 } },
    {
        "$unwind": {
            path: "$configs",
            includeArrayIndex: "idx"
        }
    },
    {
        $set: {
            "configs.data": {
                "$cond": {
                    "if": { $eq: ["$config_no", "$idx"] },
                    "then": "zxcvbnm",
                    "else": "$configs.data"
                }
            }
        }
    },
    {
        $group: {
            _id: "$_id",
            config_no: { $first: "$config_no" },
            configs: { $push: "$configs" },
            unique_id: { $first: "$unique_id" }
        }
    },
    {
        "$merge": {
            "into": "collection",
            "on": "_id",
            "whenMatched": "merge"
        }
    }
])
Mongo Playground
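Since the question is about Python, the same pipeline can be run through PyMongo; a sketch, assuming db is a pymongo.database.Database and the collection is literally named collection as in the example:

db.collection.aggregate([
    {"$match": {"unique_id": 1234}},
    {"$unwind": {"path": "$configs", "includeArrayIndex": "idx"}},
    # rewrite configs.data only where the element index matches config_no
    {"$set": {"configs.data": {
        "$cond": {
            "if": {"$eq": ["$config_no", "$idx"]},
            "then": "zxcvbnm",
            "else": "$configs.data"
        }
    }}},
    # rebuild the array, one document per original _id
    {"$group": {
        "_id": "$_id",
        "config_no": {"$first": "$config_no"},
        "configs": {"$push": "$configs"},
        "unique_id": {"$first": "$unique_id"}
    }},
    # $merge (MongoDB 4.2+) writes the result back into the collection
    {"$merge": {"into": "collection", "on": "_id", "whenMatched": "merge"}}
])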

PyMongo - Aggregate doesn't work with group

I'm trying to run the following aggregation in my Python project:
pipeline = [
    {
        '$group': {
            'date': {'$max': "$date"},
            '_id': {
                'interface': "$interface",
                'message': "$message",
                'server': "$server"
            },
            'record_count': {'$sum': '1'}
        }
    }
]
errors = EntryError.objects.aggregate(pipeline)
But when the aggregate function is executed, it gives me the following error:
pymongo.errors.OperationFailure: Each element of the 'pipeline' array must be an object
But the same pipeline works in Robo 3T and in the mongo shell.
What am I doing wrong?
I figured out what I was doing wrong. This is the code that solved it:
pipeline = {
    "$group": {
        "date": {"$max": "$date"},
        "_id": {
            "interface": "$interface",
            "message": "$message",
            "server": "$server"
        },
        "record_count": {"$sum": 1}
    }
}
errors = EntryError.objects.filter(
    date__gte=start_date,
    date__lte=end_date
).aggregate(pipeline)
"pipeline" as dict instead of list.

How to iterate over or remove MongoDB array items using PyMongo?

I want to iterate over a MongoDB array (the TRANSACTION list) and remove a specific item from it using PyMongo. I created the Mongo collection below using PyMongo. How can I iterate over the array and remove only the final item in the TRANSACTION list?
The insert code, using PyMongo:
import datetime

# added new method: create block chain structure
def addCoinWiseTransaction(self, senz, coin, format_date):
    self.collection = self.db.block_chain
    coinValexists = self.collection.find({"_id": str(coin)}).count()
    print('coin exists : ', coinValexists)
    if coinValexists > 0:
        print('coin hash exists')
        # append the transaction to the existing coin document
        newTransaction = {"$push": {"TRANSACTION": {
            "SENDER": senz.attributes["#SENDER"],
            "RECIVER": senz.attributes["#RECIVER"],
            "T_NO_COIN": int(1),
            "DATE": datetime.datetime.utcnow()
        }}}
        self.collection.update({"_id": str(coin)}, newTransaction)
    else:
        flag = senz.attributes["#f"]
        print(flag)
        if flag == "ccb":
            print('new coin mined by other miner')
            root = {
                "_id": str(coin),
                "S_ID": int(senz.attributes["#S_ID"]),
                "S_PARA": senz.attributes["#S_PARA"],
                "FORMAT_DATE": format_date,
                "NO_COIN": int(1),
                "TRANSACTION": [{
                    "MINER": senz.attributes["#M_S_ID"],
                    "RECIVER": senz.attributes["#RECIVER"],
                    "T_NO_COIN": int(1),
                    "DATE": datetime.datetime.utcnow()
                }]
            }
            self.collection.insert(root)
        else:
            print('new coin mined')
            root = {
                "_id": str(coin),
                "S_ID": int(senz.attributes["#S_ID"]),
                "S_PARA": senz.attributes["#S_PARA"],
                "FORMAT_DATE": format_date,
                "NO_COIN": int(1),
                "TRANSACTION": [{
                    "MINER": "M_1",
                    "RECIVER": senz.sender,
                    "T_NO_COIN": int(1),
                    "DATE": datetime.datetime.utcnow()
                }]
            }
            self.collection.insert(root)
    return 'DONE'
To remove the last entry, the general idea (as you have mentioned) is to iterate the array and grab the index of the last element, as denoted by its DATE field, then update the collection by removing that element with $pull. So the crucial pieces of data you need for this to work are the DATE value and the document's _id.
One approach you could take is to first use the aggregation framework to get this data. Run a pipeline whose first step filters the documents in the collection using the $match operator, which accepts standard MongoDB queries.
The next stage flattens the TRANSACTION array, i.e. denormalises the documents in the list so that you can single out the final item by its DATE field. This is made possible with the $unwind operator, which for each input document outputs n documents, where n is the number of array elements (zero for an empty array).
After deconstructing the array, regroup the flattened documents with the $group operator and, in the process, use the $max group accumulator on the embedded DATE field to obtain the last TRANSACTION date.
So in essence, run the following pipeline and use the results to update the collection:
mongo shell
db.block_chain.aggregate([
    { "$match": { "_id": coin_id } },
    { "$unwind": "$TRANSACTION" },
    {
        "$group": {
            "_id": "$_id",
            "last_transaction_date": { "$max": "$TRANSACTION.DATE" }
        }
    }
])
You can then read the result of this aggregate operation using the toArray() method (or by iterating the aggregation cursor) and update your collection accordingly:
var docs = db.block_chain.aggregate([
    { "$match": { "_id": coin_id } },
    { "$unwind": "$TRANSACTION" },
    {
        "$group": {
            "_id": "$_id",
            "LAST_TRANSACTION_DATE": { "$max": "$TRANSACTION.DATE" }
        }
    }
]).toArray()

db.block_chain.updateOne(
    { "_id": docs[0]._id },
    {
        "$pull": {
            "TRANSACTION": {
                "DATE": docs[0]["LAST_TRANSACTION_DATE"]
            }
        }
    }
)
python
def remove_last_transaction(self, coin):
    self.collection = self.db.block_chain
    pipe = [
        { "$match": { "_id": str(coin) } },
        { "$unwind": "$TRANSACTION" },
        {
            "$group": {
                "_id": "$_id",
                "LAST_TRANSACTION_DATE": { "$max": "$TRANSACTION.DATE" }
            }
        }
    ]
    # run aggregate pipeline
    cursor = self.collection.aggregate(pipeline=pipe)
    docs = list(cursor)
    # run update; the key name must match the one produced by $group above
    self.collection.update_one(
        { "_id": docs[0]["_id"] },
        {
            "$pull": {
                "TRANSACTION": {
                    "DATE": docs[0]["LAST_TRANSACTION_DATE"]
                }
            }
        }
    )
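As an aside, if "final item" simply means the last element in array order, rather than the element with the greatest DATE, a single update with $pop is enough and no aggregation is needed; a sketch against the same collection:

# $pop with 1 removes the last array element; -1 would remove the first.
self.db.block_chain.update_one(
    {"_id": str(coin)},
    {"$pop": {"TRANSACTION": 1}}
)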
Alternatively, you can run a single aggregate operation that also updates your collection, using the $out pipeline stage, which writes the results of the pipeline back to a collection:
If the collection specified by the $out operation already exists, then upon completion of the aggregation, the $out stage atomically replaces the existing collection with the new results collection. The $out operation does not change any indexes that existed on the previous collection. If the aggregation fails, the $out operation makes no changes to the pre-existing collection.
For example, you could run this pipeline:
mongo shell
db.block_chain.aggregate([
    { "$match": { "_id": coin_id } },
    { "$unwind": "$TRANSACTION" },
    { "$sort": { "TRANSACTION.DATE": 1 } },
    {
        "$group": {
            "_id": "$_id",
            "LAST_TRANSACTION": { "$last": "$TRANSACTION" },
            "FORMAT_DATE": { "$first": "$FORMAT_DATE" },
            "NO_COIN": { "$first": "$NO_COIN" },
            "S_ID": { "$first": "$S_ID" },
            "S_PARA": { "$first": "$S_PARA" },
            "TRANSACTION": { "$push": "$TRANSACTION" }
        }
    },
    {
        "$project": {
            "FORMAT_DATE": 1,
            "NO_COIN": 1,
            "S_ID": 1,
            "S_PARA": 1,
            "TRANSACTION": {
                "$setDifference": ["$TRANSACTION", ["$LAST_TRANSACTION"]]
            }
        }
    },
    { "$out": "block_chain" }
])
python
def remove_last_transaction(self, coin):
    self.db.block_chain.aggregate([
        { "$match": { "_id": str(coin) } },
        { "$unwind": "$TRANSACTION" },
        { "$sort": { "TRANSACTION.DATE": 1 } },
        {
            "$group": {
                "_id": "$_id",
                "LAST_TRANSACTION": { "$last": "$TRANSACTION" },
                "FORMAT_DATE": { "$first": "$FORMAT_DATE" },
                "NO_COIN": { "$first": "$NO_COIN" },
                "S_ID": { "$first": "$S_ID" },
                "S_PARA": { "$first": "$S_PARA" },
                "TRANSACTION": { "$push": "$TRANSACTION" }
            }
        },
        {
            "$project": {
                "FORMAT_DATE": 1,
                "NO_COIN": 1,
                "S_ID": 1,
                "S_PARA": 1,
                "TRANSACTION": {
                    "$setDifference": ["$TRANSACTION", ["$LAST_TRANSACTION"]]
                }
            }
        },
        { "$out": "block_chain" }
    ])
Whilst this approach can be more efficient than the first, it requires knowing the document's existing fields in advance, so in some cases it may not be practical.

Elasticsearch querying nested objects returning no results

I have created an Elasticsearch index, and one of its nested fields has the following mapping:
"groups": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"value": {
"type": "text"
}
}
}
As for the ES version, it's 5.0, and I am using the official Python client, elasticsearch-py. I want to query this nested field based on its value.
Let's say there is another text field called name. I want to find all names starting with A that fall under a specified group.
Some sample data:
Groups - HR (name=HR, value=hr), Marketing (name=Marketing, value=marketing)
Names - Andrew, Alpha, Barry, John
Andrew and Alpha belong to group HR.
Based on this I tried the following query:
{
    'query': {
        'bool': {
            'must': [{
                'match_phrase_prefix': {
                    'title': 'A'
                }
            }]
        },
        'nested': {
            'path': 'groups',
            'query': {
                'bool': {
                    'must': [{
                        'match': {
                            'groups.value': 'hr'
                        }
                    }]
                }
            }
        }
    }
}
For this query I referred to the ES docs, but it does not return anything. It would be great if someone could point out what is wrong with the query or with the mapping itself.
You're almost there; you simply need to move the nested query inside the bool/must array:
{
    'query': {
        'bool': {
            'must': [
                {
                    'match_phrase_prefix': {
                        'title': 'A'
                    }
                },
                {
                    'nested': {
                        'path': 'groups',
                        'query': {
                            'bool': {
                                'must': [{
                                    'match': {
                                        'groups.value': 'hr'
                                    }
                                }]
                            }
                        }
                    }
                }
            ]
        }
    }
}
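With elasticsearch-py, the corrected body can be passed straight to search(); a sketch, assuming a local node and a placeholder index name my-index, with field names as in the question:

from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumes a local node; adjust hosts as needed
body = {
    "query": {
        "bool": {
            "must": [
                {"match_phrase_prefix": {"title": "A"}},
                {"nested": {
                    "path": "groups",
                    "query": {"bool": {"must": [
                        {"match": {"groups.value": "hr"}}
                    ]}}
                }}
            ]
        }
    }
}
res = es.search(index="my-index", body=body)  # "my-index" is a placeholder
print(res["hits"]["hits"])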

Elastic Search Function_Score Query with Query_String

I was doing a search with Elasticsearch using this code:
es.search(index="article-index", fields="url", body={
    "query": {
        "query_string": {
            "query": "keywordstr",
            "fields": ["text", "title", "tags", "domain"]
        }
    }
})
Now I want to add another parameter to the search scoring - "recencyboost". I was told function_score should solve the problem:
res = es.search(index="article-index", fields="url", body={
    "query": {
        "function_score": {
            "functions": {
                "DECAY_FUNCTION": {
                    "recencyboost": {
                        "origin": "0",
                        "scale": "20"
                    }
                }
            },
            "query": {
                {
                    "query_string": {
                        "query": keywordstr
                    }
                }
            },
            "score_mode": "multiply"
        }
    }
})
It gives me an error saying the dictionary {"query_string": {"query": keywordstr}} is not hashable.
1) How can I fix the error?
2) How can I change the decay function so that it gives higher weight to a higher recency boost?
You appear to have an extra query level in your search (three in total), which creates an unwanted level of nesting. You need to remove the top-level query and make function_score the top-level key:
res = es.search(index="article-index", fields="url", body={
    "function_score": {
        "query": {
            "query_string": {"query": keywordstr}
        },
        "functions": {
            "DECAY_FUNCTION": {
                "recencyboost": {
                    "origin": "0",
                    "scale": "20"
                }
            }
        },
        "score_mode": "multiply"
    }
})
Note: score_mode defaults to "multiply", as does the unused boost_mode, so it should be unnecessary to supply it.
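For reference, the request body Elasticsearch conventionally expects wraps function_score under a top-level query key, with functions as a list and a concrete decay function such as gauss in place of the DECAY_FUNCTION placeholder used in the docs. A sketch, assuming recencyboost is a numeric field on the indexed documents:

res = es.search(index="article-index", fields="url", body={
    "query": {
        "function_score": {
            "query": {"query_string": {"query": keywordstr}},
            "functions": [
                # gauss/exp/linear are the concrete decay functions;
                # "recencyboost" is assumed to be a numeric document field
                {"gauss": {"recencyboost": {"origin": 0, "scale": 20}}}
            ],
            "score_mode": "multiply"
        }
    }
})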
You can't use a dictionary as a key in another dictionary. In fact, wrapping a dict in an extra pair of braces creates a set literal whose single member is that dict, and set members must be hashable, which is exactly where the "unhashable" error comes from. You are doing this in the following segment of the code:
"query": {
    {"query_string": {"query": keywordstr}}
},
The following will work fine:
"query": {
    "query_string": {"query": keywordstr}
},
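A minimal demonstration of the error, runnable on its own: the extra braces build a set containing a dict, which raises TypeError at construction:

try:
    {{"query_string": {"query": "foo"}}}  # set literal containing a dict
except TypeError as e:
    print(e)  # unhashable type: 'dict'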
Use it like this:
query: {
    function_score: {
        query: {
            filtered: {
                query: {
                    bool: {
                        must: [
                            {
                                query_string: {
                                    query: shop_search,
                                    fields: ['shop_name'],
                                    boost: 2.0  // boost belongs inside query_string
                                }
                            },
                            {
                                query_string: {
                                    query: shop_search,
                                    fields: ['shop_name'],
                                    boost: 3.0
                                }
                            }
                        ]
                    }
                },
                filter: {
                    // { term: { search_city: }}
                }
            }
        },
        exp: {
            location: {
                origin: { lat: 12.8748964, lon: 77.6413239 },
                scale: "10000m",
                offset: "0m",
                decay: "0.5"
            }
        }
        // score_mode: "sum"
    }
}
