I am new to Stack Overflow and I have exactly the same issue as the one described in this question. That topic has an accepted answer, but it doesn't really help me solve my problem. Do you have any idea what causes this problem?
I have this piece of code that returns an empty result:
tmp = client['test']['Prenom'].aggregate([
{
'$match': {
'annais': 2014,
'preusuel': {
'$ne': '_PRENOMS_RARES'
}
}
}, {
'$group': {
'_id': '$preusuel',
'nombre': {
'$sum': '$nombre'
},
'occurence': {
'$sum': 1
}
}
}, {
'$sort': {
'nombre': -1
}
}, {
'$limit': 10
}
])
whereas :
tmp = client['test']['Prenom'].aggregate([
{
'$match': {
'annais': annee_j,
'preusuel': {
'$ne': '_PRENOMS_RARES'
}
}
}, {
'$group': {
'_id': '$preusuel',
'nombre': {
'$sum': '$nombre'
},
'occurence': {
'$sum': 1
}
}
}, {
'$sort': {
'nombre': -1
}
}, {
'$limit': 10
}
])
works perfectly and returns 10 names.
Thank you
Related
I need to Sum all values from different time series.
yesterday = datetime.datetime.combine(datetime.datetime.now(), datetime.time.min) - datetime.timedelta(days=1)
test = db.aggregate([
{ "$match": { "date": yesterday, "location": location } },
{ '$group': { '_id': "$location", 'cost': { '$sum': '$cost' } } }
])
This works for me to get the sum for yesterday, but for flexibility I want to change it.
test = db.aggregate([
{ "$project": { "year": { "$year": "$date" }, "month": { "$month": "$date" } } },
{ "$match": { "month": 1, "year": 2023, "location": location } },
{ '$group': { '_id': "$location", 'cost': { '$sum': '$cost' } } }
])
When I change it like that, I don't get a result.
I noticed that when I run this code:
test = db.aggregate([{ "$project": { "year" : { "$year": "$posting_date" } } }])
I get output like {'_id': ObjectId('63b6e6c215161672a159417f'), 'year': 2023}, so the field year should be created correctly, shouldn't it? So why is it not working with the $match?
I have this JSON file with some 14 arrays, each one containing numbers, and I'm trying to get the average of all of them in MongoDB with Python, but I'm getting null in the answer.
{'_id': ObjectId('618af03902cd107477e3f2b9'),
"Time":[1364,1374,1384],
"yaw":[0.15,0.3,0.45],
"pitch":[0.36,0.76,1.08],
"roll":[-0.13,-0.25,-0.35],
"ax":[-0.42,-0.41,-0.41],
"ay":[-0.15,-0.13,-0.1],
"az":[0.9,0.91,1],
"gx":[0,0,0],
"gy":[-0.01,0,-0.01],
"gz":[0.02,0.02,0.02],
"mx":[0.26,0.26,0.26],
"my":[0.01,0.01,0.01],
"mz":[-0.04,-0.04,-0.07]
}
I want to average Time, yaw and pitch, and I have this query in Python for MongoDB:
#app.route('/sta')
def sta():
docs = db.basetest.aggregate([{"$group": {"_id": '618af03902cd107477e3f2b9', "avgTest" : {"$avg":"Time"}} }])
for document in docs:
return document
I'm getting this return:
{
"_id": "618af03902cd107477e3f2b9",
"avgTest": null
}
Can anyone help?
A more dynamic solution would be this one:
db.collection.aggregate([
{
$project: {
data: {
$filter: {
input: { $objectToArray: "$$ROOT" },
cond: { $ne: ["$$this.k", "_id"] }
}
}
}
},
{ $set: { data: { $map: { input: "$data", in: { k: "$$this.k", v: { $avg: "$$this.v" } } } } } },
{ $replaceWith: { $mergeObjects: [{ _id: "$_id" }, { $arrayToObject: "$data" }] } }
])
If you want the average over all documents, then it becomes more complex. I found this solution, maybe there is a shorter/better one:
db.collection.aggregate([
{ $group: { _id: null, data: { $push: { $objectToArray: "$$ROOT" } } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $unwind: "$data" },
{ $group: { _id: "$data.k", v: { $push: "$data.v" } } },
{ $match: { _id: { $ne: "_id" } } },
{
$set: {
v: {
$reduce: {
input: "$v",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $set: { _id: "$$REMOVE", k: "$_id", v: { $avg: "$v" } } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{ $replaceWith: { $arrayToObject: "$data" } }
])
A different solution is this one, it may have better performance:
db.collection.aggregate([
{ $unset: "_id" },
{ $group: { _id: null, data: { $push: { $objectToArray: "$$ROOT" } } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{
$set: {
data: {
$map: {
input: { $setUnion: "$data.k" },
as: "k",
in: { $filter: { input: "$data", cond: { $eq: ["$$this.k", "$$k"] } } }
}
}
}
},
{
$set: {
data: {
$map: {
input: "$data",
as: "val",
in: {
k: { $first: "$$val.k" },
v: {
$avg: {
$reduce: {
input: "$$val.v",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
}
}
}
}
},
{ $replaceWith: { $arrayToObject: "$data" } }
])
You can use $avg operator in a $project stage (or $set or $addFields if you prefer) like this:
db.collection.aggregate([
{
"$project": {
"Time": {"$avg": "$Time"},
"yaw": {"$avg": "$yaw"},
"pitch": {"$avg": "$pitch"},
"roll": {"$avg": "$roll"},
"ax": {"$avg": "$ax"},
"ay": {"$avg": "$ay"},
"az": {"$avg": "$az"},
"gx": {"$avg": "$gx"},
"gy": {"$avg": "$gy"},
"gz": {"$avg": "$gz"},
"mx": {"$avg": "$mx"},
"my": {"$avg": "$my"},
"mz": {"$avg": "$mz"}
}
}
])
Example here
done_status = ['BAD_PU', 'TO_WH', 'RCVDPORT', 'RCVD', 'BAD_DEL', 'MISSFLT', 'OFFLOAD']
shipments = db.db_shipment.aggregate([{
"$lookup":{
"from":"db_shipment_status_history",
"localField":"_id",
"foreignField":"fk_shipment_id",
"as":"shipment_status_history_collection"
}
},
{"$unwind":
"$shipment_status_history_collection"},
{"$redact":{"$cond":{ "$if": { "status_value": {"$in": done_status } } },
"$then": "$$KEEP"
,"$else":"$$PRUNE"
}
},
{"$sort":
{'shipment_status_history_collection.rec_timestamp':-1}},
{"$limit":1},
{"$project":{"pkey":"$pkey","_code":"$_code"}}
])
error:
pymongo.errors.OperationFailure: An object representing an expression must have exactly one field: { $cond: { $if: { status_value: { $in: [ "BAD_PU", "TO_WH", "RCVDPORT", "RCVD", "BAD_DEL", "MISSFLT", "OFFLOAD" ] } } }, $else: "$$PRUNE", $then: "$$KEEP" }
How do I fix this error? I'm trying to add the latest shipment status history to the shipment record where the status value is in the given list of status values.
Update the $redact stage of your aggregation pipeline: if, then and else are fields of the $cond operator, not operators in themselves, so they take no $ prefix and must all sit inside the $cond object.
Also, the $in aggregation operator is passed a two-element array: its first item is checked for presence in the second item, which must resolve to an array.
Mongo 3.6
messenger_pipeline_status = (
messenger_active_status['data']['pending']
+ messenger_active_status['data']['processing']
)
assigned_status = ['DEL_ASSIGNED','PU_ASSIGNED']
subpipeline = [
{
'$match': {
'$expr': {
'$and': [
{'$eq': ['$fk_shipment_id', '$$pkey']},
{'$eq': ['$fk_messenger_id', fk_user_id]},
{'$in': ['$status_value', assigned_status]}
]
}
}
},
{
'$sort': {
'rec_timestamp': -1
}
},
{
'$limit': 1
},
{
'$project': {
'fk_shipment_id': 1
}
}
]
# Aggregation pipeline for db_shipment: keep the shipments whose status is in
# messenger_pipeline_status and for which subpipeline selects a history entry,
# then flatten the fields of interest into a single-level document.
pipeline = [
    {
        # Each condition is its own dict entry and must be comma-separated.
        '$match': {
            'status_value': {'$in': messenger_pipeline_status},
            'is_deleted': False,
            'is_postponed': False,
            'is_active': True,
        }
    },
    {
        # Pipeline-style $lookup: 'let' exposes this shipment's pkey to
        # subpipeline, which reads it as $$pkey.
        '$lookup': {
            'from': 'db_shipment_status_history',
            'let': {'pkey': '$pkey'},
            'pipeline': subpipeline,
            'as': 'shipment_status_history'
        }
    },
    {
        # Discard shipments for which the lookup returned nothing.
        '$match': {
            'shipment_status_history': {
                '$ne': []
            }
        }
    },
    {
        # Turn the joined array into an embedded document so that its fields
        # can be addressed with dotted paths in the projection below.
        '$unwind': '$shipment_status_history'
    },
    {
        '$project': {
            '_id': 1,
            'pkey': 1,
            '_code': 1,
            'date_created': 1,
            'sender_full_name': '$sender.full_name',
            'sender_raw_address': '$sender.raw_address',
            'sender_formatted_address': '$sender.formatted_address',
            'receiver_full_name': '$receiver.full_name',
            'receiver_raw_address': '$receiver.raw_address',
            'receiver_formatted_address': '$receiver.formatted_address',
            'status_name': 1,
            'team_value': 1,
            'cs_name': 1,
            # NOTE(review): this publishes the history entry's
            # fk_shipment_id under the name fk_messenger_id -- confirm that
            # this is the intended source field.
            'fk_messenger_id': '$shipment_status_history.fk_shipment_id'
        }
    }
]
result = db.db_shipment.aggregate(pipeline)
print(list(result))
[Edit] Mongo 3.2
The following aggregation pipeline produces results similar to the above and is a valid query for Mongo 3.2.
messenger_pipeline_status = ['MISSFLT', 'OFFLOAD']
# Aggregation pipeline for db_shipment that avoids the pipeline form of
# $lookup: join every history entry by shipment key, filter the joined array
# in $project, then keep the most recent matching entry per shipment.
pipeline = [
    {
        # Each condition is its own dict entry and must be comma-separated.
        '$match': {
            'status_value': {'$in': messenger_pipeline_status},
            'is_deleted': False,
            'is_postponed': False,
            'is_active': True,
        }
    },
    {
        # Equality join: history.fk_shipment_id == shipment.pkey.
        '$lookup': {
            'from': 'db_shipment_status_history',
            'localField': 'pkey',
            'foreignField': 'fk_shipment_id',
            'as': 'shipment_status_history'
        }
    },
    {
        # Discard shipments that have no history entry at all.
        '$match': {
            'shipment_status_history': {
                '$ne': []
            }
        }
    },
    {
        '$project': {
            '_id': 1,
            'pkey': 1,
            '_code': 1,
            'date_created': 1,
            'sender_full_name': '$sender.full_name',
            'sender_raw_address': '$sender.raw_address',
            'sender_formatted_address': '$sender.formatted_address',
            'receiver_full_name': '$receiver.full_name',
            'receiver_raw_address': '$receiver.raw_address',
            'receiver_formatted_address': '$receiver.formatted_address',
            'status_name': 1,
            'team_value': 1,
            'cs_name': 1,
            # Keep only the history entries that belong to the given
            # messenger and carry one of the assigned statuses.
            'shipment_status_history': {
                '$filter': {
                    'input': '$shipment_status_history',
                    'as': 'shipment',
                    'cond': {
                        '$and': [
                            # The join above already guarantees
                            # fk_shipment_id == pkey, so the user id is
                            # compared with the messenger key instead.
                            {'$eq': ['$$shipment.fk_messenger_id', fk_user_id]},
                            # NOTE(review): the aggregation form of $in was
                            # introduced in MongoDB 3.4 -- confirm that the
                            # target server accepts it.
                            {'$in': ['$$shipment.status_value', assigned_status]},
                        ]
                    }
                }
            },
        }
    },
    {
        # One output document per remaining history entry; shipments whose
        # filtered array is empty are dropped here.
        '$unwind': '$shipment_status_history'
    },
    {
        # Newest history entry first, so that $first below picks it.
        '$sort': {
            'shipment_status_history.rec_timestamp': -1,
        }
    },
    {
        '$group': {
            '_id': '$pkey',
            'doc': {
                '$first': '$$CURRENT'
            }
        }
    },
    {
        '$unwind': '$doc'
    },
    {  # last projection, I promise
        '$project': {
            # The grouped document stores its identifier as _id.
            '_id': '$doc._id',
            'pkey': '$doc.pkey',
            '_code': '$doc._code',
            'date_created': '$doc.date_created',
            'sender_full_name': '$doc.sender_full_name',
            'sender_raw_address': '$doc.sender_raw_address',
            'sender_formatted_address': '$doc.sender_formatted_address',
            'receiver_full_name': '$doc.receiver_full_name',
            'receiver_raw_address': '$doc.receiver_raw_address',
            'receiver_formatted_address': '$doc.receiver_formatted_address',
            'status_name': '$doc.status_name',
            'team_value': '$doc.team_value',
            'cs_name': '$doc.cs_name',
            # NOTE(review): this publishes the history entry's
            # fk_shipment_id under the name fk_messenger_id -- confirm that
            # this is the intended source field.
            'fk_messenger_id': '$doc.shipment_status_history.fk_shipment_id'
        }
    },
]
res = db.db_shipment.aggregate(pipeline)
I have some elasticsearch records that are being stored as either an empty string, or a null value. I am trying to develop a query that will allow me to return these from the index. I came up with:
{
'query': {
'filtered': {
'filter': {
'bool': {
'should': [
{'term': {'field1': ''}},
{"missing" : {"field": "field1"}},
],
}
}
}
}
}
Which works as intended for my purpose, and returns the correct row. However, if I try and search for any more than a single field, the 'should' clause OR's the two fields together. This is a problem, because I want there to be an AND relationship:
{
'query': {
'filtered': {
'filter': {
'bool': {
'should': [
{'term': {'field1': ''}},
{"missing" : {"field": "field1"}},
# these fields should be AND but are OR
{'term': {'field2': ''}},
{"missing" : {"field": "field2"}},
],
}
}
}
}
}
Is there any way I can do the above with a single filter, or AND the two filters together?
You could use the and filter for that purpose, and AND the two bool/should filters, like this:
{
"query": {
"filtered": {
"filter": {
"and": [
{
"bool": {
"should": [
{
"term": {
"field1": ""
}
},
{
"missing": {
"field": "field1"
}
}
]
}
},
{
"bool": {
"should": [
{
"term": {
"field2": ""
}
},
{
"missing": {
"field": "field2"
}
}
]
}
}
]
}
}
}
}
Or you can also bool/must two or filters like this:
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"or": [
{
"term": {
"field1": ""
}
},
{
"missing": {
"field": "field1"
}
}
]
},
{
"or": [
{
"term": {
"field2": ""
}
},
{
"missing": {
"field": "field2"
}
}
]
}
]
}
}
}
}
}
I am using Elasticsearch, where the query has to be posted as JSON with its keys in a specific order, or else the result will be wrong. The problem is that Python is changing the ordering of my JSON. My original JSON query is:
x= {
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*a*"
}
},
"filter": {
"and": {
"filters": [
{
"term": {
"city": "london"
}
},
{
"term": {
"industry.industry_not_analyed": "oil"
}
}
]
}
}
}
},
"facets": {
"industry": {
"terms": {
"field": "industry.industry_not_analyed"
}
},
"city": {
"terms": {
"field": "city.city_not_analyzed"
}
}
}
}
But the resulting Python object is as follows:
{
'query': {
'filtered': {
'filter': {
'and': {
'filters': [
{
'term': {
'city': 'london'
}
},
{
'term': {
'industry.industry_not_analyed': 'oil'
}
}
]
}
},
'query': {
'query_string': {
'query': '*a*'
}
}
}
},
'facets': {
'city': {
'terms': {
'field': 'city.city_not_analyzed'
}
},
'industry': {
'terms': {
'field': 'industry.industry_not_analyed'
}
}
}
}
The result is different from what I need. How do I solve this?
Use OrderedDict() instead of {}. Note that you can't simply use OrderedDict(query=...) because that would create an unordered dict in the background. Use this code instead:
x = OrderedDict()
x['query'] = OrderedDict()
...
I suggest implementing a builder for this:
x = Query().filtered().query_string("*a*").and()....