Average Mongodb with python - python

I have this JSON file with some 14 arrays, each one containing numbers, and I'm trying to get the average of each in MongoDB with Python, but I'm getting null in the answer.
{'_id': ObjectId('618af03902cd107477e3f2b9'),
"Time":[1364,1374,1384],
"yaw":[0.15,0.3,0.45],
"pitch":[0.36,0.76,1.08],
"roll":[-0.13,-0.25,-0.35],
"ax":[-0.42,-0.41,-0.41],
"ay":[-0.15,-0.13,-0.1],
"az":[0.9,0.91,1],
"gx":[0,0,0],
"gy":[-0.01,0,-0.01],
"gz":[0.02,0.02,0.02],
"mx":[0.26,0.26,0.26],
"my":[0.01,0.01,0.01],
"mz":[-0.04,-0.04,-0.07]
}
I want to average time, yaw, pitch and I have this query in Python for MongoDB:
#app.route('/sta')
def sta():
docs = db.basetest.aggregate([{"$group": {"_id": '618af03902cd107477e3f2b9', "avgTest" : {"$avg":"Time"}} }])
for document in docs:
return document
I'm getting this return:
{
"_id": "618af03902cd107477e3f2b9",
"avgTest": null
}
Can anyone help?

A more dynamic solution would be this one:
db.collection.aggregate([
{
$project: {
data: {
$filter: {
input: { $objectToArray: "$$ROOT" },
cond: { $ne: ["$$this.k", "_id"] }
}
}
}
},
{ $set: { data: { $map: { input: "$data", in: { k: "$$this.k", v: { $avg: "$$this.v" } } } } } },
{ $replaceWith: { $mergeObjects: [{ _id: "$_id" }, { $arrayToObject: "$data" }] } }
])
If you want the average over all documents, then it becomes more complex. I found this solution, maybe there is a shorter/better one:
db.collection.aggregate([
{ $group: { _id: null, data: { $push: { $objectToArray: "$$ROOT" } } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $unwind: "$data" },
{ $group: { _id: "$data.k", v: { $push: "$data.v" } } },
{ $match: { _id: { $ne: "_id" } } },
{
$set: {
v: {
$reduce: {
input: "$v",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $set: { _id: "$$REMOVE", k: "$_id", v: { $avg: "$v" } } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{ $replaceWith: { $arrayToObject: "$data" } }
])
A different solution is this one, it may have better performance:
db.collection.aggregate([
{ $unset: "_id" },
{ $group: { _id: null, data: { $push: { $objectToArray: "$$ROOT" } } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{
$set: {
data: {
$map: {
input: { $setUnion: "$data.k" },
as: "k",
in: { $filter: { input: "$data", cond: { $eq: ["$$this.k", "$$k"] } } }
}
}
}
},
{
$set: {
data: {
$map: {
input: "$data",
as: "val",
in: {
k: { $first: "$$val.k" },
v: {
$avg: {
$reduce: {
input: "$$val.v",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
}
}
}
}
},
{ $replaceWith: { $arrayToObject: "$data" } }
])

You can use $avg operator in a $project stage (or $set or $addFields if you prefer) like this:
db.collection.aggregate([
{
"$project": {
"Time": {"$avg": "$Time"},
"yaw": {"$avg": "$yaw"},
"pitch": {"$avg": "$pitch"},
"roll": {"$avg": "$roll"},
"ax": {"$avg": "$ax"},
"ay": {"$avg": "$ay"},
"az": {"$avg": "$az"},
"gx": {"$avg": "$gx"},
"gy": {"$avg": "$gy"},
"gz": {"$avg": "$gz"},
"mx": {"$avg": "$mx"},
"my": {"$avg": "$my"},
"mz": {"$avg": "$mz"}
}
}
])
Example here

Related

MongoDB: Update element in an array where the index of the element is saved in the document

I have the following document structure.
{
_id: ...,
unique_id: 1234,
config_no: 1,
configs: [
{
data: "qwertyuiop" // random string
},
{
data: "asdfghjkl" // random string
}
]
}
I want to update value of data from one of the configs. The index of the config that needs to be updated is available in the config_no key.
Is there any way to update the value without querying the document first?
This is what I am currently doing
doc = db.collection.findOne({"unique_id": 1234})
config_no = doc.config_no
db.collection.updateOne(
{"unique_id": 1234},
{"$set": {"configs."+config_no+".data": "zxcvbnm"}} //"configs.1.data"
)
The following is what I would like to achieve.
db.collection.updateOne(
{"unique_id": 1234},
{"$set": {"configs.${config_no}.data": "zxcvbnm"}}
)
You can $unwind with includeArrayIndex option. Use the index to perform conditional update and $merge back into the collection.
db.collection.aggregate([
{
$match: {
unique_id: 1234
}
},
{
"$unwind": {
path: "$configs",
includeArrayIndex: "idx"
}
},
{
$set: {
"configs.data": {
"$cond": {
"if": {
$eq: [
"$config_no",
"$idx"
]
},
"then": "zxcvbnm",
"else": "$configs.data"
}
}
}
},
{
$group: {
_id: "$_id",
config_no: {
$first: "$config_no"
},
configs: {
$push: "$configs"
},
unique_id: {
$first: "$unique_id"
}
}
},
{
"$merge": {
"into": "collection",
"on": "_id",
"whenMatched": "merge"
}
}
])
Mongo Playground

Sort an array by occurrences in MongoDB

Is it possible to sort an array by occurrences?
For Example, given
{
"_id": {
"$oid": "60d20d342c7951852a21s53a"
},
"site": "www.xyz.ie",
"A": ["mary", "jamie", "john", "mary", "mary", "john"],
}
return
{
"_id": {
"$oid": "60d20d342c7951852a21s53a"
},
"site": "www.xyz.ie",
"A": ["mary", "jamie", "john", "mary", "mary", "john"],
"sorted_A" : ["mary","john","jamie"]
}
I am able to get it most of the way there but I cannot figure out how to join them all back together in an array.
I have been using an aggregation pipeline
Starting with $match to find the site I want
Then $unwind on with path: "$A"
Next $sortByCount on "$A"
???? I can't figure out how to group it all back together.
Here is the pipeline:
[
{
'$match': {
'site': 'www.xyz.ie'
}
}, {
'$unwind': {
'path': '$A'
}
}, {
'$sortByCount': '$A'
}, {
????
}
]
$group by _id and A, get the first site and count the total elements
$sort by count in descending order
$group by only _id and get first site, and construct array of A
[
{ $match: { site: "www.xyz.ie" } },
{ $unwind: "$A" },
{
$group: {
_id: { _id: "$_id", A: "$A" },
site: { $first: "$site" },
count: { $sum: 1 }
}
},
{ $sort: { count: -1 } },
{
$group: {
_id: "$_id._id",
site: { $first: "$site" },
A: { $push: "$_id.A" }
}
}
]
Playground

Filter MongoDB query to find documents only if a field in a list of objects is not empty

I have a MongoDB document structure like following:
Structure
{
"stores": [
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": [],
"item_category": "101",
"item_id": "11"
}
]
},
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
},
{
"feedback": [],
"item_category": "101",
"item_id": "12"
},
{
"feedback": [],
"item_category": "102",
"item_id": "13"
},
{
"feedback": [],
"item_category": "102",
"item_id": "14"
}
],
"store_id": 500
}
]
}
This is a single document in a collection. Some field are deleted to produce minimal representation of the data.
What I want is to get items only if the feedback field in the items array is not empty. The expected result is:
Expected result
{
"stores": [
{
"items": [
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
}
],
"store_id": 500
}
]
}
This is what I tried based on examples in this, which I think is pretty much the same situation, but it didn't work. What's wrong with my query — isn't it the same situation as the zipcode search example in the link? It returns everything, like in the first JSON snippet, Structure:
What I tried
query = {
'date': {'$gte': since, '$lte': until},
'stores.items': {"$elemMatch": {"feedback": {"$ne": []}}}
}
Thanks.
Please try this :
db.yourCollectionName.aggregate([
{ $match: { 'date': { '$gte': since, '$lte': until }, 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores' },
{ $match: { 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores.items' },
{ $match: { 'stores.items.feedback': { "$ne": [] } } },
{ $group: { _id: { _id: '$_id', store_id: '$stores.store_id' }, items: { $push: '$stores.items' } } },
{ $project: { _id: '$_id._id', store_id: '$_id.store_id', items: 1 } },
{ $group: { _id: '$_id', stores: { $push: '$$ROOT' } } },
{ $project: { 'stores._id': 0 } }
])
We have all these stages because you need to operate on an array of arrays. This query is written assuming you're dealing with a large set of data. Since you're filtering on dates, if your document count is much smaller after the first $match, you can skip the following $match stage, the one in between the two $unwind's.
Ref 's :
$match,
$unwind,
$project,
$group
This aggregate query gets the needed result (using the provided sample document and run from the mongo shell):
db.stores.aggregate( [
{ $unwind: "$stores" },
{ $unwind: "$stores.items" },
{ $addFields: { feedbackExists: { $gt: [ { $size: "$stores.items.feedback" }, 0 ] } } },
{ $match: { feedbackExists: true } },
{ $project: { _id: 0, feedbackExists: 0 } }
] )

Finding ElasticSearch records matching empty and null values

I have some elasticsearch records that are being stored as either an empty string, or a null value. I am trying to develop a query that will allow me to return these from the index. I came up with:
{
'query': {
'filtered': {
'filter': {
'bool': {
'should': [
{'term': {'field1': ''}},
{"missing" : {"field": "field1"}},
],
}
}
}
}
}
Which works as intended for my purpose, and returns the correct row. However, if I try and search for any more than a single field, the 'should' clause OR's the two fields together. This is a problem, because I want there to be an AND relationship:
{
'query': {
'filtered': {
'filter': {
'bool': {
'should': [
{'term': {'field1': ''}},
{"missing" : {"field": "field1"}},
# these fields should be AND but are OR
{'term': {'field2': ''}},
{"missing" : {"field": "field2"}},
],
}
}
}
}
}
Is there anyway I can do the above with a single filter, or AND the two filters together?
You could use the and filter for that purpose, and AND the two bool/should filters, like this:
{
"query": {
"filtered": {
"filter": {
"and": [
{
"bool": {
"should": [
{
"term": {
"field1": ""
}
},
{
"missing": {
"field": "field1"
}
}
]
}
},
{
"bool": {
"should": [
{
"term": {
"field2": ""
}
},
{
"missing": {
"field": "field2"
}
}
]
}
}
]
}
}
}
}
Or you can also bool/must two or filters like this:
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"or": [
{
"term": {
"field1": ""
}
},
{
"missing": {
"field": "field1"
}
}
]
},
{
"or": [
{
"term": {
"field2": ""
}
},
{
"missing": {
"field": "field2"
}
}
]
}
]
}
}
}
}
}

ordering json in python mapping object

I am using Elasticsearch, where the query is to be posted in JSON and should be in a standard order, or else the result will be wrong. The problem is that Python is changing my JSON ordering. My original JSON query is:
x= {
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*a*"
}
},
"filter": {
"and": {
"filters": [
{
"term": {
"city": "london"
}
},
{
"term": {
"industry.industry_not_analyed": "oil"
}
}
]
}
}
}
},
"facets": {
"industry": {
"terms": {
"field": "industry.industry_not_analyed"
}
},
"city": {
"terms": {
"field": "city.city_not_analyzed"
}
}
}
}
but the resulting Python object is as follows:
{
'query': {
'filtered': {
'filter': {
'and': {
'filters': [
{
'term': {
'city': 'london'
}
},
{
'term': {
'industry.industry_not_analyed': 'oil'
}
}
]
}
},
'query': {
'query_string': {
'query': '*a*'
}
}
}
},
'facets': {
'city': {
'terms': {
'field': 'city.city_not_analyzed'
}
},
'industry': {
'terms': {
'field': 'industry.industry_not_analyed'
}
}
}
}
The result is different from what I need. How do I solve this?
Use OrderedDict() instead of {}. Note that you can't simply use OrderedDict(query=...) because that would create an unordered dict in the background. Use this code instead:
x = OrderedDict()
x['query'] = OrderedDict()
...
I suggest to implement a builder for this:
x = Query().filtered().query_string("*a*").and()....

Categories