Related
I have a list of dictionaries (API response) and I use the following function to search for certain nations:
def nation_search(self):
    """Look up a nation dict in ``nations_v2`` by nation name, then by leader.

    Note: this is written as a method but treats ``self`` as the search
    term itself (anything string-convertible).  Matching is case-insensitive.
    Returns the first matching dict, or ``False`` when neither the nation
    name nor the leader name matches exactly.
    """
    # Normalize the query once instead of re-formatting and re-lowering it
    # inside every comparison ((f"{self}").lower() was redundant — str() is
    # the idiomatic spelling and the value never changes during the scan).
    query = str(self).lower()
    by_nation = next(
        (item for item in nations_v2 if item["nation"].lower() == query),
        False,
    )
    if by_nation:
        return by_nation
    # Fall back to matching on the leader's name.
    return next(
        (item for item in nations_v2 if item["leader"].lower() == query),
        False,
    )
2 examples :
nations_v2 = [{'nation_id': 5270, 'nation': 'Indo-Froschtia', 'leader': 'Saxplayer', 'continent': 2, 'war_policy': 4, 'domestic_policy': 2, 'color': 15, 'alliance_id': 790, 'alliance': 'Rose', 'alliance_position': 3, 'cities': 28, 'offensive_wars': 0, 'defensive_wars': 0, 'score': 4945, 'v_mode': False, 'v_mode_turns': 0, 'beige_turns': 0, 'last_active': '2020-08-10 04:04:48', 'founded': '2014-08-05 00:09:31', 'soldiers': 0, 'tanks': 0, 'aircraft': 2100, 'ships': 0, 'missiles': 0, 'nukes': 0},
{'nation_id': 582, 'nation': 'Nightsilver Woods', 'leader': 'Luna', 'continent': 4, 'war_policy': 4, 'domestic_policy': 2, 'color': 10, 'alliance_id': 615, 'alliance': 'Seven Kingdoms', 'alliance_position': 2, 'cities': 23, 'offensive_wars': 0, 'defensive_wars': 0, 'score': 3971.25, 'v_mode': False, 'v_mode_turns': 0, 'beige_turns': 0, 'last_active': '2020-08-10 00:22:16', 'founded': '2014-08-05 00:09:35', 'soldiers': 0, 'tanks': 0, 'aircraft': 1725, 'ships': 115, 'missiles': 0, 'nukes': 0}]
I want to add a fuzzy-search using fuzzywuzzy to get 5 possible matches in case there's a spelling error in the argument passed into the function but I can't seem to figure it out.
I only want to search in values for nation and leader.
If you need 5 possible matches, use process.
from fuzzywuzzy import process


def nation_search(self):
    """Fuzzy-search ``nations_v2`` on the 'nation' and 'leader' fields.

    ``self`` is the (string-convertible) search term.  Returns a pair of
    lists, each holding up to 5 ``(candidate, score)`` tuples: the best
    fuzzy matches among nation names and among leader names respectively.
    """
    # Normalize the search term once; (f"{self}").lower() repeated per call
    # was redundant — str() is the idiomatic conversion.
    query = str(self).lower()
    nations_only = [v2["nation"].lower() for v2 in nations_v2]
    leaders_only = [v2["leader"].lower() for v2 in nations_v2]
    matched_nations = process.extract(query, nations_only, limit=5)
    matched_leaders = process.extract(query, leaders_only, limit=5)
    return matched_nations, matched_leaders
Currently I've indexed my mongoDB collection into Elasticsearch running in a docker container. I am able to query a document by it's exact name, but Elasticsearch is unable to match the query if it is only part of the name. Here is an example:
>>> es = Elasticsearch('0.0.0.0:9200')
>>> es.indices.get_alias('*')
{'mongodb_meta': {'aliases': {}}, 'sigstore': {'aliases': {}}, 'my-index': {'aliases': {}}}
>>> x = es.search(index='sigstore', body={'query': {'match': {'name': 'KEGG_GLYCOLYSIS_GLUCONEOGENESIS'}}})
>>> x
{'took': 198, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 8.062855, 'hits': [{'_index': 'sigstore', '_type': 'sigs', '_id': '5d66c23228144432307c2c49', '_score': 8.062855, '_source': {'id': 1, 'name': 'KEGG_GLYCOLYSIS_GLUCONEOGENESIS', 'description': 'http://www.broadinstitute.org/gsea/msigdb/cards/KEGG_GLYCOLYSIS_GLUCONEOGENESIS', 'members': ['ACSS2', 'GCK', 'PGK2', 'PGK1', 'PDHB', 'PDHA1', 'PDHA2', 'PGM2', 'TPI1', 'ACSS1', 'FBP1', 'ADH1B', 'HK2', 'ADH1C', 'HK1', 'HK3', 'ADH4', 'PGAM2', 'ADH5', 'PGAM1', 'ADH1A', 'ALDOC', 'ALDH7A1', 'LDHAL6B', 'PKLR', 'LDHAL6A', 'ENO1', 'PKM2', 'PFKP', 'BPGM', 'PCK2', 'PCK1', 'ALDH1B1', 'ALDH2', 'ALDH3A1', 'AKR1A1', 'FBP2', 'PFKM', 'PFKL', 'LDHC', 'GAPDH', 'ENO3', 'ENO2', 'PGAM4', 'ADH7', 'ADH6', 'LDHB', 'ALDH1A3', 'ALDH3B1', 'ALDH3B2', 'ALDH9A1', 'ALDH3A2', 'GALM', 'ALDOA', 'DLD', 'DLAT', 'ALDOB', 'G6PC2', 'LDHA', 'G6PC', 'PGM1', 'GPI'], 'user': 'naji.taleb#medimmune.com', 'type': 'public', 'level1': 'test', 'level2': 'test2', 'time': '08-28-2019 14:03:29 EDT-0400', 'source': 'File', 'mapped': [''], 'notmapped': [''], 'organism': 'human'}}]}}
When using the full name of the document, elasticsearch is able to successfully query it. But this is what happens when I attempt to search part of the name or use a wildcard:
>>> x = es.search(index='sigstore', body={'query': {'match': {'name': 'KEGG'}}})
>>> x
{'took': 17, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}}
>>> x = es.search(index='sigstore', body={'query': {'match': {'name': 'KEGG*'}}})
>>> x
{'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}}
In addition to the default index settings I also tried making an index that allows the use of the nGram tokenizer to enable me to do partial search, but that also didn't work. These are the settings I used for that index:
{
"sigstore": {
"aliases": {},
"mappings": {},
"settings": {
"index": {
"max_ngram_diff": "99",
"number_of_shards": "1",
"provided_name": "sigstore",
"creation_date": "1579200699718",
"analysis": {
"filter": {
"substring": {
"type": "nGram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"str_index_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "keyword"
},
"str_search_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "keyword"
}
}
},
"number_of_replicas": "1",
"uuid": "3nf915U6T9maLdSiJozvGA",
"version": {
"created": "7050199"
}
}
}
}
}
and this is the corresponding python command that created it:
es.indices.create(index='sigstore',body={"mappings": {},"settings": { 'index': { "analysis": {"analyzer": {"str_search_analyzer": {"tokenizer": "keyword","filter": ["lowercase"]},"str_index_analyzer": {"tokenizer": "keyword","filter": ["lowercase", "substring"]}},"filter": {"substring": {"type": "nGram","min_gram": 1,"max_gram": 20}}}},'max_ngram_diff': '99'}})
I use mongo-connector as the pipeline between my mongoDB collection and elasticsearch. This is the command I use to start it:
mongo-connector -m mongodb://username:password#xx.xx.xxx.xx:27017/?authSource=admin -t elasticsearch:9200 -d elastic2_doc_manager -n sigstore.sigs
I'm unsure as to why my elasticsearch is unable to get a partial match, and wondering if there is some setting I'm missing or if there's some crucial mistake I've made somewhere. Thanks for reading.
Versions
MongoDB 4.0.10
elasticsearch==7.1.0
elastic2-doc-manager[elastic5]
Updated after checked your gist:
You need to apply the mapping to your field as written in the doc, cf the first link I share in the comment.
You need to do it after applying the settings on your index according to the gist it's line 11.
Something like:
PUT /your_index/_mapping
{
"properties": {
"name": {
"type": "keyword",
"ignore_above": 256,
"fields": {
"str_search_analyzer": {
"type": "text",
"analyzer": "str_search_analyzer"
}
}
}
}
}
After you set the mapping need to apply it to your document, using update_by_query
https://www.elastic.co/guide/en/elasticsearch/reference/master/docs-update-by-query.html
So you can continue to search with term search on your field name as it will be indexed with a keyword mapping (exact match) and on the sub_field name.str_search_analyzer with part of the word.
your_keyword = 'KEGG_GLYCOLYSIS_GLUCONEOGENESIS' OR 'KEGG*'
x = es.search(index='sigstore', body={'query': {'bool': {'should':[{'term': {'name': your_keyword}},
{'match': {'name.str_search_analyzer': your_keyword}}
]}}
})
So I have this list of dictionaries, "runs":
[{
'id': 12,
'suite_id': 2,
'name': 'name',
'description': "desc.",
'nice_id': 3,
'joku_id': None,
'onko': False,
'eikai': False,
'tehty': None,
'config': None,
'config_ids': [],
'passed_count': 1,
'blocked_count': 2,
'untested_count': 3,
'retest_count': 4,
'failed_count': 5,
'custom_status1_count': 0,
'custom_status2_count': 0,
'custom_status3_count': 0,
'custom_status4_count': 0,
'custom_status5_count': 0,
'custom_status6_count': 0,
'custom_status7_count': 0,
'projekti_id': 1,
'plan_id': None,
'created_on': 12343214,
'created_by': 11,
'url': 'google.com'
}, {
'id': 16,
'suite_id': 2,
'name': 'namae)',
'description': "desc1",
'nice_id': 5,
'joku_id': None,
'onko': False,
'eikai': False,
'tehty': None,
'config': None,
'config_ids': [],
'passed_count': 100,
'blocked_count': 1,
'untested_count': 3,
'retest_count': 2,
'failed_count': 5,
'custom_status1_count': 0,
'custom_status2_count': 0,
'custom_status3_count': 0,
'custom_status4_count': 0,
'custom_status5_count': 0,
'custom_status6_count': 0,
'custom_status7_count': 0,
'prokti_id': 7,
'plan_id': None,
'created_on': 4321341644,
'created_by': 11,
'url': 'google.com/2' }
The "id" key appears about 50 times; that is just a part of the data.
I need to find all "id" values (not joku_id, nice_id etc. — only "id") and make a string/dict of them
and same for name, and description
i have tried:
j = json.load(run)
ids = (j["id"])
j = json.load(run)
names = (j["name"])
j = json.load(run)
descriptions = (j["description"])
but it returns:
AttributeError: 'list' object has no attribute 'read'
I also need to send a request with specific id and in this case the specific id is marked by o. so id[o]
the request code is below:
test = client.send_get('get_tests/1/ ')
so i need to have the id[o] instead of the 1.
i have tried
test = client.send_get('get_tests/' + id[o] + '/ ')
but it returns:
TypeError: 'int' object is not subscriptable
May be this can help you.
# Collect every top-level "id" value from the list of run dicts.
# NOTE(review): the original name shadows the builtin `id`; it is kept here
# only because the surrounding text refers to it — prefer `ids` in real code.
id = [run.get('id') for run in runs]
[12, 16]
You are trying to pass a list to the json.load function. Please read the docs. load() does not accept lists; it accepts
a .read()-supporting file-like object containing a JSON document
If you want your result in list of dictionary then:
# Flatten the wanted key/value pairs into single-pair dicts.
# Iterate the list directly (no range(len(...))) and use a membership
# test instead of a chained `or` comparison.
result = [
    {key: value}
    for record in data
    for key, value in record.items()
    if key in ('id', 'name', 'description')
]
output:
[{'name': 'name'}, {'id': 12}, {'description': 'desc.'}, {'name': 'namae)'}, {'id': 16}, {'description': 'desc1'}]
the data is your list of dictionary data.
Hope this answer is helpful for you.
I'm using boto3, and I ran this loop:
# Print each instance description dict (fixed: the original was missing
# the colon after the `for` header, which is a SyntaxError).
for i in x["Instances"]:
    print(i)
Then I get:
{
'AmiLaunchIndex': 0,
'Hypervisor': 'xen',
'VpcId': 'vpc-a790ac1',
'Architecture': 'x86_64',
'InstanceId': 'i-0bab3fb8314',
'PrivateDnsName': 'ip-10-c2.internal',
'BlockDeviceMappings': [{
'Ebs': {
'DeleteOnTermination': True,
'AttachTime': datetime.datetime(2017, 4, 4, 20, 44, 27, tzinfo = tzutc()),
'VolumeId': 'vol-07fd506f45',
'Status': 'attached'
},
'DeviceName': '/dev/xvda'
}, {
'Ebs': {
'DeleteOnTermination': False,
'AttachTime': datetime.datetime(2017, 4, 6, 1, 12, 45, tzinfo = tzutc()),
'VolumeId': 'vol-01ef36c45',
'Status': 'attached'
},
'DeviceName': '/dev/sdf'
}],
'RootDeviceName': '/dev/xvda',
'InstanceType': 't2.micro',
'EnaSupport': True,
'ClientToken': 'ODrMT1465413',
'EbsOptimized': False,
'SubnetId': 'subnet-fb1a4',
'Monitoring': {
'State': 'disabled'
},
'PublicDnsName': '',
'StateTransitionReason': 'User initiated (2017-04-06 01:15:22 GMT)',
'PrivateIpAddress': '10.10.4.116',
'RootDeviceType': 'ebs',
'Tags': [{
'Value': 'wp2',
'Key': 'Name'
}, {
'Value': 'true',
'Key': 'backup'
}],
'ImageId': 'ami-0976f01f',
'StateReason': {
'Code': 'Client.UserInitiadShutdown',
'Message': 'Client.UserInitiatedShutdown: User initiated shutdown'
},
'KeyName': 'pair2',
'ProductCodes': [],
'State': {
'Name': 'stopped',
'Code': 80
},
'LaunchTime': datetime.datetime(2017, 4, 6, 1, 13, 1, tzinfo = tzutc()),
'Placement': {
'AvailabilityZone': 'us-east-1b',
'GroupName': '',
'Tenancy': 'default'
},
'SourceDestCheck': True,
'NetworkInterfaces': [{
'Description': 'Primary network interface',
'PrivateIpAddress': '10.10.4.116',
'PrivateIpAddresses': [{
'Primary': True,
'PrivateIpAddress': '10.10.4.116'
}],
'Status': 'in-use',
'SubnetId': 'subnet-ffbcba4',
'VpcId': 'vpc-a790a7c1',
'Attachment': {
'DeleteOnTermination': True,
'AttachTime': datetime.datetime(2017, 4, 4, 20, 44, 26, tzinfo = tzutc()),
'DeviceIndex': 0,
'AttachmentId': 'eni-attach-c8398',
'Status': 'attached'
},
'Ipv6Addresses': [],
'OwnerId': '895548',
'MacAddress': '0e:31:4c4:b6',
'Groups': [{
'GroupId': 'sg-26c59',
'GroupName': 'web-dmz'
}],
'NetworkInterfaceId': 'eni-5383',
'SourceDestCheck': True
}],
'SecurityGroups': [{
'GroupId': 'sg-2cab59',
'GroupName': 'web-dmz'
}],
'VirtualizationType': 'hvm'
}
I'm trying to access the 'VolumeId' using something like:
for x in ["BlockDeviceMappings"][0]["Ebs"]["VolumeId"]:
print(x)
I get TypeError: string indices must be integers
It looks like 'BlockDeviceMappings' starts as a list with a dictionary in it, but I can't get to 'VolumeId'.
I've also tried:
for x in ["BlockDeviceMappings"][0]:
for k,v in ["Ebs"]:
print(v)
And I get:
ValueError: too many values to unpack (expected 2)
And I tried:
for x in ["BlockDeviceMappings"][0]:
for v in ["Ebs"]:
print(v)
Which prints 'Ebs' several times.
Could someone please point me in the right direction?
To get VolumeId please use
# Parenthesized call form works under both Python 2 and 3 (the bare
# `print x[...]` statement is a SyntaxError on Python 3).
print(x["Instances"][0]["BlockDeviceMappings"][0]["Ebs"]["VolumeId"])
you just missed x or _.
You are getting an error because ["BlockDeviceMappings"][0] is a list literal indexed at 0, which evaluates to the string "BlockDeviceMappings" — not to your data.
So you were then trying to index that string with string keys like ["Ebs"], which is what raises the TypeError.
To get all volumes:
# Walk every instance and every attached block device, printing each
# EBS volume id.  Fixed: the inner `for` was missing its colon, and the
# print statement is now a call so it runs on Python 3 as well.
for i in x["Instances"]:
    for b in i["BlockDeviceMappings"]:
        print(b["Ebs"]["VolumeId"])
If you have to extract data from complex sturctures like that often, try some quirky search library like github.com/akesterson/dpath-python , it can extract data just using keywords
Despite reading people's answers stating that the sort is done first, the evidence seems to show something different — that the limit is applied before the sort. Is there a way to force the sort to always happen first?
views = mongo.db.view_logging.find().sort([('count', 1)]).limit(10)
Whether I use .sort().limit() or .limit().sort(), the limit takes precedence. I wonder if this is something to do with pymongo...
According to the documentation, regardless of which goes first in your chain of commands, sort() would be always applied before the limit().
You can also study the .explain() results of your query and look at the execution stages - you will find that the sorting input stage examines all of the filtered (in your case all documents in the collection) and then the limit is applied.
Let's go through an example.
Imagine there is a foo database with a test collection having 6 documents:
>>> col = db.foo.test
>>> for doc in col.find():
... print(doc)
{'time': '2016-03-28 12:12:00', '_id': ObjectId('56f9716ce4b05e6b92be87f2'), 'value': 90}
{'time': '2016-03-28 12:13:00', '_id': ObjectId('56f971a3e4b05e6b92be87fc'), 'value': 82}
{'time': '2016-03-28 12:14:00', '_id': ObjectId('56f971afe4b05e6b92be87fd'), 'value': 75}
{'time': '2016-03-28 12:15:00', '_id': ObjectId('56f971b7e4b05e6b92be87ff'), 'value': 72}
{'time': '2016-03-28 12:16:00', '_id': ObjectId('56f971c0e4b05e6b92be8803'), 'value': 81}
{'time': '2016-03-28 12:17:00', '_id': ObjectId('56f971c8e4b05e6b92be8806'), 'value': 90}
Now, let's execute queries with different order of sort() and limit() and check the results and the explain plan.
Sort and then limit:
>>> from pprint import pprint
>>> cursor = col.find().sort([('time', 1)]).limit(3)
>>> sort_limit_plan = cursor.explain()
>>> pprint(sort_limit_plan)
{u'executionStats': {u'allPlansExecution': [],
u'executionStages': {u'advanced': 3,
u'executionTimeMillisEstimate': 0,
u'inputStage': {u'advanced': 6,
u'direction': u'forward',
u'docsExamined': 6,
u'executionTimeMillisEstimate': 0,
u'filter': {u'$and': []},
u'invalidates': 0,
u'isEOF': 1,
u'nReturned': 6,
u'needFetch': 0,
u'needTime': 1,
u'restoreState': 0,
u'saveState': 0,
u'stage': u'COLLSCAN',
u'works': 8},
u'invalidates': 0,
u'isEOF': 1,
u'limitAmount': 3,
u'memLimit': 33554432,
u'memUsage': 213,
u'nReturned': 3,
u'needFetch': 0,
u'needTime': 8,
u'restoreState': 0,
u'saveState': 0,
u'sortPattern': {u'time': 1},
u'stage': u'SORT',
u'works': 13},
u'executionSuccess': True,
u'executionTimeMillis': 0,
u'nReturned': 3,
u'totalDocsExamined': 6,
u'totalKeysExamined': 0},
u'queryPlanner': {u'indexFilterSet': False,
u'namespace': u'foo.test',
u'parsedQuery': {u'$and': []},
u'plannerVersion': 1,
u'rejectedPlans': [],
u'winningPlan': {u'inputStage': {u'direction': u'forward',
u'filter': {u'$and': []},
u'stage': u'COLLSCAN'},
u'limitAmount': 3,
u'sortPattern': {u'time': 1},
u'stage': u'SORT'}},
u'serverInfo': {u'gitVersion': u'6ce7cbe8c6b899552dadd907604559806aa2e9bd',
u'host': u'h008742.mongolab.com',
u'port': 53439,
u'version': u'3.0.7'}}
Limit and then sort:
>>> cursor = col.find().limit(3).sort([('time', 1)])
>>> limit_sort_plan = cursor.explain()
>>> pprint(limit_sort_plan)
{u'executionStats': {u'allPlansExecution': [],
u'executionStages': {u'advanced': 3,
u'executionTimeMillisEstimate': 0,
u'inputStage': {u'advanced': 6,
u'direction': u'forward',
u'docsExamined': 6,
u'executionTimeMillisEstimate': 0,
u'filter': {u'$and': []},
u'invalidates': 0,
u'isEOF': 1,
u'nReturned': 6,
u'needFetch': 0,
u'needTime': 1,
u'restoreState': 0,
u'saveState': 0,
u'stage': u'COLLSCAN',
u'works': 8},
u'invalidates': 0,
u'isEOF': 1,
u'limitAmount': 3,
u'memLimit': 33554432,
u'memUsage': 213,
u'nReturned': 3,
u'needFetch': 0,
u'needTime': 8,
u'restoreState': 0,
u'saveState': 0,
u'sortPattern': {u'time': 1},
u'stage': u'SORT',
u'works': 13},
u'executionSuccess': True,
u'executionTimeMillis': 0,
u'nReturned': 3,
u'totalDocsExamined': 6,
u'totalKeysExamined': 0},
u'queryPlanner': {u'indexFilterSet': False,
u'namespace': u'foo.test',
u'parsedQuery': {u'$and': []},
u'plannerVersion': 1,
u'rejectedPlans': [],
u'winningPlan': {u'inputStage': {u'direction': u'forward',
u'filter': {u'$and': []},
u'stage': u'COLLSCAN'},
u'limitAmount': 3,
u'sortPattern': {u'time': 1},
u'stage': u'SORT'}},
u'serverInfo': {u'gitVersion': u'6ce7cbe8c6b899552dadd907604559806aa2e9bd',
u'host': u'h008742.mongolab.com',
u'port': 53439,
u'version': u'3.0.7'}}
As you can see, in both cases the sort is applied first and affects all the 6 documents and then the limit limits the results to 3.
And, the execution plans are exactly the same:
>>> from copy import deepcopy # just in case
>>> cursor = col.find().sort([('time', 1)]).limit(3)
>>> sort_limit_plan = deepcopy(cursor.explain())
>>> cursor = col.find().limit(3).sort([('time', 1)])
>>> limit_sort_plan = deepcopy(cursor.explain())
>>> sort_limit_plan == limit_sort_plan
True
Also see:
How do you tell Mongo to sort a collection before limiting the results?
The mongodb documentation states that the skip() method controls the starting point of the results set, followed by sort() and ends with the limit() method.
This is regardless the order of your code. The reason is that mongo gets all the methods for the query, then it orders the skip-sort-limit methods in that exact order, and then runs the query.
I suspect, you're passing wrong key in sort parameter. something like "$key_name" instead of just "key_name"
Refer to "How do you tell Mongo to sort a collection before limiting the results?" — a solution for the same problem as yours.
Logically it should be whatever comes first in pipeline, But MongoDB always sort first before limit.
In my test Sort operation does takes precedence regardless of if it's coming before skip or after. However, it appears to be very strange behavior to me.
My sample dataset is:
[
{
"_id" : ObjectId("56f845fea524b4d098e0ef81"),
"number" : 48.98052410874508
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef82"),
"number" : 50.98747461471063
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef83"),
"number" : 81.32911244349772
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef84"),
"number" : 87.95549919039071
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef85"),
"number" : 81.63582683594402
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef86"),
"number" : 43.25696270026136
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef87"),
"number" : 88.22046335409453
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef88"),
"number" : 64.00556739160076
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef89"),
"number" : 16.09353150244296
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef8a"),
"number" : 17.46667776660574
}
]
Python test code:
import pymongo

# Demonstrate that the chaining order of limit() and sort() on a pymongo
# cursor does not change the result: the server always sorts before limiting.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017")
coll = mongo_client.get_database("test").get_collection("collection")

print("----------------[limit -> sort]--------------------------")
for doc in coll.find().limit(5).sort([("number", pymongo.ASCENDING)]):
    print(doc)

print("----------------[sort -> limit]--------------------------")
for doc in coll.find().sort([("number", pymongo.ASCENDING)]).limit(5):
    print(doc)
Result:
----------------[limit -> sort]--------------------------
{u'_id': ObjectId('56f845fea524b4d098e0ef89'), u'number': 16.09353150244296}
{u'_id': ObjectId('56f845fea524b4d098e0ef8a'), u'number': 17.46667776660574}
{u'_id': ObjectId('56f845fea524b4d098e0ef86'), u'number': 43.25696270026136}
{u'_id': ObjectId('56f845fea524b4d098e0ef81'), u'number': 48.98052410874508}
{u'_id': ObjectId('56f845fea524b4d098e0ef82'), u'number': 50.98747461471063}
----------------[sort -> limit]--------------------------
{u'_id': ObjectId('56f845fea524b4d098e0ef89'), u'number': 16.09353150244296}
{u'_id': ObjectId('56f845fea524b4d098e0ef8a'), u'number': 17.46667776660574}
{u'_id': ObjectId('56f845fea524b4d098e0ef86'), u'number': 43.25696270026136}
{u'_id': ObjectId('56f845fea524b4d098e0ef81'), u'number': 48.98052410874508}
{u'_id': ObjectId('56f845fea524b4d098e0ef82'), u'number': 50.98747461471063}
The accepted answer didn't work for me, but this does:
# Same query, conventionally spaced: sort passed as a find() keyword,
# limit applied to the returned cursor.
last5 = db.collection.find({'key': "YOURKEY"}, sort=[('_id', pymongo.DESCENDING)]).limit(5)
with the limit outside and the sort inside of the find argument.