Related
I have a list of dictionaries (API response) and I use the following function to search for certain nations:
def nation_search(self):
    """Look up a nation dict in ``nations_v2`` by nation name, then by leader.

    Note: this is written as a method but treats ``self`` as the search
    term itself (anything string-convertible).  Matching is case-insensitive.
    Returns the first matching dict, or ``False`` when neither the nation
    name nor the leader name matches exactly.
    """
    # Normalize the query once instead of re-formatting and re-lowering it
    # inside every comparison ((f"{self}").lower() was redundant — str() is
    # the idiomatic spelling and the value never changes during the scan).
    query = str(self).lower()
    by_nation = next(
        (item for item in nations_v2 if item["nation"].lower() == query),
        False,
    )
    if by_nation:
        return by_nation
    # Fall back to matching on the leader's name.
    return next(
        (item for item in nations_v2 if item["leader"].lower() == query),
        False,
    )
2 examples :
nations_v2 = [{'nation_id': 5270, 'nation': 'Indo-Froschtia', 'leader': 'Saxplayer', 'continent': 2, 'war_policy': 4, 'domestic_policy': 2, 'color': 15, 'alliance_id': 790, 'alliance': 'Rose', 'alliance_position': 3, 'cities': 28, 'offensive_wars': 0, 'defensive_wars': 0, 'score': 4945, 'v_mode': False, 'v_mode_turns': 0, 'beige_turns': 0, 'last_active': '2020-08-10 04:04:48', 'founded': '2014-08-05 00:09:31', 'soldiers': 0, 'tanks': 0, 'aircraft': 2100, 'ships': 0, 'missiles': 0, 'nukes': 0},
{'nation_id': 582, 'nation': 'Nightsilver Woods', 'leader': 'Luna', 'continent': 4, 'war_policy': 4, 'domestic_policy': 2, 'color': 10, 'alliance_id': 615, 'alliance': 'Seven Kingdoms', 'alliance_position': 2, 'cities': 23, 'offensive_wars': 0, 'defensive_wars': 0, 'score': 3971.25, 'v_mode': False, 'v_mode_turns': 0, 'beige_turns': 0, 'last_active': '2020-08-10 00:22:16', 'founded': '2014-08-05 00:09:35', 'soldiers': 0, 'tanks': 0, 'aircraft': 1725, 'ships': 115, 'missiles': 0, 'nukes': 0}]
I want to add a fuzzy-search using fuzzywuzzy to get 5 possible matches in case there's a spelling error in the argument passed into the function but I can't seem to figure it out.
I only want to search in values for nation and leader.
If you need 5 possible matches, use process.
from fuzzywuzzy import process


def nation_search(self):
    """Fuzzy-search ``nations_v2`` on the 'nation' and 'leader' fields.

    ``self`` is the (string-convertible) search term.  Returns a pair of
    lists, each holding up to 5 ``(candidate, score)`` tuples: the best
    fuzzy matches among nation names and among leader names respectively.
    """
    # Normalize the search term once; (f"{self}").lower() repeated per call
    # was redundant — str() is the idiomatic conversion.
    query = str(self).lower()
    nations_only = [v2["nation"].lower() for v2 in nations_v2]
    leaders_only = [v2["leader"].lower() for v2 in nations_v2]
    matched_nations = process.extract(query, nations_only, limit=5)
    matched_leaders = process.extract(query, leaders_only, limit=5)
    return matched_nations, matched_leaders
Currently I've indexed my mongoDB collection into Elasticsearch running in a docker container. I am able to query a document by it's exact name, but Elasticsearch is unable to match the query if it is only part of the name. Here is an example:
>>> es = Elasticsearch('0.0.0.0:9200')
>>> es.indices.get_alias('*')
{'mongodb_meta': {'aliases': {}}, 'sigstore': {'aliases': {}}, 'my-index': {'aliases': {}}}
>>> x = es.search(index='sigstore', body={'query': {'match': {'name': 'KEGG_GLYCOLYSIS_GLUCONEOGENESIS'}}})
>>> x
{'took': 198, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 8.062855, 'hits': [{'_index': 'sigstore', '_type': 'sigs', '_id': '5d66c23228144432307c2c49', '_score': 8.062855, '_source': {'id': 1, 'name': 'KEGG_GLYCOLYSIS_GLUCONEOGENESIS', 'description': 'http://www.broadinstitute.org/gsea/msigdb/cards/KEGG_GLYCOLYSIS_GLUCONEOGENESIS', 'members': ['ACSS2', 'GCK', 'PGK2', 'PGK1', 'PDHB', 'PDHA1', 'PDHA2', 'PGM2', 'TPI1', 'ACSS1', 'FBP1', 'ADH1B', 'HK2', 'ADH1C', 'HK1', 'HK3', 'ADH4', 'PGAM2', 'ADH5', 'PGAM1', 'ADH1A', 'ALDOC', 'ALDH7A1', 'LDHAL6B', 'PKLR', 'LDHAL6A', 'ENO1', 'PKM2', 'PFKP', 'BPGM', 'PCK2', 'PCK1', 'ALDH1B1', 'ALDH2', 'ALDH3A1', 'AKR1A1', 'FBP2', 'PFKM', 'PFKL', 'LDHC', 'GAPDH', 'ENO3', 'ENO2', 'PGAM4', 'ADH7', 'ADH6', 'LDHB', 'ALDH1A3', 'ALDH3B1', 'ALDH3B2', 'ALDH9A1', 'ALDH3A2', 'GALM', 'ALDOA', 'DLD', 'DLAT', 'ALDOB', 'G6PC2', 'LDHA', 'G6PC', 'PGM1', 'GPI'], 'user': 'naji.taleb#medimmune.com', 'type': 'public', 'level1': 'test', 'level2': 'test2', 'time': '08-28-2019 14:03:29 EDT-0400', 'source': 'File', 'mapped': [''], 'notmapped': [''], 'organism': 'human'}}]}}
When using the full name of the document, elasticsearch is able to successfully query it. But this is what happens when I attempt to search part of the name or use a wildcard:
>>> x = es.search(index='sigstore', body={'query': {'match': {'name': 'KEGG'}}})
>>> x
{'took': 17, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}}
>>> x = es.search(index='sigstore', body={'query': {'match': {'name': 'KEGG*'}}})
>>> x
{'took': 3, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}}
In addition to the default index settings I also tried making an index that allows the use of the nGram tokenizer to enable me to do partial search, but that also didn't work. These are the settings I used for that index:
{
"sigstore": {
"aliases": {},
"mappings": {},
"settings": {
"index": {
"max_ngram_diff": "99",
"number_of_shards": "1",
"provided_name": "sigstore",
"creation_date": "1579200699718",
"analysis": {
"filter": {
"substring": {
"type": "nGram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"str_index_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "keyword"
},
"str_search_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "keyword"
}
}
},
"number_of_replicas": "1",
"uuid": "3nf915U6T9maLdSiJozvGA",
"version": {
"created": "7050199"
}
}
}
}
}
and this is the corresponding python command that created it:
es.indices.create(index='sigstore',body={"mappings": {},"settings": { 'index': { "analysis": {"analyzer": {"str_search_analyzer": {"tokenizer": "keyword","filter": ["lowercase"]},"str_index_analyzer": {"tokenizer": "keyword","filter": ["lowercase", "substring"]}},"filter": {"substring": {"type": "nGram","min_gram": 1,"max_gram": 20}}}},'max_ngram_diff': '99'}})
I use mongo-connector as the pipeline between my mongoDB collection and elasticsearch. This is the command I use to start it:
mongo-connector -m mongodb://username:password#xx.xx.xxx.xx:27017/?authSource=admin -t elasticsearch:9200 -d elastic2_doc_manager -n sigstore.sigs
I'm unsure as to why my elasticsearch is unable to get a partial match, and wondering if there is some setting I'm missing or if there's some crucial mistake I've made somewhere. Thanks for reading.
Versions
MongoDB 4.0.10
elasticsearch==7.1.0
elastic2-doc-manager[elastic5]
Updated after checked your gist:
You need to apply the mapping to your field as written in the doc, cf the first link I share in the comment.
You need to do it after applying the settings on your index according to the gist it's line 11.
Something like:
PUT /your_index/_mapping
{
"properties": {
"name": {
"type": "keyword",
"ignore_above": 256,
"fields": {
"str_search_analyzer": {
"type": "text",
"analyzer": "str_search_analyzer"
}
}
}
}
}
After you set the mapping need to apply it to your document, using update_by_query
https://www.elastic.co/guide/en/elasticsearch/reference/master/docs-update-by-query.html
So you can continue to search with term search on your field name as it will be indexed with a keyword mapping (exact match) and on the sub_field name.str_search_analyzer with part of the word.
your_keyword = 'KEGG_GLYCOLYSIS_GLUCONEOGENESIS' OR 'KEGG*'
x = es.search(index='sigstore', body={'query': {'bool': {'should':[{'term': {'name': your_keyword}},
{'match': {'name.str_search_analyzer': your_keyword}}
]}}
})
So I have this list of dictionaries, "runs":
[{
'id': 12,
'suite_id': 2,
'name': 'name',
'description': "desc.",
'nice_id': 3,
'joku_id': None,
'onko': False,
'eikai': False,
'tehty': None,
'config': None,
'config_ids': [],
'passed_count': 1,
'blocked_count': 2,
'untested_count': 3,
'retest_count': 4,
'failed_count': 5,
'custom_status1_count': 0,
'custom_status2_count': 0,
'custom_status3_count': 0,
'custom_status4_count': 0,
'custom_status5_count': 0,
'custom_status6_count': 0,
'custom_status7_count': 0,
'projekti_id': 1,
'plan_id': None,
'created_on': 12343214,
'created_by': 11,
'url': 'google.com'
}, {
'id': 16,
'suite_id': 2,
'name': 'namae)',
'description': "desc1",
'nice_id': 5,
'joku_id': None,
'onko': False,
'eikai': False,
'tehty': None,
'config': None,
'config_ids': [],
'passed_count': 100,
'blocked_count': 1,
'untested_count': 3,
'retest_count': 2,
'failed_count': 5,
'custom_status1_count': 0,
'custom_status2_count': 0,
'custom_status3_count': 0,
'custom_status4_count': 0,
'custom_status5_count': 0,
'custom_status6_count': 0,
'custom_status7_count': 0,
'prokti_id': 7,
'plan_id': None,
'created_on': 4321341644,
'created_by': 11,
'url': 'google.com/2' }
The "id" key appears about 50 times; that is just a part of the data.
I need to find all "id" values (not joku_id, nice_id etc. — only "id") and make a string/dict of them
and same for name, and description
i have tried:
j = json.load(run)
ids = (j["id"])
j = json.load(run)
names = (j["name"])
j = json.load(run)
descriptions = (j["description"])
but it returns:
AttributeError: 'list' object has no attribute 'read'
I also need to send a request with specific id and in this case the specific id is marked by o. so id[o]
the request code is below:
test = client.send_get('get_tests/1/ ')
so i need to have the id[o] instead of the 1.
i have tried
test = client.send_get('get_tests/' + id[o] + '/ ')
but it returns:
TypeError: 'int' object is not subscriptable
May be this can help you.
# Collect every top-level "id" value from the list of run dicts.
# NOTE(review): the original name shadows the builtin `id`; it is kept here
# only because the surrounding text refers to it — prefer `ids` in real code.
id = [run.get('id') for run in runs]
[12, 16]
You are trying to pass a list to the json.load function. Please read the docs. load() does not accept lists; it accepts
a .read()-supporting file-like object containing a JSON document
If you want your result in list of dictionary then:
# Flatten the wanted key/value pairs into single-pair dicts.
# Iterate the list directly (no range(len(...))) and use a membership
# test instead of a chained `or` comparison.
result = [
    {key: value}
    for record in data
    for key, value in record.items()
    if key in ('id', 'name', 'description')
]
output:
[{'name': 'name'}, {'id': 12}, {'description': 'desc.'}, {'name': 'namae)'}, {'id': 16}, {'description': 'desc1'}]
the data is your list of dictionary data.
Hope this answer is helpful for you.
I'm using boto3, and I ran this loop:
# Print each instance description dict (fixed: the original was missing
# the colon after the `for` header, which is a SyntaxError).
for i in x["Instances"]:
    print(i)
Then I get:
{
'AmiLaunchIndex': 0,
'Hypervisor': 'xen',
'VpcId': 'vpc-a790ac1',
'Architecture': 'x86_64',
'InstanceId': 'i-0bab3fb8314',
'PrivateDnsName': 'ip-10-c2.internal',
'BlockDeviceMappings': [{
'Ebs': {
'DeleteOnTermination': True,
'AttachTime': datetime.datetime(2017, 4, 4, 20, 44, 27, tzinfo = tzutc()),
'VolumeId': 'vol-07fd506f45',
'Status': 'attached'
},
'DeviceName': '/dev/xvda'
}, {
'Ebs': {
'DeleteOnTermination': False,
'AttachTime': datetime.datetime(2017, 4, 6, 1, 12, 45, tzinfo = tzutc()),
'VolumeId': 'vol-01ef36c45',
'Status': 'attached'
},
'DeviceName': '/dev/sdf'
}],
'RootDeviceName': '/dev/xvda',
'InstanceType': 't2.micro',
'EnaSupport': True,
'ClientToken': 'ODrMT1465413',
'EbsOptimized': False,
'SubnetId': 'subnet-fb1a4',
'Monitoring': {
'State': 'disabled'
},
'PublicDnsName': '',
'StateTransitionReason': 'User initiated (2017-04-06 01:15:22 GMT)',
'PrivateIpAddress': '10.10.4.116',
'RootDeviceType': 'ebs',
'Tags': [{
'Value': 'wp2',
'Key': 'Name'
}, {
'Value': 'true',
'Key': 'backup'
}],
'ImageId': 'ami-0976f01f',
'StateReason': {
'Code': 'Client.UserInitiadShutdown',
'Message': 'Client.UserInitiatedShutdown: User initiated shutdown'
},
'KeyName': 'pair2',
'ProductCodes': [],
'State': {
'Name': 'stopped',
'Code': 80
},
'LaunchTime': datetime.datetime(2017, 4, 6, 1, 13, 1, tzinfo = tzutc()),
'Placement': {
'AvailabilityZone': 'us-east-1b',
'GroupName': '',
'Tenancy': 'default'
},
'SourceDestCheck': True,
'NetworkInterfaces': [{
'Description': 'Primary network interface',
'PrivateIpAddress': '10.10.4.116',
'PrivateIpAddresses': [{
'Primary': True,
'PrivateIpAddress': '10.10.4.116'
}],
'Status': 'in-use',
'SubnetId': 'subnet-ffbcba4',
'VpcId': 'vpc-a790a7c1',
'Attachment': {
'DeleteOnTermination': True,
'AttachTime': datetime.datetime(2017, 4, 4, 20, 44, 26, tzinfo = tzutc()),
'DeviceIndex': 0,
'AttachmentId': 'eni-attach-c8398',
'Status': 'attached'
},
'Ipv6Addresses': [],
'OwnerId': '895548',
'MacAddress': '0e:31:4c4:b6',
'Groups': [{
'GroupId': 'sg-26c59',
'GroupName': 'web-dmz'
}],
'NetworkInterfaceId': 'eni-5383',
'SourceDestCheck': True
}],
'SecurityGroups': [{
'GroupId': 'sg-2cab59',
'GroupName': 'web-dmz'
}],
'VirtualizationType': 'hvm'
}
I'm trying to access the 'VolumeId' using something like:
for x in ["BlockDeviceMappings"][0]["Ebs"]["VolumeId"]:
print(x)
I get TypeError: string indices must be integers
It looks like 'BlockDeviceMappings' starts as a list with a dictionary in it, but I can't get to 'VolumeId'.
I've also tried:
for x in ["BlockDeviceMappings"][0]:
for k,v in ["Ebs"]:
print(v)
And I get:
ValueError: too many values to unpack (expected 2)
And I tried:
for x in ["BlockDeviceMappings"][0]:
for v in ["Ebs"]:
print(v)
Which prints 'Ebs' several times.
Could someone please point me in the right direction?
To get VolumeId please use
# Parenthesized call form works under both Python 2 and 3 (the bare
# `print x[...]` statement is a SyntaxError on Python 3).
print(x["Instances"][0]["BlockDeviceMappings"][0]["Ebs"]["VolumeId"])
you just missed x or _.
You are getting an error because ["BlockDeviceMappings"][0] is a list literal indexed at 0, which evaluates to the string "BlockDeviceMappings" — not to your data.
So you were then trying to index that string with string keys like ["Ebs"], which is what raises the TypeError.
To get all volumes:
# Walk every instance and every attached block device, printing each
# EBS volume id.  Fixed: the inner `for` was missing its colon, and the
# print statement is now a call so it runs on Python 3 as well.
for i in x["Instances"]:
    for b in i["BlockDeviceMappings"]:
        print(b["Ebs"]["VolumeId"])
If you have to extract data from complex sturctures like that often, try some quirky search library like github.com/akesterson/dpath-python , it can extract data just using keywords
Despite reading people's answers stating that the sort is done first, the evidence seems to show something different — that the limit is applied before the sort. Is there a way to force the sort to always happen first?
views = mongo.db.view_logging.find().sort([('count', 1)]).limit(10)
Whether I use .sort().limit() or .limit().sort(), the limit takes precedence. I wonder if this is something to do with pymongo...
According to the documentation, regardless of which goes first in your chain of commands, sort() would be always applied before the limit().
You can also study the .explain() results of your query and look at the execution stages - you will find that the sorting input stage examines all of the filtered (in your case all documents in the collection) and then the limit is applied.
Let's go through an example.
Imagine there is a foo database with a test collection having 6 documents:
>>> col = db.foo.test
>>> for doc in col.find():
... print(doc)
{'time': '2016-03-28 12:12:00', '_id': ObjectId('56f9716ce4b05e6b92be87f2'), 'value': 90}
{'time': '2016-03-28 12:13:00', '_id': ObjectId('56f971a3e4b05e6b92be87fc'), 'value': 82}
{'time': '2016-03-28 12:14:00', '_id': ObjectId('56f971afe4b05e6b92be87fd'), 'value': 75}
{'time': '2016-03-28 12:15:00', '_id': ObjectId('56f971b7e4b05e6b92be87ff'), 'value': 72}
{'time': '2016-03-28 12:16:00', '_id': ObjectId('56f971c0e4b05e6b92be8803'), 'value': 81}
{'time': '2016-03-28 12:17:00', '_id': ObjectId('56f971c8e4b05e6b92be8806'), 'value': 90}
Now, let's execute queries with different order of sort() and limit() and check the results and the explain plan.
Sort and then limit:
>>> from pprint import pprint
>>> cursor = col.find().sort([('time', 1)]).limit(3)
>>> sort_limit_plan = cursor.explain()
>>> pprint(sort_limit_plan)
{u'executionStats': {u'allPlansExecution': [],
u'executionStages': {u'advanced': 3,
u'executionTimeMillisEstimate': 0,
u'inputStage': {u'advanced': 6,
u'direction': u'forward',
u'docsExamined': 6,
u'executionTimeMillisEstimate': 0,
u'filter': {u'$and': []},
u'invalidates': 0,
u'isEOF': 1,
u'nReturned': 6,
u'needFetch': 0,
u'needTime': 1,
u'restoreState': 0,
u'saveState': 0,
u'stage': u'COLLSCAN',
u'works': 8},
u'invalidates': 0,
u'isEOF': 1,
u'limitAmount': 3,
u'memLimit': 33554432,
u'memUsage': 213,
u'nReturned': 3,
u'needFetch': 0,
u'needTime': 8,
u'restoreState': 0,
u'saveState': 0,
u'sortPattern': {u'time': 1},
u'stage': u'SORT',
u'works': 13},
u'executionSuccess': True,
u'executionTimeMillis': 0,
u'nReturned': 3,
u'totalDocsExamined': 6,
u'totalKeysExamined': 0},
u'queryPlanner': {u'indexFilterSet': False,
u'namespace': u'foo.test',
u'parsedQuery': {u'$and': []},
u'plannerVersion': 1,
u'rejectedPlans': [],
u'winningPlan': {u'inputStage': {u'direction': u'forward',
u'filter': {u'$and': []},
u'stage': u'COLLSCAN'},
u'limitAmount': 3,
u'sortPattern': {u'time': 1},
u'stage': u'SORT'}},
u'serverInfo': {u'gitVersion': u'6ce7cbe8c6b899552dadd907604559806aa2e9bd',
u'host': u'h008742.mongolab.com',
u'port': 53439,
u'version': u'3.0.7'}}
Limit and then sort:
>>> cursor = col.find().limit(3).sort([('time', 1)])
>>> limit_sort_plan = cursor.explain()
>>> pprint(limit_sort_plan)
{u'executionStats': {u'allPlansExecution': [],
u'executionStages': {u'advanced': 3,
u'executionTimeMillisEstimate': 0,
u'inputStage': {u'advanced': 6,
u'direction': u'forward',
u'docsExamined': 6,
u'executionTimeMillisEstimate': 0,
u'filter': {u'$and': []},
u'invalidates': 0,
u'isEOF': 1,
u'nReturned': 6,
u'needFetch': 0,
u'needTime': 1,
u'restoreState': 0,
u'saveState': 0,
u'stage': u'COLLSCAN',
u'works': 8},
u'invalidates': 0,
u'isEOF': 1,
u'limitAmount': 3,
u'memLimit': 33554432,
u'memUsage': 213,
u'nReturned': 3,
u'needFetch': 0,
u'needTime': 8,
u'restoreState': 0,
u'saveState': 0,
u'sortPattern': {u'time': 1},
u'stage': u'SORT',
u'works': 13},
u'executionSuccess': True,
u'executionTimeMillis': 0,
u'nReturned': 3,
u'totalDocsExamined': 6,
u'totalKeysExamined': 0},
u'queryPlanner': {u'indexFilterSet': False,
u'namespace': u'foo.test',
u'parsedQuery': {u'$and': []},
u'plannerVersion': 1,
u'rejectedPlans': [],
u'winningPlan': {u'inputStage': {u'direction': u'forward',
u'filter': {u'$and': []},
u'stage': u'COLLSCAN'},
u'limitAmount': 3,
u'sortPattern': {u'time': 1},
u'stage': u'SORT'}},
u'serverInfo': {u'gitVersion': u'6ce7cbe8c6b899552dadd907604559806aa2e9bd',
u'host': u'h008742.mongolab.com',
u'port': 53439,
u'version': u'3.0.7'}}
As you can see, in both cases the sort is applied first and affects all the 6 documents and then the limit limits the results to 3.
And, the execution plans are exactly the same:
>>> from copy import deepcopy # just in case
>>> cursor = col.find().sort([('time', 1)]).limit(3)
>>> sort_limit_plan = deepcopy(cursor.explain())
>>> cursor = col.find().limit(3).sort([('time', 1)])
>>> limit_sort_plan = deepcopy(cursor.explain())
>>> sort_limit_plan == limit_sort_plan
True
Also see:
How do you tell Mongo to sort a collection before limiting the results?
The mongodb documentation states that the skip() method controls the starting point of the results set, followed by sort() and ends with the limit() method.
This is regardless the order of your code. The reason is that mongo gets all the methods for the query, then it orders the skip-sort-limit methods in that exact order, and then runs the query.
I suspect, you're passing wrong key in sort parameter. something like "$key_name" instead of just "key_name"
Refer to "How do you tell Mongo to sort a collection before limiting the results?" — a solution for the same problem as yours.
Logically it should be whatever comes first in pipeline, But MongoDB always sort first before limit.
In my test Sort operation does takes precedence regardless of if it's coming before skip or after. However, it appears to be very strange behavior to me.
My sample dataset is:
[
{
"_id" : ObjectId("56f845fea524b4d098e0ef81"),
"number" : 48.98052410874508
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef82"),
"number" : 50.98747461471063
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef83"),
"number" : 81.32911244349772
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef84"),
"number" : 87.95549919039071
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef85"),
"number" : 81.63582683594402
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef86"),
"number" : 43.25696270026136
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef87"),
"number" : 88.22046335409453
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef88"),
"number" : 64.00556739160076
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef89"),
"number" : 16.09353150244296
},
{
"_id" : ObjectId("56f845fea524b4d098e0ef8a"),
"number" : 17.46667776660574
}
]
Python test code:
import pymongo

# Demonstrate that the chaining order of limit() and sort() on a pymongo
# cursor does not change the result: the server always sorts before limiting.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017")
coll = mongo_client.get_database("test").get_collection("collection")

print("----------------[limit -> sort]--------------------------")
for doc in coll.find().limit(5).sort([("number", pymongo.ASCENDING)]):
    print(doc)

print("----------------[sort -> limit]--------------------------")
for doc in coll.find().sort([("number", pymongo.ASCENDING)]).limit(5):
    print(doc)
Result:
----------------[limit -> sort]--------------------------
{u'_id': ObjectId('56f845fea524b4d098e0ef89'), u'number': 16.09353150244296}
{u'_id': ObjectId('56f845fea524b4d098e0ef8a'), u'number': 17.46667776660574}
{u'_id': ObjectId('56f845fea524b4d098e0ef86'), u'number': 43.25696270026136}
{u'_id': ObjectId('56f845fea524b4d098e0ef81'), u'number': 48.98052410874508}
{u'_id': ObjectId('56f845fea524b4d098e0ef82'), u'number': 50.98747461471063}
----------------[sort -> limit]--------------------------
{u'_id': ObjectId('56f845fea524b4d098e0ef89'), u'number': 16.09353150244296}
{u'_id': ObjectId('56f845fea524b4d098e0ef8a'), u'number': 17.46667776660574}
{u'_id': ObjectId('56f845fea524b4d098e0ef86'), u'number': 43.25696270026136}
{u'_id': ObjectId('56f845fea524b4d098e0ef81'), u'number': 48.98052410874508}
{u'_id': ObjectId('56f845fea524b4d098e0ef82'), u'number': 50.98747461471063}
The accepted answer didn't work for me, but this does:
# Same query, conventionally spaced: sort passed as a find() keyword,
# limit applied to the returned cursor.
last5 = db.collection.find({'key': "YOURKEY"}, sort=[('_id', pymongo.DESCENDING)]).limit(5)
with the limit outside and the sort inside of the find argument.