Matching / Mapping lists with elasticsearch - python

There is a list in mongodb,
eg:
db_name = "Test"
collection_name = "Map"
db.Map.findOne()
{
"_id" : ObjectId(...),
"Id" : "576",
"FirstName" : "xyz",
"LastName" : "abc",
"skills" : [
"C++",
"Java",
"Python",
"MongoDB",
]
}
There is a list in an Elasticsearch index (I am using Kibana to execute queries):
GET /user/_search
{
"took" : 31,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 7,
"max_score" : 1.0,
"hits" : [
{
"_index" : "customer",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"name" : "xyz abc",
"Age" : 21,
"skills" : [
"C++",
"Java",
"Python",
"MongoDB",
]
}
},
]
}
}
Can anyone help with an Elasticsearch query that will match the two records based on skills?
I am using Python to write the code.
If a match is found, I want to get the first name and last name of that user:
First name : "xyz"
Last name : "abc"

Assuming you are indexing all the documents in Elasticsearch and, of these, you want to match the documents whose skills contain both java and mongodb, the query will be:
{
"query": {
"bool": {
"filter": [
{
"term": {
"skills": "mongodb"
}
},
{
"term": {
"skills": "java"
}
}
]
}
}
}

Related

Mongodb find nested dict element

{
"_id" : ObjectId("63920f965d15e98e3d7c450c"),
"first_name" : "mymy",
"last_activity" : 1669278303.4341061,
"username" : null,
"dates" : {
"29.11.2022" : {
},
"30.11.2022" : {
}
},
"user_id" : "1085116517"
}
How can I find all documents whose dates field contains the key 29.11.2022? I tried many things, but in all of them the dot characters in the key are treated as something else instead of as part of the field name.
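Use $getField inside $expr; it looks the field up by its literal name, so the dots in "29.11.2022" are not treated as path separators: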
db.collection.find({
$expr: {
$eq: [
{},
{
"$getField": {
"field": "29.11.2022",
"input": "$dates"
}
}
]
}
})
Mongo Playground

How to return the json in a required format

Json is below
result = {
"took" : 21,
"timed_out" : False,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "data",
"_type" : "_doc",
"_id" : "qwcs",
"_score" : 1.0,
"_source" : {
"id" : "10",
"name" : "Country ",
"description" : "This product contains all currency details",
"Owner" : {
"id" : "11",
"Name" : "David",
"Email" : "nons#utc.com",
"role" : "Analyst"
},
"Area" : [
"Data Management"
],
"Type" : [
"API",
"TXT"
],
"Level" : [
"A"
]
}
}
]
}
}
I wrote Python code that fetches the data from Elasticsearch through an API call; the result is shown above.
sample api: http://utc.com/search/Owner.id?=11
Back-end query will generate {'query': {'match': {'Owner.id': '11'}}}
But I need only a small part of it; the expected output is below:
"Owner" : {
"id" : "11",
"Name" : "David",
"Email" : "nons#utc.com",
"role" : "Analyst"
}
If you're saying you want to return only the Owners in the hits list with an id matching your query, you can use a list comprehension:
# The id to look for is read from the same query body that is sent to Elasticsearch.
query = {'query': {'match': {'Owner.id': '11'}}}
wanted_id = query['query']['match']['Owner.id']
# Lazily pull the Owner sub-document out of every hit, then keep the matching ones.
candidates = (hit['_source']['Owner'] for hit in result['hits']['hits'])
owners = [owner for owner in candidates if owner['id'] == wanted_id]
print(owners)
Output:
[{'id': '11', 'Name': 'David', 'Email': 'nons#utc.com', 'role': 'Analyst'}]

My code is working in mongodb but not working in pymongo

I have documents in a collection, and I want to find a document and update elements of its list.
Here is sample data:
{
{
"_id" : ObjectId("5edd3faaf6c9d938e0bfd966"),
"id" : 1,
"status" : "XXX",
"number" : [
{
"code" : "AAA"
},
{
"code" : "CVB"
},
{
"code" : "AAA"
},
{
"code" : "BBB"
}
]
},
{
"_id" : ObjectId("asseffsfpo2dedefwef"),
"id" : 2,
"status" : "TUY",
"number" : [
{
"code" : "PPP"
},
{
"code" : "SSD"
},
{
"code" : "HDD"
},
{
"code" : "IOO"
}
]
}
}
I planned to find the document where "id": 1 and, for each element whose number.code is in ["AAA", "BBB"], change number.code to "VVV". I did it with the following code:
db.test.update(
{
id: 1,
"number.code": {$in: ["AAA", "BBB"]}
},
{
$set: {"number.$[elem].code": "VVV"}
},
{ "arrayFilters": [{ "elem.code": {$in: ["AAA", "BBB"]} }], "multi": true, "upsert": false
}
)
It works in the mongodb shell, but in Python (with pymongo) it fails with the following error:
raise TypeError("%s must be True or False" % (option,))
TypeError: upsert must be True or False
Please help me. What can I do?
PyMongo just has a syntax that's a tad different. It would look like this:
# Select the document with id 1 that has at least one number.code among the target codes.
selector = {"id": 1, "number.code": {"$in": ["AAA", "BBB"]}}
# $[elem] limits the $set to the array entries accepted by array_filters below.
change = {"$set": {"number.$[elem].code": "VVV"}}
db.test.update_many(
    selector,
    change,
    array_filters=[{"elem.code": {"$in": ["AAA", "BBB"]}}],
    upsert=False,
)
multi flag not needed with update_many.
upsert is False by default hence also redundant.
You can find pymongo's docs here.

Elasticsearch full-text autocomplete

I'm using Elasticsearch through the python requests library. I've set up my analysers like so:
"analysis" : {
"analyzer": {
"my_basic_search": {
"type": "standard",
"stopwords": []
},
"my_autocomplete": {
"type": "custom",
"tokenizer": "keyword",
"filter": ["lowercase", "autocomplete"]
}
},
"filter": {
"autocomplete": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20,
}
}
}
I've got a list of artists who I'd like to search for using autocomplete: my current test case is 'bill w', which should match 'bill withers' etc - the artist mapping looks like this (this is the output of GET http://localhost:9200/my_index/artist/_mapping):
{
"my_index" : {
"mappings" : {
"artist" : {
"properties" : {
"clean_artist_name" : {
"type" : "string",
"analyzer" : "my_basic_search",
"fields" : {
"autocomplete" : {
"type" : "string",
"index_analyzer" : "my_autocomplete",
"search_analyzer" : "my_basic_search"
}
}
},
"submitted_date" : {
"type" : "date",
"format" : "basic_date_time"
},
"total_count" : {
"type" : "integer"
}
}
}
}
}
}
...and then I run this query to do the autocomplete:
"query": {
"function_score": {
"query": {
"bool": {
"must" : { "match": { "clean_artist_name.autocomplete": "bill w" } },
"should" : { "match": { "clean_artist_name": "bill w" } },
}
},
"functions": [
{
"script_score": {
"script": "artist-score"
}
}
]
}
}
This seems to match artists that contain either 'bill' or 'w' as well as 'bill withers': I only wanted to match artists that contain that exact string. The analyser seems to be working fine, here is the output of http://localhost:9200/my_index/_analyze?analyzer=my_autocomplete&text=bill%20w:
{
"tokens" : [ {
"token" : "b",
"start_offset" : 0,
"end_offset" : 6,
"type" : "word",
"position" : 1
}, {
"token" : "bi",
"start_offset" : 0,
"end_offset" : 6,
"type" : "word",
"position" : 1
}, {
"token" : "bil",
"start_offset" : 0,
"end_offset" : 6,
"type" : "word",
"position" : 1
}, {
"token" : "bill",
"start_offset" : 0,
"end_offset" : 6,
"type" : "word",
"position" : 1
}, {
"token" : "bill ",
"start_offset" : 0,
"end_offset" : 6,
"type" : "word",
"position" : 1
}, {
"token" : "bill w",
"start_offset" : 0,
"end_offset" : 6,
"type" : "word",
"position" : 1
} ]
}
So why is this not excluding matches with just 'bill' or 'w' in there? Is there something in my query that is allowing the results that only match with the my_basic_search analyser?
I believe you need a "term" query instead of a "match" one for your "must". Your artist names are already split into edge n-grams at index time, so the search text should match exactly one of those n-grams. A "match" query analyzes the search text again with the field's search analyzer (here splitting it into 'bill' and 'w'), whereas a "term" query looks the text up exactly as given:
"query": {
"function_score": {
"query": {
"bool": {
"must" : { "term": { "clean_artist_name.autocomplete": "bill w" } },
"should" : { "match": { "clean_artist_name": "bill w" } },
}
},
"functions": [
{
"script_score": {
"script": "artist-score"
}
}
]
}
}

pymongo get id for collection

I have this code:
def get_attribute_colour(colour_code):
    """Return the "Colour" attribute values whose code equals *colour_code*.

    Runs an aggregation on ``db.attributes`` and returns the ``'result'``
    entry of what ``aggregate`` hands back.
    """
    # NOTE(review): indexing the aggregate() return value with ['result']
    # presumably relies on an older PyMongo that returns a dict; newer
    # releases return a cursor instead - confirm the driver version.
    pipeline = [
        # Keep only the attribute document named "Colour".
        {'$match': {"name.en-UK": "Colour"}},
        # Emit one document per entry of the values array.
        {'$unwind': "$values"},
        # _id is not excluded here, so $project keeps the attribute's _id
        # alongside the two fields named below.
        {'$project': {"code": "$values.code", "valueId": "$values._id"}},
        # Keep only the value(s) carrying the requested colour code.
        {'$match': {"code": colour_code}},
    ]
    return db.attributes.aggregate(pipeline)['result']
that looks up a collection called attributes, which has the following structure:
> db.attributes.find({}).pretty();
{
"_id" : ObjectId("53b27bded901f26432996e00"),
"values" : [
{
"code" : "AQ",
"pmsCode" : "638c",
"name" : {
"en-UK" : "Aqua"
},
"tcxCode" : "16-4529 TCX",
"hexCode" : "#00aed8",
"images" : [
"AQ.jpg"
],
"_id" : ObjectId("53b27bded901f26432996d83")
},
{
"code" : "AQ",
"pmsCode" : "3115c",
"name" : {
"en-UK" : "Aqua"
},
"tcxCode" : "",
"hexCode" : "#00c4db",
"images" : [
"AQ.jpg"
],
"_id" : ObjectId("53b27bded901f26432996d84")
},
.....
}
],
"name" : {
"en-UK" : "Colour"
}
}
{
"_id" : ObjectId("53b27bded901f26432996e1b"),
"values" : [
{
"code" : 0,
"_id" : ObjectId("53b27bded901f26432996e01"),
"name" : {
"en-UK" : "0-3 MTHS"
}
},
.....
}
],
"name" : {
"en-UK" : "Size"
}
}
{
"_id" : ObjectId("53b27bded901f26432996e28"),
"values" : [
{
"Currency" : "GBP",
"_id" : ObjectId("53b27bded901f26432996e1c"),
"name" : {
"en-UK" : "Carton price list"
}
},
}
],
"name" : {
"en-UK" : "Price list"
}
}
>
Basically, there are 3 attributes (colour, size and price list), each of which has sub-documents called values.
In my get_attribute_colour function, how do I return the _id of the attribute within the results, so that I get something like:
{ attributeId: ObjectId("53b27bded901f26432996e00"),
valueId: ObjectId("53b27bded901f26432996d83") }
the result does return the _id:
[{u'code': u'AQ', u'_id': ObjectId('53b27bded901f26432996e00'), u'valueId': ObjectId('53b27bded901f26432996d83')}]
but I don't see where this is specified.
Any advice is much appreciated.

Categories