ElasticSearch - MoreLikeThis - Conditional Boost - python

I would like to set different "boost terms" according to the year of a publication, example:
"boost_term": 10.0 to produced after 2015
"boost_term": 5.0 to produced between 2010 and 2015
"boost_term": 3.0 to produced between 2010 and 2005
and so on..
Current code:
res = es.search(body={
"query": {
"dis_max": {
"queries": [
{
"more_like_this" : {
"fields": [
"article.name",
"article.year"
],
"like" : {
"_index" : "test-index",
"_type" : "researcher",
"_id" : "idResearcher,
},
"min_term_freq" : 1,
"min_doc_freq": 1,
"boost_terms": 5.0
}
},
]
}
}
})

Try something like:
{
"query": {
"bool": {
"must": [
{
"more_like_this": {
"fields": [
"article.name",
"article.year"
],
"like" : {
"_index" : "test-index",
"_type" : "researcher",
"_id" : "idResearcher",
},
"min_term_freq": 1,
"min_doc_freq": 1
}
}
],
"should": [
{
"range": {
"producedYear" : {
"gte" : "2015",
"boost" : 10.0
}
}
},
{
"range": {
"producedYear" : {
"gte" : "2010",
"lt" : "2015"
"boost" : 10.0
}
}
},{
"range": {
"producedYear" : {
"gte" : "2005",
"lt" : "2010"
"boost" : 3.0
}
}
}
]
}
}
}

Related

Convert a MultiIndex pandas DataFrame to a nested JSON

I have the following Dataframe with MultiIndex rows in pandas.
time available_slots status
month day
1 1 10:00:00 1 AVAILABLE
1 12:00:00 1 AVAILABLE
1 14:00:00 1 AVAILABLE
1 16:00:00 1 AVAILABLE
1 18:00:00 1 AVAILABLE
2 10:00:00 1 AVAILABLE
... ... ... ...
2 28 12:00:00 1 AVAILABLE
28 14:00:00 1 AVAILABLE
28 16:00:00 1 AVAILABLE
28 18:00:00 1 AVAILABLE
28 20:00:00 1 AVAILABLE
And I need to transform it to a hierarchical nested JSON as this:
[
{
"month": 1,
"days": [
{
"day": 1,
"slots": [
{
"time": "10:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "12:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
...
]
},
{
"day": 2,
"slots": [
...
]
}
]
},
{
"month": 2,
"days":[
{
"day": 1,
"slots": [
...
]
}
]
},
...
]
Unfortunately, it is not as easy as doing df.to_json(orient="index").
Does anyone know if there is a method in pandas to perform this kind of transformations? or in what way I could iterate over the DataFrame to build the final object?
Here's one way. Basically repeated groupby + apply(to_dict) + reset_index until we get the desired shape:
out = (df.groupby(level=[0,1])
.apply(lambda x: x.to_dict('records'))
.reset_index()
.rename(columns={0:'slots'})
.groupby('month')
.apply(lambda x: x[['day','slots']].to_dict('records'))
.reset_index()
.rename(columns={0:'days'})
.to_json(orient='records', indent=True)
)
Output:
[
{
"month":1,
"days":[
{
"day":1,
"slots":[
{
"time":"10:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"12:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"14:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"16:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"18:00:00",
"available_slots":1,
"status":"AVAILABLE"
}
]
},
{
"day":2,
"slots":[
{
"time":"10:00:00",
"available_slots":1,
"status":"AVAILABLE"
}
]
}
]
},
{
"month":2,
"days":[
{
"day":28,
"slots":[
{
"time":"12:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"14:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"16:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"18:00:00",
"available_slots":1,
"status":"AVAILABLE"
},
{
"time":"20:00:00",
"available_slots":1,
"status":"AVAILABLE"
}
]
}
]
}
]
You can use a double loop for each level of your index:
data = []
for month, df1 in df.groupby(level=0):
data.append({'month': month, 'days': []})
for day, df2 in df1.groupby(level=1):
data[-1]['days'].append({'day': day, 'slots': df2.to_dict('records')})
Output:
import json
print(json.dumps(data, indent=2))
[
{
"month": 1,
"days": [
{
"day": 1,
"slots": [
{
"time": "10:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "12:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "14:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "16:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "18:00:00",
"available_slots": 1,
"status": "AVAILABLE"
}
]
},
{
"day": 2,
"slots": [
{
"time": "10:00:00",
"available_slots": 1,
"status": "AVAILABLE"
}
]
}
]
},
{
"month": 2,
"days": [
{
"day": 28,
"slots": [
{
"time": "12:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "14:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "18:00:00",
"available_slots": 1,
"status": "AVAILABLE"
},
{
"time": "20:00:00",
"available_slots": 1,
"status": "AVAILABLE"
}
]
}
]
}
]

How to merge two DSL query for aggregation and filter

I need to search BusinessArea which is Research or Accounting this is array of fields(OR) statement
I need to search Role is Developer or Tester condition this is array of fields(OR) statement
I want to get the count of masterid of BusinessArea, designationNames, Role which is all the names
Name filter is "Group1"
Below is the dictionary
test= [ { 'masterid': '1', 'name': 'Group1', 'BusinessArea': [ 'Accounting','Research'], 'Designation': [ 'L1' 'L2' ] }, { 'masterid': '2', 'name': 'Group1', 'BusinessArea': ['Research','Accounting' ], 'Role': [ { 'id': '5032', 'name': 'Tester' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ 'L1' 'L2' ]}, { 'masterid': '3', 'name': 'Group1', 'BusinessArea': [ 'Engineering' ], 'Role': [ { 'id': '5032', 'name': 'Developer' }, { 'id': '5033', 'name': 'Developer', 'parentname': '' } ], 'Designation': [ 'L1' 'L2' ]}]
Below is the aggregation function
{
"size": 0,
"aggs": {
"countNames": {
"terms": {
"field": "BusinessArea.keyword"
}
},
"designationNames": {
"terms": {
"field": "Designation.keyword"
}
},
"Role": {
"terms": {
"field": "Role.name.keyword"
}
}
}
}
Below is the filtering query
{
"query": {
"bool": {
"must": [
{
"terms": {
"BusinessArea.keyword": [
"Research",
"Accounting"
]
}
},
{
"terms": {
"Role.name.keyword": [
"Developer",
"Tester"
]
}
}
]
}
}
}
"filter": [
"term": {
"name.keyword": "Group1"}]
I need to merge both query and output will be having from the both
Nice start !!! Now you can simply combine all those snippets like this:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"name.keyword": "Group1"
}
},
{
"terms": {
"BusinessArea.keyword": [
"Research",
"Accounting"
]
}
},
{
"terms": {
"Role.name.keyword": [
"Developer",
"Tester"
]
}
}
]
}
},
"aggs": {
"countNames": {
"terms": {
"field": "BusinessArea.keyword"
}
},
"designationNames": {
"terms": {
"field": "Designation.keyword"
}
},
"Role": {
"terms": {
"field": "Role.name.keyword"
}
}
}
}

Sort query by length of field in Elstic Search

I have a field in each of my documents like so
'some_field': 3, 5, 10
But each document could have a very length of numbers,
'some_field': 3, 5, 10 # Doc 1
'some_field': 5 # Doc 2
'some_field': 3, 5, 10, 20, 9 # Doc 3
Is there a way to query and sort by the length so that my results would be arranged as so:
'some_field': 3, 5, 10, 20, 9 # Doc 3
'some_field': 3, 5, 10 # Doc 1
'some_field': 5 # Doc 2
My current query, sorting by _id at the moment
es_object.search(index='index', size=500, body={
"sort": [
{"_id": "desc"}
],
"query": {
"bool": {
"must": [
{
"match_all": {}
},
{
"exists": {
"field": "some_field"
}
}
],
"filter": [],
"should": [],
"must_not": []
}
}})
You can do script sort
{
"sort": {
"_script": {
"script": "doc['some_field'].value.length()",
"type": "number",
"order": "asc"
}
},
"query": {
"bool": {
"must": [{
"match_all": {}
},
{
"exists": {
"field": "some_field"
}
}
],
"filter": [],
"should": [],
"must_not": []
}
}
}

pymongo : How can I get multiple results from filed in mongodb?

I would like to get every 'ecg_raw' in 'data' itmes that founded by 'name' with python3 pymongo.
If i know 'name' and 'time_info', How can i get 4 ecg_raw datas which time_info == '2018-09-01 00:00:03'
I want to get every ecg_raw data like [[8,2],[1,10],[9,4],[1,9]]
I tried
db.g.find({"data":{"$elemMatch":{"time_info":"2018-09-01 00:00:03"}}},{"name":1,"data":{"$elemMatch":{"time_info":"2018-09-01 00:00:03"}}})
but it returns only one value like bottom.
{'_id': ObjectId('5b90d401219e9c9f72cac8c4'), 'name': 'testDog3', 'data': [{'time_info': '2018-09-01 00:00:03', 'ecg_raw': [8, 2]}]}
Please help me.
> db.g.find().pretty()
{
"_id" : ObjectId("5b90d401219e9c9f72cac8c4"),
"name" : "testDog3",
"data" : [
{
"time_info" : "2018-09-01 00:00:03",
"ecg_raw" : [
8,
2
]
},
{
"time_info" : "2018-09-01 00:00:03",
"ecg_raw" : [
1,
10
]
},
{
"time_info" : "2018-09-01 00:00:03",
"ecg_raw" : [
9,
4
]
},
{
"time_info" : "2018-09-01 00:00:03",
"ecg_raw" : [
1,
9
]
},
{
"time_info" : "2018-09-01 00:00:04",
"ecg_raw" : [
10,
6
]
},
{
"time_info" : "2018-09-01 00:00:04",
"ecg_raw" : [
1,
6
]
}
]
}
try this.
aggregate([{$match:{'name':'testDog3'}},{ "$unwind": "$data" },{ "$match": {"data.time_info":"2018-09-01 00:00:03"}}])

How to merge two aggregation queries into one

How can I merge these 2 queries into one:
Query 1
db.Response.aggregate([
{
"$and": [
{ "job_details.owner_id" : 428 },
{ "job_details.owner_type" : 'searches' }
]
},
{
"$group": {
"_id": "$candidate_city_name_string",
"count": { "$sum": 1 }
}
}
])
Query 2
db.Response.aggregate([
{
"$and": [
{ "job_details.owner_id" : 428 },
{ "job_details.owner_type" : 'searches' }
]
},
{
"$group": {
"_id": "$skill",
"count": { "$sum": 1 }
}
}
])
The result of this query like this
output 1:
{
"result": [
{ _id: 'Bangalore', count: 8 },
{ _id: 'cochi', count: 9 }
]
"ok":1
}
output 2:
{
"result": [
{ _id: 'java', count: 7 },
{ _id: 'python', count: 10 }
],
"ok":1
}
How can I get these 2 results in one query?
I need an output like this:
Expected output:
{
"result": [
"candidate_city_name_string": [
{ _id: 'Bangalore', count: 8 },
{ _id: 'cochi', count: 9 }
],
"skill": [
{ _id: 'java', count: 7 },
{ _id: 'python', count: 10 }
]
],
"ok":1
}
Is it possible? Somewhere I saw something about $facet but I didn't understand that.
db.Response.aggregate([
{"$match":{"$and":[{"job_details.owner_id" : 482},{"job_details.owner_type" : 'searches'}]}},
{$facet: {
"candidate_sublocation_name_string": [
{"$group": {"_id":"$candidate_sublocation_name_string","count": {"$sum": 1 }}}
],
"skill": [
{"$group": {"_id":"$skill","count": {"$sum": 1 }}}
]
}}])

Categories