pymongo group multiple conditions - python

CURRENT_TZ = timezone(bp.BaseModel.__timezone__ or "Asia/Shanghai")
NOW = CURRENT_TZ.localize(datetime.utcnow())
EXPIRY_DATE = NOW + relativedelta(days=5)
res = await Fixture.aggregate(
    [
        {"$match": dict(eol={"$nin": [True, ""]})},
        {
            "$group": {
                "_id": {
                    "$cond": [
                        {"$lt": ["pm_date", start_date]},
                        "PENDING",
                        {
                            "$gte": ["pm_date", start_date],
                            "$lt": ["pm_date", end_date],
                        },
                        "DONE",
                        {
                            "$gte": ["pm_due_date", start_date],
                            "$lte": ["pm_due_date", EXPIRY_DATE],
                        },
                        "WILL EXPIRED",
                        {"$lte": ["pm_due_date", NOW]},
                        "EXPIRED",
                    ]
                },
                "count": {"$sum": 1},
            }
        },
    ]
)
From the above code, I expected output like:
{
    "_id": "PENDING",
    "qty": 50
},
{
    "_id": "DONE",
    "qty": 50
},
{
    "_id": "WILL BE EXPIRE",
    "qty": 40
},
{
    "_id": "EXPIRED",
    "qty": 10
}
But my console shows the following error. Can someone help me fix the pymongo pipeline for grouping on multiple conditions?
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: An object representing an expression must have exactly one field: { $gte: [ "pm_date", new Date(1596240000000) ], $lt: [ "pm_date", new Date(1598918400000) ] }
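As the message says, each condition handed to $cond must be a single expression object; also note that $cond takes exactly three entries (if, then, else), so multiple branches have to be nested or expressed with $switch, as in the update below. A hedged sketch of the corrected shape for the DONE branch, combining the two comparisons with $and (note the $ prefix on field paths, which the original also omits):
# One expression object per condition; field paths need a "$" prefix.
done_condition = {
    "$and": [
        {"$gte": ["$pm_date", start_date]},
        {"$lt": ["$pm_date", end_date]},
    ]
}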

Update: I got the result by using $switch (aggregation)
Refer to: https://docs.mongodb.com/manual/reference/operator/aggregation/switch/
res = await Fixture.aggregate(
    [
        {"$match": dict(eol={"$nin": [True, ""]})},
        {
            "$project": {
                "pm_due_date": 1,
                "status": {
                    "$switch": {
                        "branches": [
                            {
                                "case": {"$lt": ["$pm_due_date", NOW]},
                                "then": "EXPIRED",
                            },
                            {
                                "case": {
                                    "$and": [
                                        {"$gte": ["$pm_due_date", start_date]},
                                        {"$lte": ["$pm_due_date", EXPIRY_DATE]},
                                    ]
                                },
                                "then": "WILL EXPIRE",
                            },
                            {
                                "case": {"$lt": ["$pm_date", start_date]},
                                "then": "PENDING",
                            },
                            {
                                "case": {
                                    "$and": [
                                        {"$gte": ["$pm_date", start_date]},
                                        {"$lt": ["$pm_date", end_date]},
                                    ]
                                },
                                "then": "DONE",
                            },
                        ],
                        "default": "NA",
                    }
                },
            }
        },
        {"$group": {"_id": "$status", "count": {"$sum": 1}}},
    ]
)
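A hedged usage sketch for consuming the result, assuming the aggregation returns an iterable of {"_id": <status>, "count": <n>} documents (the exact cursor API depends on the ODM in use):
# Collapse the grouped documents into a status -> count mapping.
counts = {doc["_id"]: doc["count"] for doc in res}
print(counts)  # e.g. {"EXPIRED": 10, "WILL EXPIRE": 40, "PENDING": 50, "DONE": 50}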

You should put your $cond in a $project stage instead of the $group. Note that field paths inside aggregation expressions must be prefixed with $ (e.g. "$pm_date"); without the prefix they are treated as plain string literals:
[
    {"$match": dict(eol={"$nin": [True, ""]})},
    {
        "$project": {
            "status": {
                "$cond": [
                    {"$lt": ["$pm_date", start_date]},
                    "PENDING",
                    {
                        "$cond": [
                            {
                                "$and": [
                                    {"$gte": ["$pm_date", start_date]},
                                    {"$lt": ["$pm_date", end_date]},
                                ]
                            },
                            "DONE",
                            {
                                "$cond": [
                                    {
                                        "$and": [
                                            {"$gte": ["$pm_due_date", start_date]},
                                            {"$lte": ["$pm_due_date", EXPIRY_DATE]},
                                        ]
                                    },
                                    "WILL EXPIRE",
                                    "EXPIRED",
                                ]
                            },
                        ]
                    },
                ]
            }
        }
    },
    {
        "$group": {
            "_id": "$status",
            "count": {"$sum": 1},
        }
    },
]

Related

json.decoder.JSONDecodeError - while converting JSON to CSV output

While trying to convert the JSON output below to CSV, I am getting an error.
Here is the JSON output:
{
    "data": [
        {
            "id": "-1000100591151294842",
            "type": "fres",
            "attributes": {
                "operationState": "In Service",
                "deploymentState": "discovered",
                "displayData": {
                    "operationState": "Up",
                    "adminState": "Enabled",
                    "displayTopologySource": "Protocol,Derived",
                    "displayPhotonicSpectrumData": [
                        {
                            "frequency": "194.950000",
                            "wavelength": "1537.79",
                            "channel": "CH-20"
                        }
                    ],
                    "displayDeploymentState": "Discovered",
                    "displayName": "J-BBEG-CHLC-P109"
                },
                "utilizationData": {
                    "totalCapacity": "100.0",
                    "usedCapacity": "100.0",
                    "utilizationPercent": "100",
                    "capacityUnits": "Gbps"
                },
                "resourceState": "discovered",
                "serviceClass": "OTU",
                "linkLabel": "BBEG-ROADM-0101:5-4-1,CHLC-ROADM-0401:7-35-1",
                "lastUpdatedAdminStateTimeStamp": "2021-05-03T00:29:24.444Z",
                "lastUpdatedOperationalStateTimeStamp": "2022-12-08T22:42:21.567Z",
                "userLabel": "J-BBEG-CHLC-P109",
                "mgmtName": "",
                "nativeName": "",
                "awarenessTime": "2022-12-08T22:42:22.123Z",
                "layerRate": "OTU4",
                "layerRateQualifier": "OTU4",
                "supportedByLayerRatePackageList": [
                    {
                        "layerRate": "OTSi",
                        "layerRateQualifier": "100G"
                    }
                ],
                "networkRole": "FREAP",
                "directionality": "bidirectional",
                "topologySources": [
                    "adjacency",
                    "stitched"
                ],
                "adminState": "In Service",
                "photonicSpectrumPackageList": [
                    {
                        "frequency": "194.950000",
                        "width": "37.5"
                    }
                ],
                "active": true,
                "additionalAttributes": {
                    "isActual": "true",
                    "hasLowerTopology": "true"
                },
                "reliability": "auto",
                "resilienceLevel": "unprotected"
            },
            "relationships": {
                "freDiscovered": {
                    "data": {
                        "type": "freDiscovered",
                        "id": "-1000100591151294842"
                    }
                },
                "supportedByServices": {
                    "data": [
                        {
                            "type": "fres",
                            "id": "6765278351459212874"
                        }
                    ]
                },
                "endPoints": {
                    "data": [
                        {
                            "type": "endPoints",
                            "id": "-1000100591151294842:1"
                        },
                        {
                            "type": "endPoints",
                            "id": "-1000100591151294842:2"
                        }
                    ]
                },
                "partitionFres": {
                    "data": [
                        {
                            "type": "fres",
                            "id": "7147507956181395827"
                        }
                    ]
                }
            }
        },
        {
            "id": "-1013895107051577774",
            "type": "fres",
            "attributes": {
                "operationState": "In Service",
                "deploymentState": "discovered",
                "displayData": {
                    "operationState": "Up",
                    "adminState": "Enabled",
                    "displayTopologySource": "Protocol,Derived",
                    "displayPhotonicSpectrumData": [
                        {
                            "frequency": "191.600000",
                            "wavelength": "1564.68",
                            "channel": "CH-87"
                        }
                    ],
                    "displayDeploymentState": "Discovered",
                    "displayName": "J-KFF9-PNTH-P101"
                },
                "utilizationData": {
                    "totalCapacity": "100.0",
                    "usedCapacity": "90.0",
                    "utilizationPercent": "90",
                    "capacityUnits": "Gbps"
                },
                "resourceState": "discovered",
                "serviceClass": "OTU",
                "tags": [
                    "J-KFF9-PNTH-P101"
                ],
                "linkLabel": "KFF9-ROADM-0301:1-1-1,PNTH-ROADM-0101:1-1-1",
                "lastUpdatedAdminStateTimeStamp": "2021-09-12T20:22:59.334Z",
                "lastUpdatedOperationalStateTimeStamp": "2022-10-12T14:20:44.779Z",
                "userLabel": "J-KFF9-PNTH-P101",
                "mgmtName": "",
                "nativeName": "",
                "awarenessTime": "2022-10-12T14:20:45.417Z",
                "layerRate": "OTU4",
                "layerRateQualifier": "OTU4",
                "supportedByLayerRatePackageList": [
                    {
                        "layerRate": "OTSi",
                        "layerRateQualifier": "100G"
                    }
                ],
                "networkRole": "FREAP",
                "directionality": "bidirectional",
                "topologySources": [
                    "adjacency",
                    "stitched"
                ],
                "adminState": "In Service",
                "photonicSpectrumPackageList": [
                    {
                        "frequency": "191.600000",
                        "width": "37.5"
                    }
                ],
                "active": true,
                "additionalAttributes": {
                    "isActual": "true",
                    "hasLowerTopology": "true"
                },
                "reliability": "auto",
                "resilienceLevel": "unprotected"
            },
            "relationships": {
                "freDiscovered": {
                    "data": {
                        "type": "freDiscovered",
                        "id": "-1013895107051577774"
                    }
                },
                "supportedByServices": {
                    "data": [
                        {
                            "type": "fres",
                            "id": "6055685088078365419"
                        }
                    ]
                },
                "endPoints": {
                    "data": [
                        {
                            "type": "endPoints",
                            "id": "-1013895107051577774:1"
                        },
                        {
                            "type": "endPoints",
                            "id": "-1013895107051577774:2"
                        }
                    ]
                },
                "partitionFres": {
                    "data": [
                        {
                            "type": "fres",
                            "id": "-6727082893715936342"
                        }
                    ]
                }
            }
        }
    ]
}
I am getting a json.decoder.JSONDecodeError and am not sure what is missing.
Here is the Python script I used. I have been trying different variations, but no luck; I get different errors in all the other instances.
from pathlib import Path
import json

filename = Path('fre.json')
data = []
with open(filename, 'r') as json_file:
    data_str = json_file.read()
    data_str = data_str.split('[', 1)[-1]
    data_str = data_str.rsplit(']', 1)[0]
    data_str = data_str.split('][')
    for jsonStr in data_str:
        jsonStr = '[' + jsonStr + ']'
        temp_data = json.loads(jsonStr)
        for each in temp_data:
            data.append(each)
What is wrong?
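For what it's worth, the file shown above is a single well-formed JSON document, so the string surgery (and the '][' split) is unnecessary; a minimal hedged sketch that loads it directly and flattens a few illustrative columns (the column choice is mine, not from the original post):
import csv
import json
from pathlib import Path

# The file holds one JSON document with a top-level "data" array.
with open(Path('fre.json')) as json_file:
    records = json.load(json_file)["data"]

with open('fre.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["id", "type", "serviceClass", "usedCapacity"])
    for rec in records:
        attrs = rec["attributes"]
        writer.writerow([
            rec["id"],
            rec["type"],
            attrs["serviceClass"],
            attrs["utilizationData"]["usedCapacity"],
        ])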

update nested json object in python

I have a JSON file named input which is as follows:
{
    "abc": {
        "dbc": {
            "type": "string",
            "metadata": {
                "description": "Name of the namespace"
            }
        },
        "fgh": {
            "type": "string",
            "metadata": {
                "description": "Name of the Topic"
            }
        }
    },
    "resources": [
        {
            "sku": {
                "name": "[parameters('sku')]"
            },
            "properties": {},
            "resources": [
                {
                    "resources": [
                        {
                            "resources": [
                                {
                                    "properties": {
                                        "filterType": "SqlFilter",
                                        "sqlFilter": {
                                            "sqlExpression": "HAI"
                                        }
                                    }
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    ]
}
I want "sqlExpression": "HAI" value to be replaced with BYE as below
"sqlExpression": "BYE"
I want python code to do it, I tried the below code but not working
input['resources'][0]['resources'][0]['resources'][0]['resources'][0][properties][0][sqlFilter][0][sqlExpression][0]='BYE'
inp = {
    "abc": {
        "dbc": {
            "type": "string",
            "metadata": {
                "description": "Name of the namespace"
            }
        },
        "fgh": {
            "type": "string",
            "metadata": {
                "description": "Name of the Topic"
            }
        }
    },
    "resources": [
        {
            "sku": {
                "name": "[parameters('sku')]"
            },
            "properties": {},
            "resources": [
                {
                    "resources": [
                        {
                            "resources": [
                                {
                                    "properties": {
                                        "filterType": "SqlFilter",
                                        "sqlFilter": {
                                            "sqlExpression": "HAI"
                                        }
                                    }
                                }
                            ]
                        }
                    ]
                }
            ]
        }
    ]
}
inp['resources'][0]['resources'][0]['resources'][0]['resources'][0]['properties']['sqlFilter']['sqlExpression'] = 'BYE'
print(inp)
Result
{'abc': {'dbc': ...truncated... {'sqlExpression': 'BYE'}}}]}]}]}]}
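If the nesting depth is not fixed, a recursive walk avoids hard-coding the chain of indexes; a minimal sketch (the helper name replace_key is hypothetical):
def replace_key(node, key, new_value):
    """Recursively set every occurrence of `key` in nested dicts/lists."""
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key:
                node[k] = new_value
            else:
                replace_key(v, key, new_value)
    elif isinstance(node, list):
        for item in node:
            replace_key(item, key, new_value)

replace_key(inp, 'sqlExpression', 'BYE')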

Elasticsearch Cosine similarity exception

I am using Elasticsearch 7.15.2.
I have a dense vector field of size 1024 that is saved in the field vector. My query goes like this:
{
    "size": 100,
    "min_score": 0.75,
    "query": {
        "script_score": {
            "query": {
                "bool": {
                    "must": [],
                    "must_not": [
                        {
                            "terms": {
                                "id": [
                                    "12"
                                ]
                            }
                        }
                    ],
                    "filter": []
                }
            },
            "script": {
                "source": "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
                "params": {
                    "query_vector": [
                        10.798686228610265,
                        4.186900536065757,
                        0.19701037630829776,
                        0.20834632696963679,
                        ......
                        ......
                    ]
                }
            }
        }
    }
}
When I execute this, I am getting:
{
    "error": {
        "root_cause": [
            {
                "type": "script_exception",
                "reason": "runtime error",
                "script_stack": [
                    "org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DenseVectorFunction.<init>(ScoreScriptUtils.java:65)",
                    "org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity.<init>(ScoreScriptUtils.java:172)",
                    "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
                    " ^---- HERE"
                ],
                "script": "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
                "lang": "painless",
                "position": {
                    "offset": 70,
                    "start": 0,
                    "end": 79
                }
            }
        ],
        "type": "search_phase_execution_exception",
        "reason": "all shards failed",
        "phase": "query",
        "grouped": true,
        "failed_shards": [
            {
                "shard": 0,
                "index": "candidate_saas",
                "node": "itz4QoZERGCHYk65uiTUBg",
                "reason": {
                    "type": "script_exception",
                    "reason": "runtime error",
                    "script_stack": [
                        "org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DenseVectorFunction.<init>(ScoreScriptUtils.java:65)",
                        "org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity.<init>(ScoreScriptUtils.java:172)",
                        "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
                        " ^---- HERE"
                    ],
                    "script": "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
                    "lang": "painless",
                    "position": {
                        "offset": 70,
                        "start": 0,
                        "end": 79
                    },
                    "caused_by": {
                        "type": "class_cast_exception",
                        "reason": "class org.elasticsearch.index.fielddata.ScriptDocValues$Doubles cannot be cast to class org.elasticsearch.xpack.vectors.query.VectorScriptDocValues$DenseVectorScriptDocValues (org.elasticsearch.index.fielddata.ScriptDocValues$Doubles is in unnamed module of loader 'app'; org.elasticsearch.xpack.vectors.query.VectorScriptDocValues$DenseVectorScriptDocValues is in unnamed module of loader java.net.FactoryURLClassLoader @af9a89f)"
                    }
                }
            }
        ]
    },
    "status": 400
}
Please help
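The caused_by line is the clue: ScriptDocValues$Doubles means the vector field was indexed as plain numeric values rather than as a dense_vector, so cosineSimilarity cannot read it. A hedged sketch of the usual fix with the Python client, under the assumption that the mapping is indeed wrong (the new index name candidate_saas_v2 is illustrative); the data has to be reindexed because a field's type cannot be changed in place:
from elasticsearch import Elasticsearch

es = Elasticsearch(host="localhost", port=9200)

# Recreate the index with `vector` mapped as a dense_vector of the right size.
es.indices.create(
    index="candidate_saas_v2",
    body={
        "mappings": {
            "properties": {
                "vector": {"type": "dense_vector", "dims": 1024}
            }
        }
    },
)

# Copy the existing documents into the corrected index.
es.reindex(body={
    "source": {"index": "candidate_saas"},
    "dest": {"index": "candidate_saas_v2"},
})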

Elasticsearch bool query sort by date if status is true

I have JSON documents like the following in an Elasticsearch index. I need to fetch the advertisements that have not expired and whose status is true, and sort them descending by end_date. How can I achieve this?
I tried sorting on end_date, but it did not work. I also need to be able to show all the expired data, i.e. documents whose end_date is in the past.
advertisement = [
    {
        "id": 1,
        "name": "test",
        "status": True,
        "start_date": "2020-08-09",
        "end_date": "2020-09-09",
    },
    {
        "id": 2,
        "name": "test2",
        "status": False,
        "start_date": "2020-08-09",
        "end_date": "2020-08-09",
    },
]
This is my Elasticsearch method:
def elastic_search(category=None):
    client = Elasticsearch(host="localhost", port=9200)
    query_all = {
        'size': 10000,
        'query': {
            "bool": {
                "filter": [
                    {
                        "match": {
                            "name": "test"
                        }
                    }
                ]
            },
        },
        "sort": [
            {
                "end_date": {
                    "type": "date",
                    "order": 'desc'
                }
            }
        ]
    }
    resp = client.search(
        index="my-index",
        body=query_all
    )
    return resp
This is my ES response (from http://localhost:9200/my-index/_search):
{
    "took": 96,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 36,
        "max_score": 1.0,
        "hits": [
            {
                "_index": "my-index",
                "_type": "doc",
                "_id": "52",
                "_score": 1.0,
                "_source": {
                    "id": 1,
                    "name": "test",
                    "status": true,
                    "start_date": "2020-08-09",
                    "end_date": "2020-09-09"
                }
            },
            {
                "_index": "my-index",
                "_type": "doc",
                "_id": "60",
                "_score": 1.0,
                "_source": {
                    "id": 1,
                    "name": "English test",
                    "status": true,
                    "start_date": "2020-08-09",
                    "end_date": "2020-09-09"
                }
            },
            {
                "_index": "my-index",
                "_type": "doc",
                "_id": "40",
                "_score": 1.0,
                "_source": {
                    "id": 1,
                    "name": "Designw test",
                    "status": false,
                    "start_date": "2020-08-09",
                    "end_date": "2020-09-09"
                }
            },
            {
                "_index": "my-index",
                "_type": "doc",
                "_id": "41",
                "_score": 1.0,
                "_source": {
                    "id": 1,
                    "name": "Designw New",
                    "status": false,
                    "start_date": "2020-08-09",
                    "end_date": "2020-09-09"
                }
            },
            {
                "_index": "my-index",
                "_type": "doc",
                "_id": "59",
                "_score": 1.0,
                "_source": {
                    "id": 1,
                    "name": "Designw New",
                    "status": false,
                    "start_date": "2020-08-09",
                    "end_date": "2020-09-09"
                }
            },
            {
                "_index": "my-index",
                "_type": "doc",
                "_id": "62",
                "_score": 1.0,
                "_source": {
                    "id": 1,
                    "name": "Designw New",
                    "status": false,
                    "start_date": "2020-08-09",
                    "end_date": "2020-09-09"
                }
            }
        ]
    }
}
This is my mapping response (from http://localhost:9200/my-index/_mapping):
"my-index":{
"mappings":{
"_doc":{
"properties":{
"address":{
"properties":{
"name":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"start_date":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"end_date":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"id":{
"type":"long"
},
"status":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
}
}
}
}
}
}
}
}
Two things regarding the mapping:
There's an address field there that's not found in your actual documents. Remove it.
Your dates should be mapped correctly using the date datatype.
A correct mapping would look like this:
{
    "properties": {
        "end_date": {
            "type": "date",
            "format": "yyyy-MM-dd"
        },
        "start_date": {
            "type": "date",
            "format": "yyyy-MM-dd"
        },
        //...other properties
    }
}
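Note that an existing field's type cannot be changed in place, so once the mapping is corrected the data has to be copied into a fresh index. A hedged sketch with the Python client (the new index name my-index-v2 is illustrative; mapping status as a boolean is an assumption based on the True/False values in the documents, and is what the term query below relies on):
from elasticsearch import Elasticsearch

client = Elasticsearch(host="localhost", port=9200)

# New index with the date fields mapped as dates and status as a boolean.
client.indices.create(
    index="my-index-v2",
    body={
        "mappings": {
            "properties": {
                "start_date": {"type": "date", "format": "yyyy-MM-dd"},
                "end_date": {"type": "date", "format": "yyyy-MM-dd"},
                "status": {"type": "boolean"},
            }
        }
    },
)

# Re-parse the existing documents against the new mapping.
client.reindex(body={
    "source": {"index": "my-index"},
    "dest": {"index": "my-index-v2"},
})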
Once you get the mapping right, this query looks for all non-expired ads with a true status and sorts by the longest running:
{
    "query": {
        "bool": {
            "must": [
                {
                    "range": {
                        "end_date": {
                            "gt": "now"
                        }
                    }
                },
                {
                    "term": {
                        "status": {
                            "value": true
                        }
                    }
                }
            ]
        }
    },
    "sort": [
        {
            "end_date": {
                "order": "desc"
            }
        }
    ]
}
Alternatively, if you're looking for the expired ones, change gt to lt, which stands for less-than; that variant is spelled out below.
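Spelled out as a Python dict in the style of the question's elastic_search method (same query, only the range operator changes):
# Expired ads with a true status, most recently expired first.
expired_query = {
    "query": {
        "bool": {
            "must": [
                {"range": {"end_date": {"lt": "now"}}},
                {"term": {"status": {"value": True}}},
            ]
        }
    },
    "sort": [{"end_date": {"order": "desc"}}],
}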

Unhashable type 'dict' when trying to send an Elasticsearch

I keep on getting the following error in Python
Exception has occurred: TypeError unhashable type: 'dict'
on line 92
"should": [],
"must_not": []
This is the query string
res = es.search(
    scroll='2m',
    index="logstash-*",
    body={
        {
            "aggs": {
                "2": {
                    "terms": {
                        "field": "src_ip.keyword",
                        "size": 50,
                        "order": {
                            "1": "desc"
                        }
                    },
                    "aggs": {
                        "1": {
                            "cardinality": {
                                "field": "src_ip.keyword"
                            }
                        }
                    }
                }
            },
            "size": 0,
            "_source": {
                "excludes": []
            },
            "stored_fields": [
                "*"
            ],
            "script_fields": {},
            "docvalue_fields": [
                {
                    "field": "@timestamp",
                    "format": "date_time"
                },
                {
                    "field": "flow.start",
                    "format": "date_time"
                },
                {
                    "field": "timestamp",
                    "format": "date_time"
                },
                {
                    "field": "tls.notafter",
                    "format": "date_time"
                },
                {
                    "field": "tls.notbefore",
                    "format": "date_time"
                }
            ],
            "query": {
                "bool": {
                    "must": [
                        {
                            "range": {
                                "@timestamp": {
                                    "gte": 1555777931992,
                                    "lte": 1558369931992,
                                    "format": "epoch_millis"
                                }
                            }
                        }
                    ],
                    "filter": [
                        {
                            "match_all": {}
                        }
                    ],
                    "should": [],
                    "must_not": []
                }
            }
        }
    })
The value of body is a set ({ } without key-value pairs is a set literal; e.g., {1, 2} is a set). Inside this set you have a dictionary.
Items in a set have to be hashable, and a dictionary isn't.
As the comment from @Carcigenicate says, it seems like a typo of having {{ }} instead of { } for the value of body.
The Elasticsearch documentation shows that body should be a dictionary.
More about sets in the Python docs.
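A minimal illustration of the failure and the fix:
# A set literal wrapping a dict raises TypeError: unhashable type: 'dict'
# body = {{"query": {"match_all": {}}}}

# A plain dict is what es.search() expects:
body = {"query": {"match_all": {}}}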
