from datetime import datetime
from dateutil.relativedelta import relativedelta
from pytz import timezone

CURRENT_TZ = timezone(bp.BaseModel.__timezone__ or "Asia/Shanghai")
NOW = CURRENT_TZ.localize(datetime.utcnow())
EXPIRY_DATE = NOW + relativedelta(days=5)
res = await Fixture.aggregate(
[
{"$match": dict(eol={"$nin": [True, ""]})},
{
"$group": {
"_id": {
"$cond": [
{"$lt": ["pm_date", start_date]},
"PENDING",
{
"$gte": ["pm_date", start_date],
"$lt": ["pm_date", end_date],
},
"DONE",
{
"$gte": ["pm_due_date", start_date],
"$lte": ["pm_due_date", EXPIRY_DATE],
},
"WILL EXPIRED",
{"$lte": ["pm_due_date", NOW]},
"EXPIRED",
]
},
"count": {"$sum": 1},
}
},
]
)
From the above code, I expected output like the following, for example:
{
    "_id": "PENDING",
    "count": 50
},
{
    "_id": "DONE",
    "count": 50
},
{
    "_id": "WILL EXPIRE",
    "count": 40
},
{
    "_id": "EXPIRED",
    "count": 10
}
but my console shows the error below. Can someone help me fix the pymongo pipeline for grouping on multiple conditions?
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: An object representing an expression must have exactly one field: { $gte: [ "pm_date", new Date(1596240000000) ], $lt: [ "pm_date", new Date(1598918400000) ] }
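The underlying problem: $cond takes exactly three arguments (if, then, else), so the extra condition/value pairs in the $cond array are not valid, and an expression object such as {"$gte": [...], "$lt": [...]} has two fields where the server expects exactly one, which is what the error message is pointing at.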
Update: I got the result by using $switch (aggregation)
Refer to: https://docs.mongodb.com/manual/reference/operator/aggregation/switch/
res = await Fixture.aggregate(
[
{"$match": dict(eol={"$nin": [True, ""]})},
{
"$project": {
"pm_due_date": 1,
"status": {
"$switch": {
"branches": [
{
"case": {"$lt": ["$pm_due_date", NOW]},
"then": "EXPIRED",
},
{
"case": {
"$and": [
{
"$gte": [
"$pm_due_date",
start_date,
]
},
{
"$lte": [
"$pm_due_date",
EXPIRY_DATE,
]
},
]
},
"then": "WILL EXPIRE",
},
{
"case": {"$lt": ["$pm_date", start_date]},
"then": "PENDING",
},
{
"case": {
"$and": [
{"$gte": ["$pm_date", start_date]},
{"$lt": ["$pm_date", end_date]},
]
},
"then": "DONE",
},
],
"default": "NA",
}
},
}
},
{"$group": {"_id": "$status", "count": {"$sum": 1}}},
]
)
You should put your $cond in a $project stage instead of the $group. Note that field paths inside aggregation expressions need a $ prefix (e.g. "$pm_date"); without it they are treated as plain string literals:
[
    {"$match": dict(eol={"$nin": [True, ""]})},
    {"$project": {
        "status": {
            "$cond": [
                {"$lt": ["$pm_date", start_date]},
                "PENDING",
                {"$cond": [
                    {
                        "$and": [
                            {"$gte": ["$pm_date", start_date]},
                            {"$lt": ["$pm_date", end_date]}
                        ]
                    },
                    "DONE",
                    {"$cond": [
                        {
                            "$and": [
                                {"$gte": ["$pm_due_date", start_date]},
                                {"$lte": ["$pm_due_date", EXPIRY_DATE]}
                            ]
                        },
                        "WILL EXPIRE",
                        "EXPIRED"
                    ]}
                ]}
            ]
        }
    }
    },
    {
        "$group": {
            "_id": "$status",
            "count": {"$sum": 1},
        }
    },
]
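Functionally, this nested $cond chain is equivalent to the $switch version above; $switch simply reads better once there are more than two branches.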
While trying to convert the JSON output below to CSV, I am getting an error.
Here is the JSON output:
{
"data": [
{
"id": "-1000100591151294842",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "194.950000",
"wavelength": "1537.79",
"channel": "CH-20"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-BBEG-CHLC-P109"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "100.0",
"utilizationPercent": "100",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"linkLabel": "BBEG-ROADM-0101:5-4-1,CHLC-ROADM-0401:7-35-1",
"lastUpdatedAdminStateTimeStamp": "2021-05-03T00:29:24.444Z",
"lastUpdatedOperationalStateTimeStamp": "2022-12-08T22:42:21.567Z",
"userLabel": "J-BBEG-CHLC-P109",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-12-08T22:42:22.123Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "194.950000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1000100591151294842"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6765278351459212874"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1000100591151294842:1"
},
{
"type": "endPoints",
"id": "-1000100591151294842:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "7147507956181395827"
}
]
}
}
},
{
"id": "-1013895107051577774",
"type": "fres",
"attributes": {
"operationState": "In Service",
"deploymentState": "discovered",
"displayData": {
"operationState": "Up",
"adminState": "Enabled",
"displayTopologySource": "Protocol,Derived",
"displayPhotonicSpectrumData": [
{
"frequency": "191.600000",
"wavelength": "1564.68",
"channel": "CH-87"
}
],
"displayDeploymentState": "Discovered",
"displayName": "J-KFF9-PNTH-P101"
},
"utilizationData": {
"totalCapacity": "100.0",
"usedCapacity": "90.0",
"utilizationPercent": "90",
"capacityUnits": "Gbps"
},
"resourceState": "discovered",
"serviceClass": "OTU",
"tags": [
"J-KFF9-PNTH-P101"
],
"linkLabel": "KFF9-ROADM-0301:1-1-1,PNTH-ROADM-0101:1-1-1",
"lastUpdatedAdminStateTimeStamp": "2021-09-12T20:22:59.334Z",
"lastUpdatedOperationalStateTimeStamp": "2022-10-12T14:20:44.779Z",
"userLabel": "J-KFF9-PNTH-P101",
"mgmtName": "",
"nativeName": "",
"awarenessTime": "2022-10-12T14:20:45.417Z",
"layerRate": "OTU4",
"layerRateQualifier": "OTU4",
"supportedByLayerRatePackageList": [
{
"layerRate": "OTSi",
"layerRateQualifier": "100G"
}
],
"networkRole": "FREAP",
"directionality": "bidirectional",
"topologySources": [
"adjacency",
"stitched"
],
"adminState": "In Service",
"photonicSpectrumPackageList": [
{
"frequency": "191.600000",
"width": "37.5"
}
],
"active": true,
"additionalAttributes": {
"isActual": "true",
"hasLowerTopology": "true"
},
"reliability": "auto",
"resilienceLevel": "unprotected"
},
"relationships": {
"freDiscovered": {
"data": {
"type": "freDiscovered",
"id": "-1013895107051577774"
}
},
"supportedByServices": {
"data": [
{
"type": "fres",
"id": "6055685088078365419"
}
]
},
"endPoints": {
"data": [
{
"type": "endPoints",
"id": "-1013895107051577774:1"
},
{
"type": "endPoints",
"id": "-1013895107051577774:2"
}
]
},
"partitionFres": {
"data": [
{
"type": "fres",
"id": "-6727082893715936342"
}
]
}
}
}
] }
I am getting an error and am not sure what is missing.
Here is the Python script I used. I have been trying different variations, but with no luck; I get different errors in all the other instances.
import json
from pathlib import Path

filename = Path('fre.json')
data = []
with open(filename, 'r') as json_file:
    data_str = json_file.read()
    data_str = data_str.split('[', 1)[-1]
    data_str = data_str.rsplit(']', 1)[0]
    data_str = data_str.split('][')
    for jsonStr in data_str:
        jsonStr = '[' + jsonStr + ']'
        temp_data = json.loads(jsonStr)
        for each in temp_data:
            data.append(each)
What is wrong?
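For what it's worth, the sample above is one valid JSON document, so there is no need to split the text at all; json.load can parse it in one go. A minimal sketch of the CSV conversion under that assumption (the output columns are illustrative, picked from fields present in the sample):

import csv
import json
from pathlib import Path

# Parse the whole file at once and pull out the list of records.
with open(Path('fre.json')) as json_file:
    records = json.load(json_file)['data']

# Flatten a few illustrative fields; adjust to the columns you need.
with open('fre.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['id', 'type', 'operationState', 'userLabel'])
    for rec in records:
        attrs = rec.get('attributes', {})
        writer.writerow([
            rec.get('id'),
            rec.get('type'),
            attrs.get('operationState'),
            attrs.get('userLabel'),
        ])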
I have a JSON file named input, which is as follows:
{
"abc": {
"dbc": {
"type": "string",
"metadata": {
"description": "Name of the namespace"
}
},
"fgh": {
"type": "string",
"metadata": {
"description": "Name of the Topic"
}
}
},
"resources": [
{
"sku": {
"name": "[parameters('sku')]"
},
"properties": {},
"resources": [
{
"resources": [
{
"resources": [
{
"properties": {
"filterType": "SqlFilter",
"sqlFilter": {
"sqlExpression": "HAI"
}
}
}
]
}
]
}
]
}
]
}
I want "sqlExpression": "HAI" value to be replaced with BYE as below
"sqlExpression": "BYE"
I want python code to do it, I tried the below code but not working
input['resources'][0]['resources'][0]['resources'][0]['resources'][0][properties][0][sqlFilter][0][sqlExpression][0]='BYE'
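Unquoted, properties, sqlFilter and sqlExpression are undefined Python names, and their values in the JSON are dicts rather than lists, so the [0] indexes after them do not apply. Only the resources lists need an index; the dict levels are reached with quoted string keys: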
inp = {
"abc": {
"dbc": {
"type": "string",
"metadata": {
"description": "Name of the namespace"
}
},
"fgh": {
"type": "string",
"metadata": {
"description": "Name of the Topic"
}
}
},
"resources": [
{
"sku": {
"name": "[parameters('sku')]"
},
"properties": {},
"resources": [
{
"resources": [
{
"resources": [
{
"properties": {
"filterType": "SqlFilter",
"sqlFilter": {
"sqlExpression": "HAI"
}
}
}
]
}
]
}
]
}
]
}
inp['resources'][0]['resources'][0]['resources'][0]['resources'][0]['properties']['sqlFilter']['sqlExpression']='BYE'
print(inp)
Result
{'abc': {'dbc': ...truncated... {'sqlExpression': 'BYE'}}}]}]}]}]}
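If the nesting depth can vary, a recursive walk is a common alternative. A minimal sketch (replace_value is an illustrative helper, not part of the original code):

def replace_value(node, key, old, new):
    # Walk dicts and lists, replacing old with new wherever node[key] == old.
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key and v == old:
                node[k] = new
            else:
                replace_value(v, key, old, new)
    elif isinstance(node, list):
        for item in node:
            replace_value(item, key, old, new)

replace_value(inp, 'sqlExpression', 'HAI', 'BYE')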
I am using Elasticsearch 7.15.2.
I have a dense vector field of size 1024 that is saved in the field vector. My query goes like this:
{
"size": 100,
"min_score": 0.75,
"query": {
"script_score": {
"query": {
"bool": {
"must": [],
"must_not": [
{
"terms": {
"id": [
"12"
]
}
}
],
"filter": [
]
}
},
"script": {
"source": "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
"params": {
"query_vector": [
10.798686228610265,
4.186900536065757,
0.19701037630829776,
0.20834632696963679,
......
......
]
}
}
}
}
}
When I execute this, I get:
{
"error": {
"root_cause": [
{
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DenseVectorFunction.<init>(ScoreScriptUtils.java:65)",
"org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity.<init>(ScoreScriptUtils.java:172)",
"doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
" ^---- HERE"
],
"script": "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
"lang": "painless",
"position": {
"offset": 70,
"start": 0,
"end": 79
}
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "candidate_saas",
"node": "itz4QoZERGCHYk65uiTUBg",
"reason": {
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$DenseVectorFunction.<init>(ScoreScriptUtils.java:65)",
"org.elasticsearch.xpack.vectors.query.ScoreScriptUtils$CosineSimilarity.<init>(ScoreScriptUtils.java:172)",
"doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
" ^---- HERE"
],
"script": "doc['vector'].size() == 0 ? 0 : cosineSimilarity(params.query_vector, 'vector')",
"lang": "painless",
"position": {
"offset": 70,
"start": 0,
"end": 79
},
"caused_by": {
"type": "class_cast_exception",
"reason": "class org.elasticsearch.index.fielddata.ScriptDocValues$Doubles cannot be cast to class org.elasticsearch.xpack.vectors.query.VectorScriptDocValues$DenseVectorScriptDocValues (org.elasticsearch.index.fielddata.ScriptDocValues$Doubles is in unnamed module of loader 'app'; org.elasticsearch.xpack.vectors.query.VectorScriptDocValues$DenseVectorScriptDocValues is in unnamed module of loader java.net.FactoryURLClassLoader #af9a89f)"
}
}
}
]
},
"status": 400
}
Please help
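For reference, the caused_by (ScriptDocValues$Doubles cannot be cast to DenseVectorScriptDocValues) is what you see when the vector field is mapped as an ordinary numeric type rather than dense_vector, so cosineSimilarity cannot read it. Assuming that is the case here, the field needs a dense_vector mapping in a fresh index (the index name below is illustrative), followed by a reindex, since an existing field's type cannot be changed in place:

PUT /candidate_saas_v2
{
  "mappings": {
    "properties": {
      "vector": {
        "type": "dense_vector",
        "dims": 1024
      }
    }
  }
}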
I have documents like the following in an Elasticsearch index. I need to fetch the advertisements that have not expired (end_date in the future) and whose status is true, sorted by end_date descending. How can I achieve this?
I tried sorting on end_date, but it did not work. I also need to be able to show all the expired data, i.e. the documents whose end_date has passed.
advertisement = [
{
"id": 1,
"name": "test",
"status": True,
"start_date": "2020-08-09",
"end_date": "2020-09-09",
},
{
"id": 2,
"name": "test2",
"status": False,
"start_date": "2020-08-09",
"end_date": "2020-08-09",
}]
This is my Elasticsearch method.
def elastic_search(category=None):
client = Elasticsearch(host="localhost", port=9200)
query_all = {
'size': 10000,
'query': {
"bool": {
"filter": [
{
"match": {
"name": "test"
}
}]
},
},
"sort": [
{
"end_date": {
"type": "date",
"order": 'desc'
}
}
]
}
resp = client.search(
index="my-index",
body=query_all
)
return resp
This is my Elasticsearch response:
http://localhost:9200/my-index/_search
{
"took":96,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"skipped":0,
"failed":0
},
"hits":{
"total":36,
"max_score":1.0,
"hits":[
{
"_index":"my-index",
"_type":"doc",
"_id":"52",
"_score":1.0,
"_source":{
"id": 1,
"name": "test",
"status": True,
"start_date": "2020-08-09",
"end_date": "2020-09-09",
}
},
{
"_index":"my-index",
"_type":"doc",
"_id":"60",
"_score":1.0,
"_source":{
"id": 1,
"name": "English test",
"status": True,
"start_date": "2020-08-09",
"end_date": "2020-09-09",
}
},
{
"_index":"my-index",
"_type":"doc",
"_id":"40",
"_score":1.0,
"_source":{
"id": 1,
"name": "Designw test",
"status": false,
"start_date": "2020-08-09",
"end_date": "2020-09-09",
}
},
{
"_index":"my-index",
"_type":"doc",
"_id":"41",
"_score":1.0,
"_source":{
"id": 1,
"name": "Designw New",
"status": false,
"start_date": "2020-08-09",
"end_date": "2020-09-09",
}
},
{
"_index":"my-index",
"_type":"doc",
"_id":"59",
"_score":1.0,
"_source":{
"id": 1,
"name": "Designw New",
"status": false,
"start_date": "2020-08-09",
"end_date": "2020-09-09",
}
},
{
"_index":"my-index",
"_type":"doc",
"_id":"62",
"_score":1.0,
"_source":{
"id": 1,
"name": "Designw New",
"status": false,
"start_date": "2020-08-09",
"end_date": "2020-09-09",
}
}
]
}
}
This is my mapping (the response from http://localhost:9200/my-index/_mapping).
"my-index":{
"mappings":{
"_doc":{
"properties":{
"address":{
"properties":{
"name":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"start_date":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"end_date":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
},
"id":{
"type":"long"
},
"status":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword",
"ignore_above":256
}
}
}
}
}
}
}
}
}
}
Two things regarding the mapping:
There's an address field there that's not found in your actual documents. Remove it.
Your dates should be mapped correctly using the date datatype.
A correct mapping would look like this:
{
"properties":{
"end_date":{
"type":"date",
"format":"yyyy-MM-dd"
},
"start_date":{
"type":"date",
"format":"yyyy-MM-dd"
},
//...other properties
}
}
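Note that an existing field's type cannot be changed in place, so the corrected mapping has to go into a new index, followed by a reindex. A sketch, with my-index-v2 as an illustrative target name:

PUT /my-index-v2
{
  "mappings": {
    "properties": {
      "start_date": { "type": "date", "format": "yyyy-MM-dd" },
      "end_date": { "type": "date", "format": "yyyy-MM-dd" }
    }
  }
}

POST /_reindex
{
  "source": { "index": "my-index" },
  "dest": { "index": "my-index-v2" }
}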
Once you get the mapping right, this query looks for all non-expired ads w/ a true status and sorts by the longest running:
{
"query": {
"bool": {
"must": [
{
"range": {
"end_date": {
"gt": "now"
}
}
},
{
"term": {
"status": {
"value": true
}
}
}
]
}
},
"sort": [
{
"end_date": {
"order": "desc"
}
}
]
}
Alternatively, if you're looking for the expired ones, change gt to lt, which stands for less-than.
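If you are calling this from the Python helper in the question, the same body can be passed as a dict. A sketch under the question's setup (the localhost client and my-index index are taken from the original code):

from elasticsearch import Elasticsearch

def elastic_search():
    client = Elasticsearch(host="localhost", port=9200)
    query_all = {
        "size": 10000,
        "query": {
            "bool": {
                "must": [
                    # only ads that have not expired yet
                    {"range": {"end_date": {"gt": "now"}}},
                    # and whose status is true
                    {"term": {"status": {"value": True}}},
                ]
            }
        },
        # latest end_date first
        "sort": [{"end_date": {"order": "desc"}}],
    }
    return client.search(index="my-index", body=query_all)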
I keep getting the following error in Python:
Exception has occurred: TypeError: unhashable type: 'dict'
It points at line 92, which is:
"should": [],
"must_not": []
This is the search call:
res = es.search(
scroll = '2m',
index = "logstash-*",
body = {
{
"aggs": {
"2": {
"terms": {
"field": "src_ip.keyword",
"size": 50,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"cardinality": {
"field": "src_ip.keyword"
}
}
}
}
},
"size": 0,
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
},
{
"field": "flow.start",
"format": "date_time"
},
{
"field": "timestamp",
"format": "date_time"
},
{
"field": "tls.notafter",
"format": "date_time"
},
{
"field": "tls.notbefore",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": 1555777931992,
"lte": 1558369931992,
"format": "epoch_millis"
}
}
}
],
"filter": [
{
"match_all": {}
}
],
"should": [],
"must_not": []
}
}
}
})
The value of body is a set ({ } without key-value pairs is a set literal; e.g. {1, 2} is a set). Inside this set you have a dictionary.
Items in a set have to be hashable, and a dictionary isn't.
As the comment from @Carcigenicate says, it seems to be a typo: {{ }} instead of { } for the value of body.
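Concretely, dropping the extra pair of braces turns body back into a dict. A trimmed sketch (the aggregations and docvalue_fields are elided for brevity):

res = es.search(
    scroll='2m',
    index="logstash-*",
    body={  # single braces: a dict, not a set containing a dict
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    {"range": {"@timestamp": {
                        "gte": 1555777931992,
                        "lte": 1558369931992,
                        "format": "epoch_millis"
                    }}}
                ],
                "filter": [{"match_all": {}}],
                "should": [],
                "must_not": [],
            }
        },
    },
)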
The Elasticsearch documentation shows that body should be a dictionary.
More about sets in the Python docs.