Add #timestamp field in ElasticSearch with Python - python

I'm using Python to add entries in a local ElasticSearch (localhost:9200)
Currently, I use this method:
def insertintoes(data):
"""
Insert data into ElasicSearch
:param data: dict
:return:
"""
timestamp = data.get('#timestamp')
logstashIndex = 'logstash-' + timestamp.strftime("%Y.%m.%d")
es = Elasticsearch()
if not es.indices.exists(logstashIndex):
# Setting mappings for index
mapping = '''
{
"mappings": {
"_default_": {
"_all": {
"enabled": true,
"norms": false
},
"dynamic_templates": [
{
"message_field": {
"path_match": "message",
"match_mapping_type": "string",
"mapping": {
"norms": false,
"type": "text"
}
}
},
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"fields": {
"keyword": {
"type": "keyword"
}
},
"norms": false,
"type": "text"
}
}
}
],
"properties": {
"#timestamp": {
"type": "date",
"include_in_all": true
},
"#version": {
"type": "keyword",
"include_in_all": true
}
}
}
}
}
'''
es.indices.create(logstashIndex, ignore=400, body=mapping)
es.index(index=logstashIndex, doc_type='system', timestamp=timestamp, body=data)
data is a dict structure with a valid #timestamp defined like this data['#timestamp'] = datetime.datetime.now()
The problem is, even if there is a timestamp value in my data, Kibana doesn't show the entry in «discovery» field. :(
Here is an example of a full entry in ElasicSearch:
{
"_index": "logstash-2017.06.25",
"_type": "system",
"_id": "AVzf3QX3iazKBndbIkg4",
"_score": 1,
"_source": {
"priority": 6,
"uid": 0,
"gid": 0,
"systemd_slice": "system.slice",
"cap_effective": "1fffffffff",
"exe": "/usr/bin/bash",
"hostname": "ns3003395",
"syslog_facility": 9,
"comm": "crond",
"systemd_cgroup": "/system.slice/cronie.service",
"systemd_unit": "cronie.service",
"syslog_identifier": "CROND",
"message": "(root) CMD (/usr/local/rtm/bin/rtm 14 > /dev/null 2> /dev/null)",
"systemd_invocation_id": "9228b6c72e6a4624a1806e4c59af8d04",
"syslog_pid": 26652,
"pid": 26652,
"#timestamp": "2017-06-25T17:27:01.734453"
}
}
As you can see, there IS a #timestamp field but it doesn't seems to be what Kibana expects.
And don't know what to do to make my entries visible in Kibana.
Any idea ?

Elasticsearch is not recognizing #timestamp as a date, but as a string. If your data['#timestamp'] is a datetime object, you can try to convert it to a ISO string, which is automatically recognized, try:
timestamp = data.get('#timestamp').isoformat()
timestamp should now be a string, but in ISO format

Related

replace nested document array mongodb with python

i have this document in mongodb
{
"_id": {
"$oid": "62644af0368cb0a46d7c2a95"
},
"insertionData": "23/04/2022 19:50:50",
"ipfsMetadata": {
"Name": "data.json",
"Hash": "Qmb3FWgyJHzJA7WCBX1phgkV93GiEQ9UDWUYffDqUCbe7E",
"Size": "431"
},
"metadata": {
"sessionDate": "20220415 17:42:55",
"dataSender": "user345",
"data": {
"height": "180",
"weight": "80"
},
"addtionalInformation": [
{
"name": "poolsize",
"value": "30m"
},
{
"name": "swimStyle",
"value": "mariposa"
},
{
"name": "modality",
"value": "swim"
},
{
"name": "gender-title",
"value": "schoolA"
}
]
},
"fileId": {
"$numberLong": "4"
}
}
I want to update nested array document, for instance the name with gender-tittle. This have value schoolA and i want to change to adult like the body. I give the parameter number of fileId in the post request and in body i pass this
post request : localhost/sessionUpdate/4
and body:
{
"name": "gender-title",
"value": "adultos"
}
flask
#app.route('/sessionUpdate/<string:a>', methods=['PUT'])
def sessionUpdate(a):
datas=request.json
r=str(datas['name'])
r2=str(datas['value'])
print(r,r2)
r3=collection.update_one({'fileId':a, 'metadata.addtionalInformation':r}, {'$set':{'metadata.addtionalInformation.$.value':r2}})
return str(r3),200
i'm getting the 200 but the document don't update with the new value.
As you are using positional operator $ to work with your array, make sure your select query is targeting array element. You can see in below query that it is targeting metadata.addtionalInformation array with the condition that name: "gender-title"
db.collection.update({
"fileId": 4,
"metadata.addtionalInformation.name": "gender-title"
},
{
"$set": {
"metadata.addtionalInformation.$.value": "junior"
}
})
Here is the Mongo playground for your reference.

How to save polygon data on Elasticsearch through Django GeoShapeField inside NestedField?

The models looks like -
class Restaurant(models.Model):
zones = JSONField(default=dict)
The document looks like-
#registry.register_document
class RestaurantDocument(Document):
zone = fields.NestedField(properties={"slug": fields.KeywordField(), "polygon_zone": fields.GeoShapeField()})
class Index:
name = 'restaurant_data'
settings = {
'number_of_shards': 1,
'number_of_replicas': 0
}
class Django:
model = Restaurant
def prepare_zone(self, instance):
return instance.zone
After indexing the mapping looks like-
"zone": {
"type": "nested",
"properties": {
"polygon_zone": {
"type": "geo_shape"
},
"slug": {
"type": "keyword"
}
}
}
But when I am saving data on zones field by following structure-
[{"slug":"dhaka","ploygon_zone":{"type":"polygon","coordinates":[[[89.84207153320312,24.02827811169503],[89.78233337402344,23.93040645231774],[89.82833862304688,23.78722976367578],[90.02197265625,23.801051951752406],[90.11329650878905,23.872024546162947],[90.11672973632812,24.00883517846163],[89.84207153320312,24.02827811169503]]]}}]
Then the elasticsearch mapping has been changed automatically by the following way-
"zone": {
"type": "nested",
"properties": {
"ploygon_zone": {
"properties": {
"coordinates": {
"type": "float"
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"polygon_zone": {
"type": "geo_shape"
},
"slug": {
"type": "keyword"
}
}
}
That's why when I try to search on zone__polygon_zone field, it always returns empty because its not polygon type data.
So, how can I save polygon data on elasticsearch trough django by nested geoshape field?
There is a type while index the data. Instead of ploygon_zone, it should be polygon_zone. I believe fixing the typo will solve the issue that you are facing.

How to add data to a topic using AvroProducer

I have a topic with the following schema. Could someone help me out on how to add data to the different fields.
{
"name": "Project",
"type": "record",
"namespace": "abcdefg",
"fields": [
{
"name": "Object",
"type": {
"name": "Object",
"type": "record",
"fields": [
{
"name": "Number_ID",
"type": "int"
},
{
"name": "Accept",
"type": "boolean"
}
]
}
},
{
"name": "DataStructureType",
"type": "string"
},
{
"name": "ProjectID",
"type": "string"
}
]
}
I tried the following code. I get list is not iterable or list is out of range.
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer
AvroProducerConf = {'bootstrap.servers': 'localhost:9092','schema.registry.url': 'http://localhost:8081'}
value_schema = avro.load('project.avsc')
avroProducer = AvroProducer(AvroProducerConf, default_value_schema = value_schema)
while True:
avroProducer.produce(topic = 'my_topic', value = {['Object'][0] : "value", ['Object'] [1] : "true", ['DataStructureType'] : "testvalue", ['ProjectID'] : "123"})
avroProducer.flush()
It's not clear what you're expecting something like this to do... ['Object'][0] and keys of a dict cannot be lists.
Try sending this, which matches your Avro schema
value = {
'Object': {
"Number_ID", 1,
"Accept": true
},
'DataStructureType' : "testvalue",
'ProjectID' : "123"
}

simple Elasticsearch nested search query

I have documents in ES (Tweepy JSON) like this
{
"_source": {
"id": 792477813014224900,
"metadata": {
"iso_language_code": "en",
"result_type": "recent"
},
"retweeted": false,
"retweet_count": 330,
"user": {
"id": 149250899,
"listed_count": 0,
"protected": false,
"followers_count": 347,
"entities": {
"description": {
"urls": []
}
},
"screen_name": "Zwido_"
}
And I would like to search and query one full document based by user_name field.
I tryied this code
{
"nested": {
"path": "_source",
"score_mode": "avg",
"query": {
"bool": {
"must": [
{
"text": {"_source.user.user_name": user}
}
]
}
}
}
}
But it doesn't work and I received error
TransportError(400, 'search_phase_execution_exception', 'failed to parse search source. unknown search element [nested]
What I am doing wrong?
Thanks for help.
You don't need to specify the _source field + you're missing a query at the top-level, do it like this instead.
{
"query": {
"nested": {
"path": "user",
"score_mode": "avg",
"query": {
"bool": {
"must": [
{
"match": {"user.screen_name": user}
}
]
}
}
}
}
}
UPDATE
If your user field is not of nested type, then you can simply do it like this:
{
"query": {
"bool": {
"must": [
{
"match": {
"user.screen_name": user
}
}
]
}
}
}
as mentioned in the elasticsearch documentation here you should change the mapping of your data to tell elasticsearch that it is nested object. Once that is done then you can query the object.

JSON Schema: Validate that exactly one property is present

I would like validate a JSON structure in which either the userId key or the appUserId key must be present (exactly one of them - not both).
For example,
{ "userId": "X" }
{ "appUserId": "Y" }
Are valid, but:
{ "userId": "X", "appUserId": "Y"}
{ }
Are not.
How can I validate this condition using a JSON Schema? I have tried the oneOf keyword, but it works for values, not keys.
This works for me:
from jsonschema import validate
schema = {
"type" : "object",
"properties" : {
"userId": {"type" : "number"},
"appUserId": {"type" : "number"},
},
"oneOf": [
{
"type": "object",
"required": ["userId"],
},
{
"type": "object",
"required": ["appUserId"],
}
],
}
validate({'userId': 1}, schema) # Ok
validate({'appUserId': 1}, schema) # Ok
validate({'userId': 1, 'appUserId': 1}, schema) # ValidationError
I would use a combination of min/maxProperties and additionalProperties in schema:
{
"type" : "object",
"properties" : {
"userId": { "type": "string" },
"appUserId": { "type": "string" },
},
"maxProperties": 1,
"minProperties": 1,
"additionalProperties": false
}
// invalid cases
{ }
{ "userId": "111", "appUserId": "222" }
{ "anotherUserId": "333" }
// valid cases
{ "userId": "111" }
{ "appUserId": "222" }

Categories