I have the following kind of structure to handle:
# Sample input: a named container whose "events" list holds named entries.
# Every entry carries "name" and "data" (entries below the event level also
# carry "data_id"). "data" is a plain value, a {"value": ...} dict, or a nested
# list of further named entries (see "element").
payload = {
"name":"Event1",
"events":[
{
"name":"A",
"data":[
{
"name":"subscriptionId",
"data_id":0,
"data":0
},
{
"name":"updateCounter",
"data_id":1,
"data":0
},
{
"name":"noOfMessages",
"data_id":2,
"data":0
},
{
"name":"counter",
"data_id":3,
"data":0
},
{
"name":"resourceElements",
"data_id":4,
"data":0
},
{
"name":"type",
"data_id":5,
"data":0
},
{
"name":"subscription",
"data_id":6,
"data":0
},
{
"name":"element",
"data_id":7,
"data":[
{
"name":"type",
"data_id":0,
"data":0
},
{
"name":"plugLockState",
"data_id":1,
"data":{
"value":""
}
},
{
"name":"lockState",
"data_id":2,
"data":{
"value":""
}
},
{
"name":"flapState",
"data_id":6,
"data":{
"value":""
}
},
{
"name":"plugState",
"data_id":3,
"data":0
},
{
"name":"plugConnectionState",
"data_id":4,
"data":0
},
{
"name":"infrastructureState",
"data_id":5,
"data":0
}
]
}
]
}
]
}
I want to prefix every "name" value within the nested structure with the names of its parents (joined by dots), so the ideal result should look like this:
{
"name":"Event1",
"events":[
{
"name":"Event1.A",
"data":[
{
"name":"Event1.A.subscriptionId",
"data_id":0,
"data":0
},
{
"name":"Event1.A.updateCounter",
"data_id":1,
"data":0
},
{
"name":"Event1.A.noOfMessages",
"data_id":2,
"data":0
},
{
"name":"Event1.A.counter",
"data_id":3,
"data":0
},
{
"name":"Event1.A.resourceElements",
"data_id":4,
"data":0
},
{
"name":"Event1.A.type",
"data_id":5,
"data":0
},
{
"name":"Event1.A.subscription",
"data_id":6,
"data":0
},
{
"name":"Event1.A.element",
"data_id":7,
"data":[
{
"name":"Event1.A.element.type",
"data_id":0,
"data":0
},
{
"name":"Event1.A.element.plugLockState",
"data_id":1,
"data":{
"value":""
}
},
{
"name":"Event1.A.element.lockState",
"data_id":2,
"data":{
"value":""
}
},
{
"name":"Event1.A.element.flapState",
"data_id":6,
"data":{
"value":""
}
},
{
"name":"Event1.A.element.plugState",
"data_id":3,
"data":0
},
{
"name":"Event1.A.element.plugConnectionState",
"data_id":4,
"data":0
},
{
"name":"Event1.A.element.infrastructureState",
"data_id":5,
"data":0
}
]
}
]
}
]
}
So far I have written this recursive function:
def iterate_recursively(dictionary: dict, names=None):
    """Collect the "name" of every dict that sits inside a list, depth first.

    The input structure is only read, never modified.

    Args:
        dictionary: The (possibly nested) dict to walk.
        names: Optional list to append the found names to. A fresh list is
            created when omitted.

    Returns:
        The list of collected names (the same object as ``names`` when one
        was passed in).
    """
    if names is None:
        names = []
    for value in dictionary.values():
        if isinstance(value, dict):
            # Hand the accumulator down; otherwise nested names would be
            # collected into a throwaway list and lost.
            iterate_recursively(value, names)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    # Entries without a "name" are still searched for nested ones.
                    if "name" in item:
                        names.append(item["name"])
                    iterate_recursively(item, names)
    return names
But I simply don't get it. How can the names be changed, according to my requirement, while iterating recursively?
Here's a variant that returns a new dictionary (thus leaving the original one unchanged).
code00.py:
#!/usr/bin/env python
import sys
from pprint import pprint as pp
# Sample input: a named container whose "events" list holds named entries.
# Every entry carries "name" and "data" (entries below the event level also
# carry "data_id"). "data" is a plain value, a {"value": ...} dict, or a nested
# list of further named entries (see "element").
payload = {
"name": "Event1",
"events": [
{
"name": "A",
"data": [
{
"name": "subscriptionId",
"data_id": 0,
"data": 0
},
{
"name": "updateCounter",
"data_id": 1,
"data": 0
},
{
"name": "noOfMessages",
"data_id": 2,
"data": 0
},
{
"name": "counter",
"data_id": 3,
"data": 0
},
{
"name": "resourceElements",
"data_id": 4,
"data": 0
},
{
"name": "type",
"data_id": 5,
"data": 0
},
{
"name": "subscription",
"data_id": 6,
"data": 0
},
{
"name": "element",
"data_id": 7,
"data": [
{
"name": "type",
"data_id": 0,
"data": 0
},
{
"name": "plugLockState",
"data_id": 1,
"data": {
"value": ""
}
},
{
"name": "lockState",
"data_id": 2,
"data": {
"value": ""
}
},
{
"name": "flapState",
"data_id": 6,
"data": {
"value": ""
}
},
{
"name": "plugState",
"data_id": 3,
"data": 0
},
{
"name": "plugConnectionState",
"data_id": 4,
"data": 0
},
{
"name": "infrastructureState",
"data_id": 5,
"data": 0
}
]
}
]
}
]
}
def concat_names(data, names=()):
    """Return a copy of ``data`` with every "name" replaced by its dotted path.

    The path of a dict is the names of all enclosing named dicts plus its own
    name, joined with ".". The input is left untouched.

    Args:
        data: Any nesting of dicts, lists/tuples and plain values.
        names: Tuple of ancestor names collected so far (used by the
            recursion; callers normally omit it).

    Returns:
        A new structure of the same shape. Lists and tuples both come back
        as lists; plain values are returned as they are.
    """
    if isinstance(data, dict):
        name = data.get("name")
        if name is None:
            # Nothing to add at this level: children inherit the parent's
            # path, and an explicit ``"name": None`` is kept as None rather
            # than being overwritten with the parent's path.
            path = names
            dotted = None
        else:
            path = names + (name,)
            dotted = ".".join(path)
        return {
            key: dotted if key == "name" else concat_names(value, names=path)
            for key, value in data.items()
        }
    if isinstance(data, (list, tuple)):
        return [concat_names(item, names=names) for item in data]
    return data
def main(*argv):
    """Pretty-print the renamed copy of the module-level ``payload``.

    ``argv`` is accepted but not used. Keys are printed in insertion order.
    """
    renamed = concat_names(payload)
    pp(renamed, indent=2, sort_dicts=False)
if __name__ == "__main__":
    # Banner: interpreter version collapsed onto one line, pointer width, platform.
    version_line = " ".join(part.strip() for part in sys.version.split("\n"))
    bits = 64 if sys.maxsize > 0x100000000 else 32
    print("Python {:s} {:03d}bit on {:s}\n".format(version_line, bits, sys.platform))
    # Run the script body with the command-line arguments (program name dropped).
    rc = main(*sys.argv[1:])
    print("\nDone.")
    sys.exit(rc)
Output:
[cfati#CFATI-5510-0:e:\Work\Dev\StackOverflow\q073621243]> "e:\Work\Dev\VEnvs\py_pc064_03.09_test0\Scripts\python.exe" ./code00.py
Python 3.9.9 (tags/v3.9.9:ccb0e6a, Nov 15 2021, 18:08:50) [MSC v.1929 64 bit (AMD64)] 064bit on win32
{ 'name': 'Event1',
'events': [ { 'name': 'Event1.A',
'data': [ { 'name': 'Event1.A.subscriptionId',
'data_id': 0,
'data': 0},
{ 'name': 'Event1.A.updateCounter',
'data_id': 1,
'data': 0},
{ 'name': 'Event1.A.noOfMessages',
'data_id': 2,
'data': 0},
{'name': 'Event1.A.counter', 'data_id': 3, 'data': 0},
{ 'name': 'Event1.A.resourceElements',
'data_id': 4,
'data': 0},
{'name': 'Event1.A.type', 'data_id': 5, 'data': 0},
{ 'name': 'Event1.A.subscription',
'data_id': 6,
'data': 0},
{ 'name': 'Event1.A.element',
'data_id': 7,
'data': [ { 'name': 'Event1.A.element.type',
'data_id': 0,
'data': 0},
{ 'name': 'Event1.A.element.plugLockState',
'data_id': 1,
'data': {'value': ''}},
{ 'name': 'Event1.A.element.lockState',
'data_id': 2,
'data': {'value': ''}},
{ 'name': 'Event1.A.element.flapState',
'data_id': 6,
'data': {'value': ''}},
{ 'name': 'Event1.A.element.plugState',
'data_id': 3,
'data': 0},
{ 'name': 'Event1.A.element.plugConnectionState',
'data_id': 4,
'data': 0},
{ 'name': 'Event1.A.element.infrastructureState',
'data_id': 5,
'data': 0}]}]}]}
Done.
You can do something like this:
def iterate_recursively(dictionary: dict, prefix_name=None):
    """Prefix every "name" in a nested structure with its parents' names, in place.

    Each dict that has a "name" gets it rewritten to
    ``"<parent path>.<own name>"``; a dict without a parent path keeps its
    name and starts a new path. Nested dicts and dicts inside lists are
    processed recursively; anything else is left alone.

    Args:
        dictionary: The dict to rewrite. It is modified in place.
        prefix_name: Dotted path of the enclosing named dicts, or None at
            the top level.

    Returns:
        None.
    """
    if not isinstance(dictionary, dict):
        return
    if 'name' in dictionary:
        if prefix_name is None:
            prefix_name = dictionary['name']
        else:
            prefix_name += '.' + dictionary['name']
        dictionary['name'] = prefix_name
    for value in dictionary.values():
        if isinstance(value, dict):
            iterate_recursively(value, prefix_name)
        elif isinstance(value, list):
            for item in value:
                # Lists may also hold plain values (strings, numbers, ...);
                # only dicts can carry a "name".
                if isinstance(item, dict):
                    iterate_recursively(item, prefix_name)
I have the dictionary below.
# Filter values keyed by field name; each value is a list of strings.
my_d = {
    "country": ["Germany", "France"],
    "games": ["Football,Motorsport"],
    "bayern": ["Muller"],
}
I need to create a dictionary using the keys and values above.
Each key gets a ".keyword" suffix in the output, e.g. country.keyword:
{
"query": {
"bool": {
"must": [
{
"terms": {
"country.keyword": [
"Germany",
"France"
]
}
},
{
"terms": {
"games.keyword": [
"Football",
"Motorsport"
]
}
},
{
"match": {
"bayern.keyword": ["Muller"]
}
}
]
}
}
}
if my_d = {'country': ['Germany',"France"]} or my_d = {'country': ['Germany',"France"],
'games': None,
'bayern':None}
{
"query": {
"bool": {
"must": [
{
"terms": {
"country.keyword": [
"Germany",
"France"
]
}
}
]
}
}
}
Generally I would recommend using the third-party Elasticsearch Python package to query Elasticsearch, but I believe this code should work (Python 3.6+, since it uses f-strings):
# One {"<field>.keyword": values} clause per filter in my_d. Filters whose
# value is None are skipped, so they produce no clause at all.
must_clauses = [
    {f"{key}.keyword": value}
    for key, value in my_d.items()
    if value is not None
]
# Wrap each clause in a "terms" query.
terms = [{"terms": must_clause} for must_clause in must_clauses]
# Request body: all remaining "terms" clauses go under bool/must.
query_template = {
    "query": {
        "bool": {
            "must": terms,
        },
    },
}
I have a field with phone numbers in one of two formats - XXX-XXX-XXXX or XXXXXXXXXX (it's a merged table).
I want to be able to search XXXXXXXXXX and get results from both formats.
I tried using the decimal digit filter but it didn't work.
Here are the settings that I have tried:
# Index definition: field mappings for one document type plus the custom
# analyzers those fields refer to.
# DOC_TYPE is not defined in this snippet; it must come from the surrounding code.
mapping = {
'mappings': {
DOC_TYPE: {
'properties': {
'first_name': {
'type': 'text',
'analyzer': 'word_splitter'
},
'last_name': {
'type': 'text',
'analyzer': 'word_splitter'
},
'email': {
'type': 'text',
'analyzer': 'email'
},
'gender': {
'type': 'text'
},
'ip_address': {
'type': 'text'
},
'language': {
'type': 'text'
},
# The phone field is analyzed with the custom 'digits' analyzer defined below.
'phone': {
'type': 'text',
'analyzer': 'digits'
},
'id': {
'type': 'long'
}
}
}
},
'settings': {
'analysis': {
'analyzer': {
# 'my_analyzer' and 'better' are defined but not referenced by any field above.
'my_analyzer': {
'type': 'whitespace'
},
'better': {
'type': 'standard'
},
# NOTE(review): min_gram/max_gram are set on the analyzer itself here; they are
# presumably tokenizer parameters - confirm against the ngram tokenizer docs.
'word_splitter': {
'type': 'custom',
'tokenizer': 'nGram',
'min_gram': 5,
'max_gram': 5,
'filter': [
'lowercase'
]
},
'email': {
'type': 'custom',
'tokenizer': 'uax_url_email'
},
# Whitespace tokenizer followed by the decimal_digit token filter.
# NOTE(review): nothing here removes the '-' separators, so "XXX-XXX-XXXX"
# presumably stays a single hyphenated token - confirm with the _analyze API.
'digits': {
'type': 'custom',
'tokenizer': 'whitespace',
'filter': [
'decimal_digit'
]
}
}
}
}
}
Any ideas?
Use a char_filter to remove the hyphens before indexing. As a simple example:
Set up the custom analyzer and apply it to the phone field.
PUT my_index
{
"settings": {
"analysis": {
"analyzer": {
"phone_analyzer": {
"tokenizer": "standard",
"char_filter": [
"phone_char_filter"
]
}
},
"char_filter": {
"phone_char_filter": {
"type": "mapping",
"mappings": [
"- => "
]
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"phone": {
"type": "text",
"analyzer": "phone_analyzer"
}
}
}
}
}
Add some docs
POST my_index/_doc
{"phone": "123-456-7890"}
POST my_index/_doc
{"phone": "2345678901"}
Search in xxx-xxx-xxxx format
GET my_index/_search
{
"query": {
"match": {
"phone": "123-456-7890"
}
}
}
Search in xxxxxxxxxx format
GET my_index/_search
{
"query": {
"match": {
"phone": "1234567890"
}
}
}