Remove duplicate values from list of nested dictionaries - python

I have a list of dictionaries with a nested structure, and I need to remove all duplicate values from it. I'm new to Python and can't solve this task. Can anyone help me?
My list looks like:
[
{
"task_id":123,
"results":[
{
"url":"site.com",
"date":"04.18.2019"
},
{
"url":"another_site.com",
"date":"04.18.2019"
},
{
"url":"site1.com",
"date":"04.18.2019"
}
]
},
{
"task_id":456,
"results":[
{
"url":"site3.com",
"date":"04.18.2019"
},
{
"url":"site.com",
"date":"04.18.2019"
}
]
},
{
"task_id":789,
"results":[
{
"url":"site7.com",
"date":"04.18.2019"
},
{
"url":"site9.com",
"date":"04.18.2019"
},
{
"url":"site.com",
"date":"04.18.2019"
}
]
}
]
I need site.com to appear only once. If a url value has already appeared in an earlier entry, exclude the dict that contains it from the results.
As result:
task 123 with 3 dicts in results
task 456 with 1 dict in results (exclude site.com)
task 789 with 2 dict in results (exclude site.com)
The desired output should look like this:
[
{
"task_id":123,
"results":[
{
"url":"site.com",
"date":"04.18.2019"
},
{
"url":"another_site.com",
"date":"04.18.2019"
},
{
"url":"site1.com",
"date":"04.18.2019"
}
]
},
{
"task_id":456,
"results":[
{
"url":"site3.com",
"date":"04.18.2019"
}
]
},
{
"task_id":789,
"results":[
{
"url":"site7.com",
"date":"04.18.2019"
},
{
"url":"site9.com",
"date":"04.18.2019"
}
]
}
]

Let results be your list.
# Keep only the first occurrence of each URL across all tasks, while
# preserving the per-task structure (task_id plus its own results list).
seen_urls = set()
final = []
for task in results:
    kept = []
    for res in task["results"]:
        if res["url"] not in seen_urls:
            seen_urls.add(res["url"])
            kept.append(res)
    # Build a new task dict so the input list is left unmodified.
    final.append({**task, "results": kept})
print(final)

You can use a list comprehension:
d = [
    {
        'task_id': 123,
        'results': [
            {'url': 'site.com', 'date': '04.18.2019'},
            {'url': 'another_site.com', 'date': '04.18.2019'},
            {'url': 'site1.com', 'date': '04.18.2019'},
        ],
    },
    {
        'task_id': 456,
        'results': [
            {'url': 'site3.com', 'date': '04.18.2019'},
            {'url': 'site.com', 'date': '04.18.2019'},
        ],
    },
    {
        'task_id': 789,
        'results': [
            {'url': 'site7.com', 'date': '04.18.2019'},
            {'url': 'site9.com', 'date': '04.18.2019'},
            {'url': 'site.com', 'date': '04.18.2019'},
        ],
    },
]

# Deduplicate on the URL itself (not on the whole result dict), so an entry
# whose URL was already seen is dropped even if its other fields differ.
# A single set gives one pass over the data instead of rescanning every
# earlier task for each result.
seen_urls = set()
new_d = []
for task in d:
    kept = []
    for result in task['results']:
        if result['url'] not in seen_urls:
            seen_urls.add(result['url'])
            kept.append(result)
    # Copy the task so `d` itself is left untouched.
    new_d.append({**task, 'results': kept})
Output:
[
{
"task_id": 123,
"results": [
{
"url": "site.com",
"date": "04.18.2019"
},
{
"url": "another_site.com",
"date": "04.18.2019"
},
{
"url": "site1.com",
"date": "04.18.2019"
}
]
},
{
"task_id": 456,
"results": [
{
"url": "site3.com",
"date": "04.18.2019"
}
]
},
{
"task_id": 789,
"results": [
{
"url": "site7.com",
"date": "04.18.2019"
},
{
"url": "site9.com",
"date": "04.18.2019"
}
]
}
]

people = {
    1: {'name': 'John'},
    2: {'name': 'Marie'},
    3: {'name': 'Ann'},
    4: {'name': 'John'},
}
print(people)

# Keep the first key for every distinct value; later keys whose value
# compares equal to one already kept are dropped.
unique = {}
for key, value in people.items():
    if value not in unique.values():
        unique[key] = value
print(unique)
try these

Related

Modify nested dict key name by its parent

I have a following kind of structure to be handled:
payload = {
"name":"Event1",
"events":[
{
"name":"A",
"data":[
{
"name":"subscriptionId",
"data_id":0,
"data":0
},
{
"name":"updateCounter",
"data_id":1,
"data":0
},
{
"name":"noOfMessages",
"data_id":2,
"data":0
},
{
"name":"counter",
"data_id":3,
"data":0
},
{
"name":"resourceElements",
"data_id":4,
"data":0
},
{
"name":"type",
"data_id":5,
"data":0
},
{
"name":"subscription",
"data_id":6,
"data":0
},
{
"name":"element",
"data_id":7,
"data":[
{
"name":"type",
"data_id":0,
"data":0
},
{
"name":"plugLockState",
"data_id":1,
"data":{
"value":""
}
},
{
"name":"lockState",
"data_id":2,
"data":{
"value":""
}
},
{
"name":"flapState",
"data_id":6,
"data":{
"value":""
}
},
{
"name":"plugState",
"data_id":3,
"data":0
},
{
"name":"plugConnectionState",
"data_id":4,
"data":0
},
{
"name":"infrastructureState",
"data_id":5,
"data":0
}
]
}
]
}
]
}
I want to replace any key name within the nested structure by the parent, so the ideal result should look like this:
{
"name":"Event1",
"events":[
{
"name":"Event1.A",
"data":[
{
"name":"Event1.A.subscriptionId",
"data_id":0,
"data":0
},
{
"name":"Event1.A.updateCounter",
"data_id":1,
"data":0
},
{
"name":"Event1.A.noOfMessages",
"data_id":2,
"data":0
},
{
"name":"Event1.A.counter",
"data_id":3,
"data":0
},
{
"name":"Event1.A.resourceElements",
"data_id":4,
"data":0
},
{
"name":"Event1.A.type",
"data_id":5,
"data":0
},
{
"name":"Event1.A.subscription",
"data_id":6,
"data":0
},
{
"name":"Event1.A.element",
"data_id":7,
"data":[
{
"name":"Event1.A.element.type",
"data_id":0,
"data":0
},
{
"name":"Event1.A.element.plugLockState",
"data_id":1,
"data":{
"value":""
}
},
{
"name":"Event1.A.element.lockState",
"data_id":2,
"data":{
"value":""
}
},
{
"name":"Event1.A.element.flapState",
"data_id":6,
"data":{
"value":""
}
},
{
"name":"Event1.A.element.plugState",
"data_id":3,
"data":0
},
{
"name":"Event1.A.element.plugConnectionState",
"data_id":4,
"data":0
},
{
"name":"Event1.A.element.infrastructureState",
"data_id":5,
"data":0
}
]
}
]
}
]
}
so far I have written this recursive method:
# Asker's attempt: walks nested dicts/lists and appends the "name" of every
# dict found inside a list to `names`.
def iterate_recursively(dictionary: dict, names=None):
if names is None:
names = []
for k, v in dictionary.items():
if isinstance(v, dict):
# NOTE(review): `names` is not forwarded, so the nested call starts from a fresh empty list.
iterate_recursively(v)
elif isinstance(v, list):
for d in v:
if isinstance(d, dict):
names.append(d["name"])
# NOTE(review): `names` is not forwarded here either, and nothing is ever written back to d["name"].
iterate_recursively(d)
but I simply don't get it. How can the keys, based on my requirement, be changed while iterating recursively?
Here's a variant that returns a new dictionary (and thus leaving the original one unchanged).
code00.py:
#!/usr/bin/env python
import sys
from pprint import pprint as pp
payload = {
"name": "Event1",
"events": [
{
"name": "A",
"data": [
{
"name": "subscriptionId",
"data_id": 0,
"data": 0
},
{
"name": "updateCounter",
"data_id": 1,
"data": 0
},
{
"name": "noOfMessages",
"data_id": 2,
"data": 0
},
{
"name": "counter",
"data_id": 3,
"data": 0
},
{
"name": "resourceElements",
"data_id": 4,
"data": 0
},
{
"name": "type",
"data_id": 5,
"data": 0
},
{
"name": "subscription",
"data_id": 6,
"data": 0
},
{
"name": "element",
"data_id": 7,
"data": [
{
"name": "type",
"data_id": 0,
"data": 0
},
{
"name": "plugLockState",
"data_id": 1,
"data": {
"value": ""
}
},
{
"name": "lockState",
"data_id": 2,
"data": {
"value": ""
}
},
{
"name": "flapState",
"data_id": 6,
"data": {
"value": ""
}
},
{
"name": "plugState",
"data_id": 3,
"data": 0
},
{
"name": "plugConnectionState",
"data_id": 4,
"data": 0
},
{
"name": "infrastructureState",
"data_id": 5,
"data": 0
}
]
}
]
}
]
}
def concat_names(data, names=()):
    """Return a copy of *data* whose ``"name"`` values are dot-joined paths.

    Walks nested dicts and lists/tuples. Every dict that has a non-null
    ``"name"`` gets that value replaced by the names of its ancestors plus
    its own name, joined with ``"."`` (e.g. ``"Event1.A.element"``).

    Args:
        data: Arbitrary nesting of dicts, lists/tuples and scalars.
        names: Tuple of ancestor names accumulated so far.

    Returns:
        A new structure; dicts are rebuilt, lists and tuples come back as
        lists, and any other value is returned as is. The input is not
        modified.
    """
    if isinstance(data, dict):
        name = data.get("name")
        # A missing or null name contributes nothing to the path.
        path = names + (name,) if name is not None else names
        renamed = {}
        for key, value in data.items():
            if key == "name":
                # Leave an explicit null name alone rather than overwriting
                # it with the parent's path.
                renamed[key] = ".".join(path) if value is not None else None
            else:
                renamed[key] = concat_names(value, names=path)
        return renamed
    if isinstance(data, (list, tuple)):
        return [concat_names(item, names=names) for item in data]
    return data
def main(*argv):
    """Pretty-print the renamed payload without sorting the dict keys."""
    renamed = concat_names(payload)
    pp(renamed, indent=2, sort_dicts=False)
if __name__ == "__main__":
    # Banner: interpreter version collapsed onto one line, word size, platform.
    version = " ".join(elem.strip() for elem in sys.version.split("\n"))
    bits = 64 if sys.maxsize > 0x100000000 else 32
    print("Python {:s} {:03d}bit on {:s}\n".format(version, bits, sys.platform))
    rc = main(*sys.argv[1:])
    print("\nDone.")
    sys.exit(rc)
Output:
[cfati#CFATI-5510-0:e:\Work\Dev\StackOverflow\q073621243]> "e:\Work\Dev\VEnvs\py_pc064_03.09_test0\Scripts\python.exe" ./code00.py
Python 3.9.9 (tags/v3.9.9:ccb0e6a, Nov 15 2021, 18:08:50) [MSC v.1929 64 bit (AMD64)] 064bit on win32
{ 'name': 'Event1',
'events': [ { 'name': 'Event1.A',
'data': [ { 'name': 'Event1.A.subscriptionId',
'data_id': 0,
'data': 0},
{ 'name': 'Event1.A.updateCounter',
'data_id': 1,
'data': 0},
{ 'name': 'Event1.A.noOfMessages',
'data_id': 2,
'data': 0},
{'name': 'Event1.A.counter', 'data_id': 3, 'data': 0},
{ 'name': 'Event1.A.resourceElements',
'data_id': 4,
'data': 0},
{'name': 'Event1.A.type', 'data_id': 5, 'data': 0},
{ 'name': 'Event1.A.subscription',
'data_id': 6,
'data': 0},
{ 'name': 'Event1.A.element',
'data_id': 7,
'data': [ { 'name': 'Event1.A.element.type',
'data_id': 0,
'data': 0},
{ 'name': 'Event1.A.element.plugLockState',
'data_id': 1,
'data': {'value': ''}},
{ 'name': 'Event1.A.element.lockState',
'data_id': 2,
'data': {'value': ''}},
{ 'name': 'Event1.A.element.flapState',
'data_id': 6,
'data': {'value': ''}},
{ 'name': 'Event1.A.element.plugState',
'data_id': 3,
'data': 0},
{ 'name': 'Event1.A.element.plugConnectionState',
'data_id': 4,
'data': 0},
{ 'name': 'Event1.A.element.infrastructureState',
'data_id': 5,
'data': 0}]}]}]}
Done.
You can do something like this:
def iterate_recursively(dictionary: dict, prefix_name=None):
    """Prefix every nested ``"name"`` with its ancestors' names, in place.

    Args:
        dictionary: Dict to rewrite; it is modified in place.
        prefix_name: Dot-joined names of the ancestors, or ``None`` at the
            top level.

    Returns:
        None. The result is the mutated *dictionary*.
    """
    if 'name' in dictionary:
        if prefix_name is None:
            prefix_name = dictionary['name']
        else:
            prefix_name += '.' + dictionary['name']
        dictionary['name'] = prefix_name
    for v in dictionary.values():
        if isinstance(v, dict):
            iterate_recursively(v, prefix_name)
        elif isinstance(v, list):
            for d in v:
                # Only dicts can carry a "name"; scalars in a list are left
                # alone (a string would otherwise hit the substring test in
                # `'name' in ...` and then fail on indexing).
                if isinstance(d, dict):
                    iterate_recursively(d, prefix_name)

Adding unique key before duplicate JSON keys

I have the following JSON string:
[
{
"id":"1",
"comment":"hello"
},
{
"id":"2",
"comment":"hi"
}
]
I'm trying to make it like this:
[
{
"finding-1":{
"id":"1",
"comment":"hello"
}
},
{
"finding-2":{
"id":"2",
"comment":"hi"
}
}
]
What is the cleanest way to do this in Python?
j = [
    {"id": "1", "comment": "hello"},
    {"id": "2", "comment": "hi"},
]

# Wrap each record in a one-key dict whose key is derived from the record's id.
n = []
for d in j:
    n.append({f'finding-{d["id"]}': d})
# [{'finding-1': {'comment': 'hello', 'id': '1'}}, {'finding-2': {'comment': 'hi', 'id': '2'}}]

Construct a dictionary by using another dictionary keys and values

I have dictionary below.
my_d = {'country': ['Germany',"France"],
'games': ['Football,Motorsport'],
'bayern': ['Muller']}
I need to create a dictionary using above key and values
In the output, each key gets the suffix ".keyword" appended (for example, country becomes country.keyword):
{
"query": {
"bool": {
"must": [
{
"terms": {
"country.keyword": [
"Germany",
"France"
]
}
},
{
"terms": {
"games.keyword": [
"Football",
"Motorsport"
]
}
},
{
"match": {
"bayern.keyword": ["Muller"]
}
}
]
}
}
}
if my_d = {'country': ['Germany',"France"]} or my_d = {'country': ['Germany',"France"],
'games': None,
'bayern':None}
{
"query": {
"bool": {
"must": [
{
"terms": {
"country.keyword": [
"Germany",
"France"
]
}
}
]
}
}
}
Generally I would recommend using a third-party Elasticsearch Python package to query Elasticsearch, but I believe this code should work (Python 3.6+, since it uses f-strings):
# One "terms" clause per field. Fields whose value is None are skipped so
# they do not end up as null filters in the query.
must_clauses = [
    {f"{key}.keyword": value}
    for key, value in my_d.items()
    if value is not None
]
terms = [{"terms": must_clause} for must_clause in must_clauses]
query_template = {
    "query": {
        "bool": {
            "must": terms,
        },
    },
}

searching only digits in a mixed field (elasticsearch)

I have a field with phone numbers with this format - XXX-XXX-XXXX or XXXXXXXXXX (its a merged table).
I want to be able to search XXXXXXXXXX and get results from both formats.
I tried using the decimal digit filter but it didn't work.
Here are the settings that i have tried which are as follow:
mapping = {
'mappings': {
DOC_TYPE: {
'properties': {
'first_name': {
'type': 'text',
'analyzer': 'word_splitter'
},
'last_name': {
'type': 'text',
'analyzer': 'word_splitter'
},
'email': {
'type': 'text',
'analyzer': 'email'
},
'gender': {
'type': 'text'
},
'ip_address': {
'type': 'text'
},
'language': {
'type': 'text'
},
'phone': {
'type': 'text',
'analyzer': 'digits'
},
'id': {
'type': 'long'
}
}
}
},
'settings': {
'analysis': {
'analyzer': {
'my_analyzer': {
'type': 'whitespace'
},
'better': {
'type': 'standard'
},
'word_splitter': {
'type': 'custom',
'tokenizer': 'nGram',
'min_gram': 5,
'max_gram': 5,
'filter': [
'lowercase'
]
},
'email': {
'type': 'custom',
'tokenizer': 'uax_url_email'
},
'digits': {
'type': 'custom',
'tokenizer': 'whitespace',
'filter': [
'decimal_digit'
]
}
}
}
}
}
Any ideas ?
Use a char_filter to remove the hyphens before indexing. As a simple example:
Set up the custom analyzer and apply it to the phone field.
PUT my_index
{
"settings": {
"analysis": {
"analyzer": {
"phone_analyzer": {
"tokenizer": "standard",
"char_filter": [
"phone_char_filter"
]
}
},
"char_filter": {
"phone_char_filter": {
"type": "mapping",
"mappings": [
"- => "
]
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"phone": {
"type": "text",
"analyzer": "phone_analyzer"
}
}
}
}
}
Add some docs
POST my_index/_doc
{"phone": "123-456-7890"}
POST my_index/_doc
{"phone": "2345678901"}
Search in xxx-xxx-xxxx format
GET my_index/_search
{
"query": {
"match": {
"phone": "123-456-7890"
}
}
}
Search in xxxxxxxxxx format
GET my_index/_search
{
"query": {
"match": {
"phone": "1234567890"
}
}
}

ordering json in python mapping object

I am using Elasticsearch, where the query has to be posted as JSON with its keys in a specific order, or else the result will be wrong. The problem is that Python changes the key order of my JSON. My original JSON query is:
x= {
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*a*"
}
},
"filter": {
"and": {
"filters": [
{
"term": {
"city": "london"
}
},
{
"term": {
"industry.industry_not_analyed": "oil"
}
}
]
}
}
}
},
"facets": {
"industry": {
"terms": {
"field": "industry.industry_not_analyed"
}
},
"city": {
"terms": {
"field": "city.city_not_analyzed"
}
}
}
}
but the resulting python object is as follow.
{
'query': {
'filtered': {
'filter': {
'and': {
'filters': [
{
'term': {
'city': 'london'
}
},
{
'term': {
'industry.industry_not_analyed': 'oil'
}
}
]
}
},
'query': {
'query_string': {
'query': '*a*'
}
}
}
},
'facets': {
'city': {
'terms': {
'field': 'city.city_not_analyzed'
}
},
'industry': {
'terms': {
'field': 'industry.industry_not_analyed'
}
}
}
}
the result is different than what I need how do I solve this.
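Use OrderedDict() instead of {}. Note that on Python versions before 3.6 you can't simply use OrderedDict(query=...), because the keyword arguments are first collected into an unordered dict in the background. (Since Python 3.7, plain dicts preserve insertion order, so this workaround is only needed on older interpreters.) Use this code instead: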
x = OrderedDict()
x['query'] = OrderedDict()
...
I suggest to implement a builder for this:
x = Query().filtered().query_string("*a*").and()....

Categories