Assemble separate sentences connected by dictionary attributes - python

I have the following list of dictionaries and I'm trying to build a single connected sentence from it: the dictionary of each child sentence has a "ref" attribute that links it to its parent sentence.
clause_list = [
{"id": "T1", "text": "hi"},
{"id": "T2", "text": "I'm", "ref": "T1"},
{"id": "T3", "text": "Simone", "ref": "T2"},
]
Expected output is
"hi I'm Simone" but avoiding sentences like "hi I'm" or "I'm Simone"
What I tried so far is the following, but no matter how I flip it, the undesired sentences always get printed.
for c in clause_list:
for child in clause_list:
try:
if c["id"] == child["ref"] and "ref" not in c.keys():
print(c["text"], child["text"])
elif c["id"] == child["ref"] and "ref" in c.keys():
for father in clause_list:
if father["id"] == c["ref"] and "ref" not in father.keys():
print(father["text"], c["text"], child["text"])
except KeyError:
pass

It is probably best to use a class rather than a dict; converting the list of dicts to a list of class instances is easy.
This works: convert the list of dicts to a list of Clause objects. Each Clause has a search_ref method that prints the text assembled so far if no other clause references it, or otherwise appends the referencing clause and continues from there.
If you have 2 objects, I don't know exactly what you want.
clause_list = [
{"id": "T1", "text": "hi"},
{"id": "T2", "text": "I'm", "ref": "T1"},
{"id": "T3", "text": "Simone", "ref": "T2"},
]
class Clause:
    """One sentence fragment, optionally linked to the fragment it follows.

    Attributes:
        id: identifier of this clause.
        text: the wording of this clause.
        ref: ``id`` of the clause this one continues, or ``None`` when the
            clause does not continue another one.
    """

    def __init__(self, id, text, ref=None):
        # ``ref`` defaults to None so a clause that starts a sentence can be
        # created without naming a parent.
        self.id = id
        self.text = text
        self.ref = ref

    def search_ref(self, Clauses, text=''):
        """Print and return the sentence that continues from this clause.

        Args:
            Clauses: iterable of Clause objects searched for a continuation.
            text: the sentence assembled so far; empty for a starting clause.

        Returns:
            The full sentence, with fragments separated by single spaces.
            It is printed exactly once, by the last clause of the chain.
            Only the first clause found that references this one is followed.
        """
        # Insert the separator only between fragments, so the sentence does
        # not start with a stray space.
        partial_text = f"{text} {self.text}" if text else self.text
        for clause in Clauses:
            if clause.ref == self.id:
                return clause.search_ref(Clauses, partial_text)
        print(partial_text)
        return partial_text
# Turn every raw dictionary into a Clause; "ref" is absent on some entries,
# so it is read with .get() and becomes None there.
Clauses = []
for entry in clause_list:
    Clauses.append(Clause(id=entry['id'], text=entry['text'], ref=entry.get('ref')))

# Start a sentence from every clause that has no parent reference.
for c in Clauses:
    if c.ref is not None:
        continue
    c.search_ref(Clauses)

So right now you have your data set up as a list of dictionaries. I think it will be more helpful if you set it up as a linked list instead and use a bit of recursion. Maybe something like below. As a note, you'll need something to indicate the start of the sentence (I'm going to use "parent=True"):
clause_list = {
# ID's are the keys
'T1':{'text':"hi ", 'child':'T2','parent':True},
'T2':{'text':"I'm ", 'child':'T3'},
'T3':{'text':"Simone", 'child':None},
}
Then your code can look like this:
# Recursive function
def print_child(child_id, clause_list):
    """Print the text of clause ``child_id`` and of every clause chained after it.

    Args:
        child_id: key of the clause to start from; ``None`` means there is
            nothing (more) to print.
        clause_list: dict mapping clause ids to dicts holding a 'text' entry
            and, optionally, a 'child' entry naming the next clause.

    Raises:
        KeyError: if ``child_id`` is not None and is not a key of
            ``clause_list``.
    """
    # The final clause is stored with 'child': None, so None has to end the
    # chain instead of being used as a dictionary key.
    if child_id is None:
        return
    clause = clause_list[child_id]
    print(clause['text'])
    # .get() covers both a missing 'child' key and an explicit None.
    print_child(clause.get('child'), clause_list)
# Main function
for clause in clause_list.values():
    # Only entries flagged with a 'parent' key start a sentence.
    if 'parent' not in clause:
        continue
    print(clause['text'])
    # Recurse through the parent to find others
    if 'child' in clause:
        print_child(clause['child'], clause_list)
Could definitely be cleaned up a bit and optimized, but this is the general gist. Hope it helps!

Another solution using a recursion:
clause_list = [
{"id": "T1", "text": "hi"},
{"id": "T2", "text": "I'm", "ref": "T1"},
{"id": "T3", "text": "Simone", "ref": "T2"},
]
inv_dict = {d.get("ref", ""): (d["id"], d["text"]) for d in clause_list}
def get_sentence(inv_dict, key=""):
    """Return the chain of texts reachable from ``key`` as a list.

    ``inv_dict`` maps a key to an ``(id, text)`` pair; the ``id`` of each pair
    is used as the next key to look up. The walk stops at the first key that
    is not present in ``inv_dict``.
    """
    # Guard clause: nothing is stored under this key, so the chain ends here.
    if key not in inv_dict:
        return []
    next_key, word = inv_dict[key]
    return [word, *get_sentence(inv_dict, next_key)]
print(" ".join(get_sentence(inv_dict)))
Prints:
hi I'm Simone

Related

Converting JSON to nested dictionary(python)

I have a json object like this:
{
parser_config:
[{"element_name": "jobs","type": "class", "attributes": ["title is-5", "company", "location"]},
{"element_name": "page-details","type":"class","attributes":["subtitle is-3", "title is-1"]}]
}
I want to create a nested dictionary out of the attributes like this:
{"jobs": {"class": "title is-5", "class_2": "location", "class_3": "company"},
"page-details": {"class": "subtitle is-3", "class_2": "title is-1"}}
I have created a function like this:
def AttributeConstructor(json_data: dict, attribute_type: str) -> dict:
specific_attributes = []
config_data = json_data['parser_config']
for i in range(len(config_data)):
for target in config_data[i]['attributes']:
target_type = config_data[i]['type']
if target_type == attribute_type:
element_name = config_data[i]['element_name']
specific_attributes.append(
{element_name: {target_type: target}})
return specific_attributes
I am not sure how to get the desired output of keeping 1 element name for all {target_type: target}. I am setting attribute_type to "class".
The goal is to convert the JSON into a nested dictionary using a function, I cannot seem to get my function to output everything so I am left with:
{"jobs": {"class": "company"},
"page-details": {"class": "title is-1"}}
instead of the whole nested dictionary. I also would like this to work on much larger JSON objects.
You only add singleton dictionaries {element_name: {target_type: target}} to the list.
You could try
def attribute_constructor(json_data, attribute_type):
    """Return one ``{element_name: {...}}`` dict per matching config record.

    Args:
        json_data: mapping with a 'parser_config' entry holding records that
            each provide 'element_name', 'type' and 'attributes'.
        attribute_type: only records whose 'type' equals this value are kept.

    Returns:
        A list of single-key dicts. Each inner dict maps the type name
        (first attribute) or ``<type>_<n>`` (n-th attribute, n >= 2) to the
        attribute value.
    """
    def numbered(rec):
        # The first attribute keeps the bare type name; later ones get a
        # 1-based position suffix (_2, _3, ...).
        return {
            (rec["type"] if position == 1 else f'{rec["type"]}_{position}'): attr
            for position, attr in enumerate(rec["attributes"], start=1)
        }

    matching = (rec for rec in json_data['parser_config'] if rec["type"] == attribute_type)
    return [{rec["element_name"]: numbered(rec)} for rec in matching]
or, closer to your attempt,
def attribute_constructor(json_data, attribute_type):
    """Collect ``{element_name: {...}}`` dicts for records of one type.

    Args:
        json_data: mapping with a 'parser_config' entry holding records that
            each provide 'element_name', 'type' and 'attributes'.
        attribute_type: only records whose 'type' equals this value are kept.

    Returns:
        A list with one single-key dict per kept record. The inner dict uses
        the type name for the first attribute and ``<type>_<n>`` for the
        n-th attribute (n >= 2).
    """
    collected = []
    for rec in json_data['parser_config']:
        kind = rec["type"]
        if kind == attribute_type:
            name = rec["element_name"]
            inner = {}
            for position, attr in enumerate(rec["attributes"], start=1):
                # Position 1 keeps the bare type name; the rest are suffixed.
                label = kind if position == 1 else f'{kind}_{position}'
                inner[label] = attr
            collected.append({name: inner})
    return collected
instead.
Result for
json_data = {
"parser_config": [
{"element_name": "jobs", "type": "class", "attributes": ["title is-5", "company", "location"]},
{"element_name": "page-details", "type": "class","attributes": ["subtitle is-3", "title is-1"]}
]
}
print(attribute_constructor(json_data, "class"))
is
[{'jobs': {'class': 'title is-5', 'class_2': 'company', 'class_3': 'location'}},
{'page-details': {'class': 'subtitle is-3', 'class_2': 'title is-1'}}]

Why is this dictionary comprehension for list of dictionaries not returning values?

I am iterating over a list of dictionaries in a list formatted as follows (each dictionary relates to one item of clothing, I have only listed the first:
new_products = [{'{"uniq_id": "1234", "sku": "abcdefgh", "name": "Levis skinny jeans", '
'"list_price": "75.00", "sale_price": "55.00", "category": "womens"}'}]
def find_product(dictionary, uniqid):
if 'uniq_id' in dictionary:
if ['uniq_id'] == uniqid:
return(keys, values in dictionary)
print(find_product(new_products, '1234'))
This is returning
None
The reason for the if statement in there is that not every product has a value for uniq_id so I was getting a key error on an earlier version of my code.
Your dictionary definition is quite unclear.
Assuming that you have given a list of dictionaries of size 1, it should be something like this:
new_products = [{"uniq_id": "1234", "sku": "abcdefgh", "name": "Levis skinny jeans", "list_price": "75.00", "sale_price": "55.00", "category": "womens"}]
def find_product(list_of_dicts, uniqid):
    """Return the first dict whose 'uniq_id' equals ``uniqid``.

    Dicts without a 'uniq_id' key are skipped. ``None`` is returned when no
    dict matches.
    """
    matches = (
        product
        for product in list_of_dicts
        if 'uniq_id' in product and product['uniq_id'] == uniqid
    )
    return next(matches, None)
print(find_product(new_products, '1234'))
You are using something like this:
new_products = [{'{ "some" : "stuff" }'}]
This is a list (the outer []) containing a set (the {})
{'{ "some" : "stuff" }'}
Note {1} is a set containing the number 1. Though it uses the curly braces it isn't a dictionary.
Your set contains a string:
'{ "some" : "stuff" }'
If I ask if 'some' is in this, I get True back, but if I ask for this string's keys there are no keys.
Make your new_products a list containing a dictionary (not a set), and don't put the payload in a string:
new_products = [{"uniq_id": "1234",
"sku": "abcdefgh",
"name": "Levis skinny jeans",
"list_price": "75.00",
"sale_price": "55.00",
"category": "womens"}]
Then loop over the dictionaries in the list in your function:
def find_product(dictionary_list, uniqid):
    """Return the keys and values of the first product matching ``uniqid``.

    Returns:
        A ``(keys_view, values_view)`` tuple for the first dict whose
        'uniq_id' equals ``uniqid``, or the string "not found" when no dict
        matches. Dicts without a 'uniq_id' key are skipped.
    """
    for candidate in dictionary_list:
        # Checking for the key first avoids a KeyError on products that
        # have no 'uniq_id'.
        if 'uniq_id' not in candidate:
            continue
        if candidate['uniq_id'] == uniqid:
            return candidate.keys(), candidate.values()
    return "not found"  # or something better
>>> find_product(new_products, '1234')
(dict_keys(['uniq_id', 'sku', 'name', 'list_price', 'sale_price', 'category']), dict_values(['1234', 'abcdefgh', 'Levis skinny jeans', '75.00', '55.00', 'womens']))
>>> find_product(new_products, '12345')
'not found'

How to return collection from a function

I'm working with collection's namedtuple to return a list of tuple from within a function as such:
def getItems(things_list) -> list:
for i, j in enumerate(things_list):
[*things_id] = things_list[i].id
[*things_title] = things_list[i].title
things_structure = namedtuple('things', ['id', 'title'])
[*things_list] = [
things_structure(things_id, things_title)
]
return things_list
if I run
callGetItems = getItems(list_of_things) # assume list_of_things is a dictionary
print(callGetItems)
it will only print the first index of the return value, as you can see I'm actually expecting the whole dictionary to be printed with their respective id and title.(assume there is at least 3 different Key-value pairs in the dictionary)
P.s. If I print within the function, it prints all the elements stored in the [*things_list] variable as expected but the same cannot be said for iterating over the return value i.e., outside of the function. please help.
to deobfuscate things assume this is the dictionary list_of_things:
list_of_things = [
{"id" : 1,
"title" : "waterbottle",
"description" : "a liquid container"},
{"id": 2,
"title": "lunchbox",
"description": "a food container"}
]
# etc...
Is this what you were going for? Creating a list of named tuples from the original list of dicts?
from collections import namedtuple
list_of_things = [
{"id": 1, "title": "waterbottle", "description": "a liquid container"},
{"id": 2, "title": "lunchbox", "description": "a food container"},
]
def getItems(things_list) -> list:
    """Convert each dict in ``things_list`` to a ``things(id, title)`` tuple.

    Only the "id" and "title" entries of each dict are read; any other
    entries are ignored. The result keeps the input order.
    """
    things_structure = namedtuple("things", ["id", "title"])
    converted = []
    for thing in things_list:
        converted.append(things_structure(thing["id"], thing["title"]))
    return converted
new_things = getItems(list_of_things)
print(new_things)

Python parse JSON file

{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information",
"Microsoft.AspNetCore": "Warning",
"System.Net.Http.HttpClient.Default.ClientHandler": "Warning",
"System.Net.Http.HttpClient.Default.LogicalHandler": "Warning"
}
},
"AllowedHosts": "*",
"AutomaticTransferOptions": {
"DateOffsetForDirectoriesInDays": -1,
"DateOffsetForPortfoliosInDays": -3,
"Clause": {
"Item1": "1"
}
},
"Authentication": {
"ApiKeys": [
{
"Key": "AB8E5976-2A7C-4EEE-92C1-7B0B4DC840F6",
"OwnerName": "Cron job",
"Claims": [
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "StressTestManager"
}
]
},
{
"Key": "B11D4F27-483A-4234-8EC7-CA121712D5BE",
"OwnerName": "Test admin",
"Claims": [
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "StressTestAdmin"
},
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "TestManager"
}
]
},
{
"Key": "EBF98F2E-555E-4E66-9D77-5667E0AA1B54",
"OwnerName": "Test manager",
"Claims": [
{
"Type": "http://schemas.microsoft.com/ws/2008/06/identity/claims/role",
"Value": "TestManager"
}
]
}
],
"LDAP": {
"Domain": "domain.local",
"MachineAccountName": "Soft13",
"MachineAccountPassword": "vixuUEY7884*",
"EnableLdapClaimResolution": true
}
},
"Authorization": {
"Permissions": {
"Roles": [
{
"Role": "TestAdmin",
"Permissions": [
"transfers.create",
"bindings.create"
]
},
{
"Role": "TestManager",
"Permissions": [
"transfers.create"
]
}
]
}
}
}
I have JSON above and need to parse it with output like this
Logging__LogLevel__Default
Authentication__ApiKeys__0__Claims__0__Type
Everything is ok, but I always get some strings with this output
Authentication__ApiKeys__0__Key
Authentication__ApiKeys__0__OwnerName
Authentication__ApiKeys__0__Claims__0__Type
Authentication__ApiKeys__0__Claims__0__Value
Authentication__ApiKeys__0__Claims__0
Authentication__ApiKeys__2
Authorization__Permissions__Roles__0__Role
Authorization__Permissions__Roles__0__Permissions__1
Authorization__Permissions__Roles__1__Role
Authorization__Permissions__Roles__1__Permissions__0
Authorization__Permissions__Roles__1
Why does my code add incomplete strings like
Authentication__ApiKeys__0__Claims__0
Authentication__ApiKeys__2
Authorization__Permissions__Roles__1
And why it doesn't print every value from
Authorization__Permissions__Roles__0__Permissions__*
and from
Authorization__Permissions__Roles__1__Permissions__*
I have this code in python3:
def checkdepth(sub_key, variable):
delmt = '__'
for item in sub_key:
try:
if isinstance(sub_key[item], dict):
sub_variable = variable + delmt + item
checkdepth(sub_key[item], sub_variable)
except TypeError:
continue
if isinstance(sub_key[item], list):
sub_variable = variable + delmt + item
for it in sub_key[item]:
sub_variable = variable + delmt + item + delmt + str(sub_key[item].index(it))
checkdepth(it, sub_variable)
print(sub_variable)
if isinstance(sub_key[item], int) or isinstance(sub_key[item], str):
sub_variable = variable + delmt + item
print (sub_variable)
for key in data:
if type(data[key]) is str:
print(key + '=' +str(data[key]))
else:
variable = key
checkdepth(data[key], variable)
I know that the problem in block where I process list data type, but I don't know where is the problem exactly
Use a recursive generator:
import json
with open('input.json') as f:
data = json.load(f)
def strkeys(data):
    """Yield a '__'-joined key path for every leaf in nested dicts and lists.

    Dict keys and list indices met on the way to a leaf are joined with
    '__'. When ``data`` itself is neither a dict nor a list, a single
    ``None`` is yielded; the recursive callers use that ``None`` as the
    signal that the path ends at their own key or index. Empty dicts and
    lists yield nothing.
    """
    if isinstance(data, dict):
        for k, v in data.items():
            for item in strkeys(v):
                # Test for None explicitly: an empty-string key further down
                # is a real path segment and must not be mistaken for "leaf".
                yield k if item is None else f'{k}__{item}'
    elif isinstance(data, list):
        for i, v in enumerate(data):
            for item in strkeys(v):
                yield str(i) if item is None else f'{i}__{item}'
    else:
        yield None  # termination condition, not a list or dict
for s in strkeys(data):
print(s)
Output:
Logging__LogLevel__Default
Logging__LogLevel__Microsoft
Logging__LogLevel__Microsoft.Hosting.Lifetime
Logging__LogLevel__Microsoft.AspNetCore
Logging__LogLevel__System.Net.Http.HttpClient.Default.ClientHandler
Logging__LogLevel__System.Net.Http.HttpClient.Default.LogicalHandler
AllowedHosts
AutomaticTransferOptions__DateOffsetForDirectoriesInDays
AutomaticTransferOptions__DateOffsetForPortfoliosInDays
AutomaticTransferOptions__Clause__Item1
Authentication__ApiKeys__0__Key
Authentication__ApiKeys__0__OwnerName
Authentication__ApiKeys__0__Claims__0__Type
Authentication__ApiKeys__0__Claims__0__Value
Authentication__ApiKeys__1__Key
Authentication__ApiKeys__1__OwnerName
Authentication__ApiKeys__1__Claims__0__Type
Authentication__ApiKeys__1__Claims__0__Value
Authentication__ApiKeys__1__Claims__1__Type
Authentication__ApiKeys__1__Claims__1__Value
Authentication__ApiKeys__2__Key
Authentication__ApiKeys__2__OwnerName
Authentication__ApiKeys__2__Claims__0__Type
Authentication__ApiKeys__2__Claims__0__Value
Authentication__LDAP__Domain
Authentication__LDAP__MachineAccountName
Authentication__LDAP__MachineAccountPassword
Authentication__LDAP__EnableLdapClaimResolution
Authorization__Permissions__Roles__0__Role
Authorization__Permissions__Roles__0__Permissions__0
Authorization__Permissions__Roles__0__Permissions__1
Authorization__Permissions__Roles__1__Role
Authorization__Permissions__Roles__1__Permissions__0
Using flatten_json, this can be converted to a pandas DataFrame, but it's not clear if that's what you want. Also, once you do convert it, you can use df.iloc[0] to see what each column holds (i.e. you see the value for that key).
Note: you need to pass a list so I just wrapped your json above in [].
# https://github.com/amirziai/flatten
dic = your json from above
dic =[dic] # put it in a list
dic_flattened = (flatten(d, '__') for d in dic) # add your delimiter
df = pd.DataFrame(dic_flattened)
df.iloc[0]
Logging__LogLevel__Default Information
Logging__LogLevel__Microsoft Warning
Logging__LogLevel__Microsoft.Hosting.Lifetime Information
Logging__LogLevel__Microsoft.AspNetCore Warning
Logging__LogLevel__System.Net.Http.HttpClient.Default.ClientHandler Warning
Logging__LogLevel__System.Net.Http.HttpClient.Default.LogicalHandler Warning
AllowedHosts *
AutomaticTransferOptions__DateOffsetForDirectoriesInDays -1
AutomaticTransferOptions__DateOffsetForPortfoliosInDays -3
AutomaticTransferOptions__Clause__Item1 1
Authentication__ApiKeys__0__Key AB8E5976-2A7C-4EEE-92C1-7B0B4DC840F6
Authentication__ApiKeys__0__OwnerName Cron job
Authentication__ApiKeys__0__Claims__0__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__0__Claims__0__Value StressTestManager
Authentication__ApiKeys__1__Key B11D4F27-483A-4234-8EC7-CA121712D5BE
Authentication__ApiKeys__1__OwnerName Test admin
Authentication__ApiKeys__1__Claims__0__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__1__Claims__0__Value StressTestAdmin
Authentication__ApiKeys__1__Claims__1__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__1__Claims__1__Value TestManager
Authentication__ApiKeys__2__Key EBF98F2E-555E-4E66-9D77-5667E0AA1B54
Authentication__ApiKeys__2__OwnerName Test manager
Authentication__ApiKeys__2__Claims__0__Type http://schemas.microsoft.com/ws/2008/06/identi...
Authentication__ApiKeys__2__Claims__0__Value TestManager
Authentication__LDAP__Domain domain.local
Authentication__LDAP__MachineAccountName Soft13
Authentication__LDAP__MachineAccountPassword vixuUEY7884*
Authentication__LDAP__EnableLdapClaimResolution true
Authorization__Permissions__Roles__0__Role TestAdmin
Authorization__Permissions__Roles__0__Permissions__0 transfers.create
Authorization__Permissions__Roles__0__Permissions__1 bindings.create
Authorization__Permissions__Roles__1__Role TestManager
Authorization__Permissions__Roles__1__Permissions__0 transfers.create
Ok, I looked at your code and it's hard to follow. The purpose of your variable and function names is not easy to understand. Which is fine, because everyone has to learn best practices and all the little tips and tricks in Python. So hopefully I can help you out.
You have a recursive-ish function, which is definitely the best way to handle a situation like this. However, your code is part recursive and part not. If you go recursive to solve a problem, you have to go 100% recursive.
Also the only time you should print in a recursive function is for debugging. Recursive functions should have an object that is passed down the function and gets appended to or altered and then passed back once it gets to the end of the recursion.
When you get a problem like this, think about which data you actually need or care about. In this problem we don't care about the values that are stored in the object, we just care about the keys. So we should write code that doesn't even bother looking at the value of something except to determine its type.
Here is some code I wrote up that should work for what you're wanting to do. But take note that because I did purely a recursive function my code base is small. Also my function uses a list that is passed around and added to and then at the end I return it so that we can use it for whatever we need. If you have questions just comment on this question and I'll answer the best I can.
def convert_to_delimited_keys(obj, parent_key='', delimiter='__', keys_list=None):
    """Return the delimiter-joined key path of every leaf in ``obj``.

    Args:
        obj: value to walk. Dicts and lists are descended into; anything
            else counts as a leaf.
        parent_key: optional prefix used as the first segment of every path.
        delimiter: string placed between path segments.
        keys_list: list that receives the paths; a new list is created when
            this is None.

    Returns:
        ``keys_list`` (the same object when one was passed in) with one path
        appended per leaf, in traversal order. Empty dicts and lists
        contribute nothing.
    """
    if keys_list is None:
        keys_list = []

    def walk(node, path):
        # ``path`` holds the segments collected so far; joining them only at
        # the leaf means no leading delimiter ever has to be stripped off,
        # which keeps a caller-supplied parent_key intact.
        if isinstance(node, dict):
            for k, child in node.items():
                walk(child, path + [str(k)])
        elif isinstance(node, list):
            for i, child in enumerate(node):
                walk(child, path + [str(i)])
        else:
            keys_list.append(delimiter.join(path))

    walk(obj, [parent_key] if parent_key else [])
    return keys_list
for item in convert_to_delimited_keys(data):
print(item)

Index JSON searches python

I have the next JSON that I get from a URL:
[{
"id": 1,
"version": 23,
"external_id": "2312",
"url": "https://example.com/432",
"type": "typeA",
"date": "2",
"notes": "notes",
"title": "title",
"abstract": "dsadasdas",
"details": "something",
"accuracy": 0,
"reliability": 0,
"severity": 12,
"thing": "32132",
"other": [
"aaaaaaaaaaaaaaaaaa",
"bbbbbbbbbbbbbb",
"cccccccccccccccc",
"dddddddddddddd",
"eeeeeeeeee"
],
"nana": 8
},
{
"id": 2,
"version": 23,
"external_id": "2312",
"url": "https://example.com/432",
"type": "typeA",
"date": "2",
"notes": "notes",
"title": "title",
"abstract": "dsadasdas",
"details": "something",
"accuracy": 0,
"reliability": 0,
"severity": 12,
"thing": "32132",
"other": [
"aaaaaaaaaaaaaaaaaa",
"bbbbbbbbbbbbbb",
"cccccccccccccccc",
"dddddddddddddd",
"eeeeeeeeee"
],
"nana": 8
}]
My code:
import json
import urllib2
data = json.load(urllib2.urlopen('http://someurl/path/to/json'))
print data
I want to know how to access to the part "abstract" of the object that has "id" equal to 2 for example. The part "id" is unique so I can use id to index my searchs.
Thanks!
Here's one way to do it. You can create a generator via a generator expression, call next to iterate that generator once, and get back the desired object.
item = next((item for item in data if item['id'] == 2), None)
if item:
print item['abstract']
See also Python: get a dict from a list based on something inside the dict
EDIT : If you'd like access to all elements of the list that have a given key value (for example, id == 2) you can do one of two things. You can either create a list via comprehension (as shown in the other answer), or you can alter my solution:
my_gen = (item for item in data if item['id'] == 2)
for item in my_gen:
print item
In the loop, item will iterate over those items in your list which satisfy the given condition (here, id == 2).
You can use a list comprehension to filter:
import json
j = """[{"id":1,"version":23,"external_id":"2312","url":"https://example.com/432","type":"typeA","date":"2","notes":"notes","title":"title","abstract":"dsadasdas","details":"something","accuracy":0,"reliability":0,"severity":12,"thing":"32132","other":["aaaaaaaaaaaaaaaaaa","bbbbbbbbbbbbbb","cccccccccccccccc","dddddddddddddd","eeeeeeeeee"],"nana":8},{"id":2,"version":23,"external_id":"2312","url":"https://example.com/432","type":"typeA","date":"2","notes":"notes","title":"title","abstract":"dsadasdas","details":"something","accuracy":0,"reliability":0,"severity":12,"thing":"32132","other":["aaaaaaaaaaaaaaaaaa","bbbbbbbbbbbbbb","cccccccccccccccc","dddddddddddddd","eeeeeeeeee"],"nana":8}]"""
dicto = json.loads(j)
results = [x for x in dicto if "id" in x and x["id"]==2]
And then you can print the 'abstract' values like so:
for result in results:
if "abstract" in result:
print result["abstract"]
import urllib2
import json
data = json.load(urllib2.urlopen('http://someurl/path/to/json'))
your_id = raw_input('enter the id')
for each in data:
if each['id'] == your_id:
print each['abstract']
In the above code, data is a list and each is a dict, so you can easily access the values of each dict object.

Categories