Calculating the semantic descriptor of a nested list - python

I am trying to calculate the semantic descriptors of a nested list and turn them into a nested dictionary. First I collected distinct_words; each of these words will be a key of my final dictionary.
def build_semantic_descriptors(sentences):
    """Tally how often each word occurs across all of ``sentences``.

    ``sentences`` is a list of word lists. The tally is one flat
    word -> count mapping (overall occurrences, not per-word
    co-occurrences), and nothing is returned, so the caller receives None.
    """
    # Unique words over every sentence; intended as the final dictionary keys.
    distinct_words = {term for group in sentences for term in group}
    tally = {}
    for sentence in sentences:
        for word in sentence:
            # Start at zero for unseen words, then add this occurrence.
            tally[word] = tally.get(word, 0) + 1
if __name__ == '__main__':
    # Sample corpus: every inner list is one tokenised sentence.
    x = [
        ["i", "am", "a", "sick", "man"],
        ["i", "am", "a", "spiteful", "man"],
        ["i", "am", "an", "unattractive", "man"],
        ["i", "believe", "my", "liver", "is", "diseased"],
        [
            "however", "i", "know", "nothing", "at", "all", "about", "my",
            "disease", "and", "do", "not", "know", "for", "certain", "what",
            "ails", "me",
        ],
    ]
    descriptors = build_semantic_descriptors(x)
    print(descriptors)
EXPECTED OUTPUT: {'i': {'am': 3, 'a': 2, 'sick': 1, 'man': 3, 'spiteful': 1, 'an': 1, 'unattractive': 1, 'believe': 1, 'my': 2, 'liver': 1, 'is': 1, 'diseased': 1, 'however': 1, 'know': 1, 'nothing': 1, 'at': 1, 'all': 1, 'about': 1, 'disease': 1, 'and': 1, 'do': 1, 'not': 1, 'for': 1, 'certain': 1, 'what': 1, 'ails': 1, 'me': 1}, 'am': {'i': 3, 'a': 2, 'sick': 1, 'man': 3, 'spiteful': 1, 'an': 1, 'unattractive': 1}, etc...}
This is my code at the moment. I already have the words I want as keys, but I don't know how to count the words related to each of them and put those counts into the final dictionary. I've tried using the counter above, but all it does is calculate the overall number of appearances.
Thanks in advance for any help.

Try this:
from collections import defaultdict
from itertools import product
def build_semantic_descriptors(sentences):
    """Build co-occurrence counts for every word in ``sentences``.

    Args:
        sentences: Iterable of sentences, each a list of word strings.

    Returns:
        A ``defaultdict`` mapping each word to a ``defaultdict(int)`` that
        maps every *other* word to the number of sentences in which the two
        words appear together. A word never appears in its own descriptor.
    """
    d = defaultdict(lambda: defaultdict(int))
    for sentence in sentences:
        # Count each word once per sentence; dict.fromkeys de-duplicates
        # while keeping first-seen order, so output order follows the input.
        unique_words = list(dict.fromkeys(sentence))
        for key, word in product(unique_words, repeat=2):
            # Skip every self-pair, not just the first one in the sentence.
            if key != word:
                d[key][word] += 1
    return d
if __name__ == '__main__':
    # Sample corpus: every inner list is one tokenised sentence.
    x = [
        ["i", "am", "a", "sick", "man"],
        ["i", "am", "a", "spiteful", "man"],
        ["i", "am", "an", "unattractive", "man"],
        ["i", "believe", "my", "liver", "is", "diseased"],
        [
            "however", "i", "know", "nothing", "at", "all", "about", "my",
            "disease", "and", "do", "not", "know", "for", "certain", "what",
            "ails", "me",
        ],
    ]
    descriptors = build_semantic_descriptors(x)
    print(descriptors)
You need to loop each sentence twice, in order to get each word for each key. For this you can use itertools.product.
Also note that I use collections.defaultdict here, which is worth reading about: it is a handy utility that supplies a default value whenever a key does not exist yet, so you can skip the membership check you had.

Related

How to create and print a Dictionary that has keys as the names in list and their values as number of times the name appears on the list

I have a list of names:
list = ['Ginger', 'Willow', 'Scout', 'Roscoe', 'Bear', 'Kobe', 'Baxter', 'Zara', 'Fiona', 'Milo', 'Oakley', 'Dakota', 'Prince', 'Bruno', 'Panda', 'Dexter', 'Ziggy', 'Roscoe', 'Lucy', 'Boomer', 'Fiona', 'Ella', 'Emma', 'Oakley']
Using this list, I've created the following dictionary:
listA = {"G": "Ginger", "W": "Willow", "S": "Scout", "R": ["Roscoe", "Roscoe"], "B": ["Bear", "Baxter", "Bruno", "Boomer"], "K": "Kobe", "Z": ["Zara", "Ziggy"], "F": ["Fiona", "Fiona"], "M": "Milo", "O": ["Oakley", "Oakley"], "D": ["Dakota", "Dexter"], "P": ["Prince", "Panda"], "L": "Lucy", "E": ["Ella", "Emma"]}
Printing the keys from the dictionary:
for key in listA.keys():
print(key)
I get:
G
W
S
R
B
K
Z
F
M
O
D
P
L
E
How can I get the number of times that each name appears in the list?
You don't need listA; collections.Counter does exactly what you're looking for.
import collections
# Names to tally; the repeated entries are what gets counted.
data = [
    'Ginger', 'Willow', 'Scout', 'Roscoe', 'Bear', 'Kobe', 'Baxter', 'Zara',
    'Fiona', 'Milo', 'Oakley', 'Dakota', 'Prince', 'Bruno', 'Panda', 'Dexter',
    'Ziggy', 'Roscoe', 'Lucy', 'Boomer', 'Fiona', 'Ella', 'Emma', 'Oakley',
]
# Counter maps each distinct name to the number of times it occurs in data.
counter = collections.Counter(data)
print(counter)
# Copy the name -> count pairs into a plain dictionary.
counter_as_dict = dict(counter.items())
# Shows only the distinct names (the dictionary's keys), not the counts.
print(counter_as_dict.keys())

Intersection of two Python lists based on condition

I want to make intersection of these two python lists:
list_1_begin = ["i", "love", "to", "eat", "fresh", "apples", "yeah", "eat", "fresh"]
list_2_find = ["eat", "fresh"]
And my expected result should look like this:
expected result = ["0", "0", "0", "1", "1", "0", "0", "1", "1"]
This can be done by two for loops, but what if I have first list of 10000 elements and second list of 100 elements, also the phrase can repeat multiple times. Is there any Pythonic way?
Important:
For example:
list_1_begin = ["i", "love", "to", "eat", "the", "fresh", "apples", "yeah", "eat", "fresh"]
list_2_find = ["eat", "fresh"]
Solution should look like this:
expected result = ["0", "0", "0", "0", "0", "0", "0", "0", "1", "1"]
So only if all elements from list_2_find are in the list_1_begin in exact order
To keep it pythonic and efficient convert list_2_find to a set and use a list comprehension:
list_1_begin = [
    "i", "love", "to", "eat", "fresh", "apples", "yeah", "eat", "fresh",
]
list_2_find = ["eat", "fresh"]
# A set gives constant-time membership tests for the words to find.
set_2_find = set(list_2_find)
# Flag each word with "1" when it is one of the searched words, else "0".
# Only membership is checked here; the order of the words is not considered.
result = ["1" if word in set_2_find else "0" for word in list_1_begin]
print(result)
Output
['0', '0', '0', '1', '1', '0', '0', '1', '1']
As an alternative for formatting a bool as an int, one approach is to use an f-string as follows:
result = [f"{(e in set_2_find):d}" for e in list_1_begin]
Output
['0', '0', '0', '1', '1', '0', '0', '1', '1']
Some additional info on f-string formatting can be found here.
UPDATE
If the matches must be sequential, use:
from itertools import chain
list_1_begin = [
    "i", "love", "to", "eat", "the", "fresh", "apples", "yeah", "eat", "fresh",
]
list_2_find = ["eat", "fresh"]
len_1 = len(list_1_begin)
len_2 = len(list_2_find)
# Start indices whose following len_2 items equal list_2_find exactly.
starts = (
    start for start in range(len_1)
    if list_1_begin[start:start + len_2] == list_2_find
)
# Expand every matching start into the full run of indices it covers.
pos = chain.from_iterable(range(start, start + len_2) for start in starts)
positions_set = set(pos)
# "1" for indices inside a matched run, "0" everywhere else.
result = ["1" if i in positions_set else "0" for i in range(len_1)]
print(result)
Output
['0', '0', '0', '0', '0', '0', '0', '0', '1', '1']

List comprehension getting values from JSON

I have this json with different levels:
[{'A': 1, 'B': 2, 'CA': {'CA1': '3', 'CA23': '4'}},
{'A': 1, 'B': {'CA1': '3'}, 'CA': {'CA1': '3', 'CA23': '4'}}]
And I want to get only the values for each row using list comprehension:
The expected result is:
[[1, 2, '3', '4'], [1, '3', '3', '4']]
Without using list comprehension this code work:
# Collect, for every row, a flat list of its values.
values = []
for row in json:
    rows = []
    for value in row.values():
        # A value whose text form starts with "{" is treated as a nested
        # dictionary and contributes its own values instead of itself.
        if str(value).startswith("{"):
            rows.extend(value.values())
        else:
            rows.extend([value])
    values.append(rows)
Some ideas?
Here's a way to do it that cheats a little by using an auxiliary helper function to flatten the nested dictionary objects comprising each "row" of your data-structure.
import json # For pretty-printing data and results.
from collections.abc import MutableMapping
def flatten(nested):
    """Generate the leaf values of a possibly nested dictionary.

    Values that are themselves mutable mappings are descended into
    recursively; every other value is yielded as-is, in iteration order.
    """
    for item in nested.values():
        if not isinstance(item, MutableMapping):
            yield item
            continue
        # Nested mapping: delegate to a recursive walk of its values.
        yield from flatten(item)
# Sample rows; some values are nested dictionaries.
json_values = [
    {'A': 1, 'B': 2, 'CA': {'CA1': '3', 'CA23': '4'}},
    {'A': 1, 'B': {'CA1': '3'}, 'CA': {'CA1': '3', 'CA23': '4'}},
]
print('Before:')
print(json.dumps(json_values, indent=4))
# The list comprehension: one flattened list of values per row.
result = [[*flatten(row)] for row in json_values]
print()
print('After:')
print(json.dumps(result, indent=4))
Output:
Before:
[
{
"A": 1,
"B": 2,
"CA": {
"CA1": "3",
"CA23": "4"
}
},
{
"A": 1,
"B": {
"CA1": "3"
},
"CA": {
"CA1": "3",
"CA23": "4"
}
}
]
After:
[
[
1,
2,
"3",
"4"
],
[
1,
"3",
"3",
"4"
]
]

Too many loops problem when loading a file

I would like to reduce the number of for loop when trying to loop into my file.
For example:
item =[{"item1": "man", "item2": "many", "item3":"mannyy"}]], [[{"item1": "power", "item2": "cow", "item3":"king"}]], [[{"item1": "man", "item2": "many", "item3":"mannyy"}]], [[{"item1": "power", "item2": "cow", "item3":"king"}]
for i, x in enumerate(item):
print(x["item1"])
I am wondering whether it is possible to reduce the number of loops while still being able to get the enumerate indices ("i", "ii", "iii"). I do not wish to use too many for loops.
You can try the following code..
# Flat list of records; each record maps 'item1'..'item3' to a word.
item = [
    {'item2': 'many', 'item1': 'man', 'item3': 'mannyy'},
    {'item2': 'cow', 'item1': 'power', 'item3': 'king'},
    {'item2': 'many', 'item1': 'man', 'item3': 'mannyy'},
    {'item2': 'cow', 'item1': 'power', 'item3': 'king'},
]
# Iterate the list defined above; the previous loop referenced an undefined
# name and raised NameError before printing anything.
for entry in item:
    print(entry['item1'])

Identify frequency of elements in each key in python

I have a list of dictionary as follows.
# Each record maps a group key to the list of codes recorded under that key.
# (Colons were missing after some keys, which made the literal invalid.)
mylist = [
    {"0": ["code1", "code5"], "1": ["code8", "code7", "code2"]},
    {"1": ["code2", "code3"], "2": ["code4", "code5", "code7"], "3": ["code1", "code10"]},
    {"0": ["code8", "code5", "code1"], "2": ["code7", "code5", "code2"]},
]
Now, I want to calculate the codes count for each key in the dictionary. For example "0": ["code1", "code5"] and "0": ["code8", "code5"] should give: mydict_for_0 = {"code1": 1, "code5": 2, "code8": 1}
So, for the above mylist the output should be;
# Expected per-key code counts, aggregated over every dictionary in mylist.
mydict_for_0 = {"code1": 2, "code5": 2, "code8": 1}
mydict_for_1 = {"code2": 2, "code3": 1, "code7": 1, "code8": 1}
mydict_for_2 = {"code4": 1, "code5": 2, "code7": 2, "code2": 1}
mydict_for_3 = {"code1": 1, "code10": 1}
Please help me to do this using python!
Try defaultdict and Counter from the collections module: for each key, gather the value lists from every dictionary and extend them into a single list stored in a defaultdict(list):
from collections import defaultdict, Counter
# key -> every code seen under that key, concatenated across all records.
new_dict = defaultdict(list)
for record in mylist:
    for key, codes in record.items():
        # In-place list addition appends all codes to the list for this key.
        new_dict[key] += codes
new_dict will be:
defaultdict(list,
{'0': ['code1', 'code5', 'code8', 'code5', 'code1'],
'1': ['code8', 'code7', 'code2', 'code2', 'code3'],
'2': ['code4', 'code5', 'code7', 'code7', 'code5', 'code2'],
'3': ['code1', 'code10']})
After that, loop all items to pass the values list into Counter, to count the occurrences of list:
# One plain dict of code -> occurrence count per key, named after the key.
result = {
    'mydict_for_' + key: dict(Counter(value))
    for key, value in new_dict.items()
}
result will be:
{'mydict_for_0': {'code1': 2, 'code5': 2, 'code8': 1},
'mydict_for_1': {'code2': 2, 'code3': 1, 'code7': 1, 'code8': 1},
'mydict_for_2': {'code2': 1, 'code4': 1, 'code5': 2, 'code7': 2},
'mydict_for_3': {'code1': 1, 'code10': 1}}
This might be the solution
# Count how often each code occurs under each key across every record.
counts_by_key = {}
for record in mylist:
    # Each record is a dict of key -> list of codes, so walk its items
    # instead of indexing mylist with the record itself.
    for key, codes in record.items():
        d = counts_by_key.setdefault(key, {})
        for m in codes:
            d[m] = d.get(m, 0) + 1
# One count dictionary per key, ordered by key ("0", "1", ...).
final_result = [counts_by_key[key] for key in sorted(counts_by_key)]
for i in final_result:
    print(i)

Categories