Creating an undirected graph from a directed graph in Python

I'm confused and can't come up with a proper way to do this.
I have a directed graph, represented by two dictionaries which hold the outgoing and incoming scores:
graph_to = {'a':{'b':2,'c':3},'b':{'a':1,'d':4}}
graph_from = {'a':{'b':1},'b':{'a':2},'c':{'a':3},'d':{'b':4}}
For example, in graph_to, node a goes to node b with score 2 and to node c with score 3; and in graph_from node a receives score 1 from node b.
I want to create an undirected graph in which the scores between two nodes are summed. It should become this dictionary:
graph = {
    'a': {'b': 3, 'c': 3},
    'b': {'a': 3, 'd': 4},
    'c': {'a': 3},
    'd': {'b': 4}
}

You could try to make a collections.defaultdict() of collections.Counter() objects, and sum the edge counts as you iterate both graph dicts:
from collections import defaultdict
from collections import Counter
from pprint import pprint
graph_to = {'a':{'b':2,'c':3},'b':{'a':1,'d':4}}
graph_from = {'a':{'b':1},'b':{'a':2},'c':{'a':3},'d':{'b':4}}
undirected_graph = defaultdict(Counter)
def sum_edges(graph, result):
    for node, edges in graph.items():
        for edge in edges:
            result[node][edge] += edges[edge]
sum_edges(graph_to, undirected_graph)
sum_edges(graph_from, undirected_graph)
pprint(undirected_graph)
Which gives:
defaultdict(<class 'collections.Counter'>,
            {'a': Counter({'b': 3, 'c': 3}),
             'b': Counter({'d': 4, 'a': 3}),
             'c': Counter({'a': 3}),
             'd': Counter({'b': 4})})
Note: Counter and defaultdict are subclasses of dict, so you can treat them the same as normal dictionaries.
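For example, the summed result supports normal dict-style indexing and iteration directly:
print(undirected_graph['a']['b'])          # 3
print(isinstance(undirected_graph, dict))  # True
for node, edges in undirected_graph.items():
    print(node, dict(edges))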
If you really want normal dictionaries in the final undirected graph, you can use either of these dict comprehensions:
dict((k, dict(v)) for k, v in undirected_graph.items())
# {'a': {'b': 3, 'c': 3}, 'b': {'a': 3, 'd': 4}, 'c': {'a': 3}, 'd': {'b': 4}}
{k: dict(v) for k, v in undirected_graph.items()}
# {'a': {'b': 3, 'c': 3}, 'b': {'a': 3, 'd': 4}, 'c': {'a': 3}, 'd': {'b': 4}}
Additionally, you can also use dict.update() here to refactor sum_edges():
def sum_edges(graph, result):
    for node, edges in graph.items():
        result[node].update(edges)
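Note that because result[node] is a Counter, the update() call here is Counter.update(), which adds counts rather than replacing values the way plain dict.update() does; a quick illustration:
from collections import Counter

c = Counter({'a': 1})
c.update({'a': 2, 'b': 4})   # Counter.update() adds to existing counts
print(c)                     # Counter({'b': 4, 'a': 3})

d = {'a': 1}
d.update({'a': 2, 'b': 4})   # dict.update() overwrites existing values
print(d)                     # {'a': 2, 'b': 4}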

If you prefer to do it by hand, here is a version with plain loops and no imports:
out_dict = {}
for key in graph_to:
    for sub_key in graph_to[key]:
        if key in graph_from and sub_key in graph_from[key]:
            # the edge exists in both directions: sum the two scores
            out_dict[key] = {sub_key: graph_to[key][sub_key] + graph_from[key][sub_key]}
        else:
            # the edge only appears in graph_to: keep its score as-is
            out_dict[key].update({sub_key: graph_to[key][sub_key]})
graph_from.update(out_dict)
print(graph_from)
Output:
{'a': {'b': 3, 'c': 3}, 'b': {'a': 3, 'd': 4}, 'c': {'a': 3}, 'd': {'b': 4}}
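Note that the else branch relies on out_dict[key] already existing, which happens to hold for this input because a summed edge is encountered first for each key; a slightly more defensive sketch of the same idea:
out_dict = {}
for key in graph_to:
    for sub_key in graph_to[key]:
        # graph_from.get(...) falls back to 0 when there is no reverse edge
        score = graph_to[key][sub_key] + graph_from.get(key, {}).get(sub_key, 0)
        out_dict.setdefault(key, {})[sub_key] = score
graph_from.update(out_dict)
print(graph_from)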

Related

Cartesian product of two lists of dicts in Python

Here is my code.
>>> a = [{'a': 1}, {'b': 2}]
>>> b = [{'c': 3}, {'d': 4}]
I want to show:
[{'a':1, 'c':3}, {'b':2, 'c':3}, {'a':1, 'd':4}, {'b':2, 'd':4}]
Is there a way I can do it only with list/dict comprehension?
A one-line, no-import solution can use a recursive lambda:
f = lambda d, c:[c] if not d else [i for k in d[0] for i in f(d[1:], {**c, **k})]
a = [{'a': 1}, {'b': 2}]
b = [{'c': 3}, {'d': 4}]
print(f([a, b], {}))
Output:
[{'a': 1, 'c': 3}, {'a': 1, 'd': 4}, {'b': 2, 'c': 3}, {'b': 2, 'd': 4}]
However, a much cleaner solution uses itertools.product:
from itertools import product
result = [{**j, **k} for j, k in product(a, b)]
Output:
[{'a': 1, 'c': 3}, {'a': 1, 'd': 4}, {'b': 2, 'c': 3}, {'b': 2, 'd': 4}]
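itertools.product also generalizes beyond two lists; a sketch with a hypothetical third list, merging one dict from each combination:
from itertools import product

a = [{'a': 1}, {'b': 2}]
b = [{'c': 3}, {'d': 4}]
extra = [{'e': 5}]  # hypothetical third list, just to show the pattern

result = [{k: v for d in combo for k, v in d.items()}
          for combo in product(a, b, extra)]
# [{'a': 1, 'c': 3, 'e': 5}, {'a': 1, 'd': 4, 'e': 5},
#  {'b': 2, 'c': 3, 'e': 5}, {'b': 2, 'd': 4, 'e': 5}]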
You can try this.
a = [{'a': 1}, {'b': 2}]
b = [{'c': 3}, {'d': 4}]
d = [ {**i, **j} for i in a for j in b ]
print(d)

How to efficiently calculate prefix sum of frequencies of characters in a string?

Say, I have a string
s = 'AAABBBCAB'
How can I efficiently calculate the prefix sum of frequencies of each character in the string, i.e.:
psum = [{'A': 1}, {'A': 2}, {'A': 3}, {'A': 3, 'B': 1}, {'A': 3, 'B': 2}, {'A': 3, 'B': 3}, {'A': 3, 'B': 3, 'C': 1}, {'A': 4, 'B': 3, 'C': 1}, {'A': 4, 'B': 4, 'C': 1}]
You can do it in one line using itertools.accumulate and collections.Counter:
from collections import Counter
from itertools import accumulate
s = 'AAABBBCAB'
psum = list(accumulate(map(Counter, s)))
This gives you a list of Counter objects. Now, to get frequencies for any substring of s in O(1) time, you can simply subtract counters, e.g.:
>>> psum[6] - psum[1] # get frequencies for s[2:7]
Counter({'B': 3, 'A': 1, 'C': 1})
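If you query many substrings, a small helper (the name substring_freq is just illustrative) hides the off-by-one handling for slices that start at index 0:
from collections import Counter

def substring_freq(psum, i, j):
    """Frequencies of s[i:j], given psum[k] == Counter(s[:k + 1])."""
    if i == 0:
        return Counter(psum[j - 1])
    return psum[j - 1] - psum[i - 1]

print(substring_freq(psum, 2, 7))  # Counter({'B': 3, 'A': 1, 'C': 1})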
This is an option:
from collections import Counter
c = Counter()
s = 'AAABBBCAB'
psum = []
for char in s:
    c.update(char)
    psum.append(dict(c))
# [{'A': 1}, {'A': 2}, {'A': 3}, {'A': 3, 'B': 1}, {'A': 3, 'B': 2},
# {'A': 3, 'B': 3}, {'A': 3, 'B': 3, 'C': 1}, {'A': 4, 'B': 3, 'C': 1},
# {'A': 4, 'B': 4, 'C': 1}]
I use collections.Counter to keep a 'running sum' and append (a copy of) the result to the list psum. This way I iterate over the string s only once.
If you prefer to have collections.Counter objects in your result, you could change the last line to
psum.append(c.copy())
in order to get
[Counter({'A': 1}), Counter({'A': 2}), ...
Counter({'A': 4, 'B': 4, 'C': 1})]
The same result can also be achieved with this (using accumulate was first proposed in Eugene Yarmash's answer; I just avoid map in favour of a generator expression):
from itertools import accumulate
from collections import Counter
s = "AAABBBCAB"
psum = list(accumulate(Counter(char) for char in s))
Just for completeness (as there is no 'pure dict' answer here yet): if you do not want to use Counter or defaultdict, you could use this as well:
c = {}
s = 'AAABBBCAB'
psum = []
for char in s:
    c[char] = c.get(char, 0) + 1
    psum.append(c.copy())
Note, though, that defaultdict is usually more performant than dict.get(key, default).
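If you want to verify that on your own data, a rough timing sketch (results will vary with Python version and input):
from collections import defaultdict
from timeit import timeit

s = 'AAABBBCAB' * 1000

def with_get():
    c = {}
    for char in s:
        c[char] = c.get(char, 0) + 1

def with_defaultdict():
    c = defaultdict(int)
    for char in s:
        c[char] += 1

print(timeit(with_get, number=100))
print(timeit(with_defaultdict, number=100))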
You actually don't even need a Counter for this; a defaultdict would suffice!
from collections import defaultdict
c = defaultdict(int)
s = 'AAABBBCAB'
psum = []
# iterate through the characters
for char in s:
    # update the count for this character
    c[char] += 1
    # append a copy of the updated dictionary to the output list
    psum.append(dict(c))
print(psum)
The output looks like
[{'A': 1}, {'A': 2}, {'A': 3}, {'A': 3, 'B': 1},
{'A': 3, 'B': 2}, {'A': 3, 'B': 3},
{'A': 3, 'B': 3, 'C': 1}, {'A': 4, 'B': 3, 'C': 1},
{'A': 4, 'B': 4, 'C': 1}]
Simplest would be to use the Counter object from collections.
from collections import Counter
s = 'AAABBBCAB'
[dict(Counter(s[:i])) for i in range(1, len(s) + 1)]
Yields:
[{'A': 1}, {'A': 2}, {'A': 3}, {'A': 3, 'B': 1}, {'A': 3, 'B': 2},
 {'A': 3, 'B': 3}, {'A': 3, 'B': 3, 'C': 1}, {'A': 4, 'B': 3, 'C': 1},
 {'A': 4, 'B': 4, 'C': 1}]
Note that this rebuilds a Counter from scratch for every prefix, so it is quadratic in the length of the string, unlike the running-sum approaches above.
In Python 3.8 you can use a list comprehension with an assignment expression (aka "the walrus operator"):
>>> from collections import Counter
>>> s = 'AAABBBCAB'
>>> c = Counter()
>>> [c := c + Counter(x) for x in s]
[Counter({'A': 1}), Counter({'A': 2}), Counter({'A': 3}), Counter({'A': 3, 'B': 1}), Counter({'A': 3, 'B': 2}), Counter({'A': 3, 'B': 3}), Counter({'A': 3, 'B': 3, 'C': 1}), Counter({'A': 4, 'B': 3, 'C': 1}), Counter({'A': 4, 'B': 4, 'C': 1})]

How to convert a list of dictionaries to a single dictionary

mylist = [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5, 'f': 6}]
I want it as:
myDict ={'a': 1, 'b': 2,'c': 3, 'd': 4,'e': 5, 'f': 6}
You can make use of ChainMap.
from collections import ChainMap
myDict = dict(ChainMap(*mylist))
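One thing to be aware of: when the same key appears in several dicts, ChainMap resolves it from the first dict (in argument order) that contains it. A small illustration with a hypothetical overlapping list:
from collections import ChainMap

overlapping = [{'a': 1, 'b': 2}, {'a': 99, 'c': 3}]  # hypothetical input with a duplicate key
merged = dict(ChainMap(*overlapping))
print(merged['a'])  # 1 -- the value from the first dict wins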
This will take each dictionary, iterate through its key/value pairs in the for (k, v) in elem.items() part, and assign them to a new dictionary.
mylist = [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5, 'f': 6}]
new_dict = {k:v for elem in mylist for (k,v) in elem.items()}
print(new_dict)
Duplicated keys will be overwritten by the values from later dictionaries.
I would create a new dictionary, iterate over the dictionaries in mylist, then iterate over the key/value pairs in that dictionary. From there, you can add each key/value pair to myDict.
mylist = [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}, {'e': 5, 'f': 6}]
myDict = {}
for Dict in mylist:
    for key in Dict:
        myDict[key] = Dict[key]
print(myDict)

Store variables in a dictionary for large data

I can print the variables in Python:
for h in jl1["results"]["attributes-list"]["volume-attributes"]:
    state = str(h["volume-state-attributes"]["state"])
    if aggr in h["volume-id-attributes"]["containing-aggregate-name"]:
        if state == "online":
            print(h["volume-id-attributes"]["owning-vserver-name"]),
            print(' '),
            print(h["volume-id-attributes"]["name"]),
            print(' '),
            print(h["volume-id-attributes"]["containing-aggregate-name"]),
            print(' '),
            print(h["volume-space-attributes"]["size-used"])
These print calls produce, for example, 100 lines. Now I want to print only the top 5 entries ranked by "size-used".
I am trying to collect these values in a dictionary and pick out the top five by "size-used", but I am not sure how to build that dictionary.
Something like this:
{'vserver': (u'rcdn9-c01-sm-prod',), 'usize': u'389120', 'vname': (u'nprd_root_m01',), 'aggr': (u'aggr1_n01',)}
Any other option, like namedtuples, is also appreciated.
Thanks
To get a list of dictionaries sorted by a certain key, use sorted. Say I have a list of dictionaries with a and b keys and want to sort them by the value of the b element:
my_dict_list = [{'a': 3, 'b': 1}, {'a': 1, 'b': 4}, {'a': 4, 'b': 4},
{'a': 2, 'b': 7}, {'a': 2, 'b': 4.3}, {'a': 2, 'b': 9}, ]
my_sorted_dict_list = sorted(my_dict_list, key=lambda element: element['b'], reverse=True)
# Reverse is set to True because by default it sorts from smallest to biggest; we want to reverse that
# Limit to five results
biggest_five_dicts = my_sorted_dict_list[:5]
print(biggest_five_dicts) # [{'a': 2, 'b': 9}, {'a': 2, 'b': 7}, {'a': 2, 'b': 4.3}, {'a': 1, 'b': 4}, {'a': 4, 'b': 4}]
heapq.nlargest is the obvious way to go here:
import heapq
interesting_dicts = ... filter to keep only the dicts you care about (e.g. online dicts) ...
for large in heapq.nlargest(5, interesting_dicts,
                            key=lambda d: d["volume-space-attributes"]["size-used"]):
    print(...)
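One caveat: in your sample data the size-used values look like strings (e.g. u'389120'), so you probably want to convert them before comparing. A sketch with hypothetical records shaped like the entries in your loop:
import heapq

# hypothetical records, shaped like the entries iterated in the question
interesting_dicts = [
    {"volume-id-attributes": {"name": "vol1"},
     "volume-space-attributes": {"size-used": "389120"}},
    {"volume-id-attributes": {"name": "vol2"},
     "volume-space-attributes": {"size-used": "1048576"}},
]

top_five = heapq.nlargest(
    5, interesting_dicts,
    key=lambda d: int(d["volume-space-attributes"]["size-used"]))  # compare numerically

for record in top_five:
    print(record["volume-id-attributes"]["name"],
          record["volume-space-attributes"]["size-used"])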

Get max keys of a list of dictionaries

If I have:
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
How can I get the maximum value for each key, like this:
{'a': 11,'c': 10,'b': 7}
Use collections.Counter() objects instead, or convert your dictionaries:
from collections import Counter
result = Counter()
for d in dicts:
    result |= Counter(d)
or even:
from collections import Counter
from functools import reduce  # reduce is not a builtin in Python 3
from operator import or_
result = reduce(or_, map(Counter, dicts), Counter())
Counter objects support finding the maximum per key natively through the | operation; & gives you the minimum.
Demo:
>>> result = Counter()
>>> for d in dicts:
...     result |= Counter(d)
...
>>> result
Counter({'a': 11, 'c': 10, 'b': 7})
or using the reduce() version:
>>> reduce(or_, map(Counter, dicts), Counter())
Counter({'a': 11, 'c': 10, 'b': 7})
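And for contrast, the & operator keeps the per-key minimum:
>>> Counter({'a': 4, 'b': 7}) & Counter({'a': 2, 'b': 9})
Counter({'b': 7, 'a': 2})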
>>> dicts = [{'a': 4,'b': 7,'c': 9},
... {'a': 2,'b': 1,'c': 10},
... {'a': 11,'b': 3,'c': 2}]
>>> {letter: max(d[letter] for d in dicts) for letter in dicts[0]}
{'a': 11, 'c': 10, 'b': 7}
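Note this comprehension assumes every dict has the same keys (it only looks at dicts[0]); if keys can differ, you could take the union of keys first:
>>> all_keys = set().union(*dicts)
>>> {k: max(d[k] for d in dicts if k in d) for k in sorted(all_keys)}
{'a': 11, 'b': 7, 'c': 10}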
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
def get_max(dicts):
    res = {}
    for d in dicts:
        for k in d:
            res[k] = max(res.get(k, float('-inf')), d[k])
    return res
>>> get_max(dicts)
{'a': 11, 'c': 10, 'b': 7}
Something like this should work:
dicts = [{'a': 4,'b': 7,'c': 9},
{'a': 2,'b': 1,'c': 10},
{'a': 11,'b': 3,'c': 2}]
max_keys = {}
for d in dicts:
    for k, v in d.items():
        max_keys.setdefault(k, []).append(v)
for k in max_keys:
    max_keys[k] = max(max_keys[k])
