I'm currently trying to write an algorithm that gives me the total cost of a graph once all nodes have been visited, but I'm failing miserably and am honestly out of ideas. My goal is to get the total cost of each of the graphs below using Dijkstra's algorithm.
Here's what I have so far:
from collections import defaultdict
import heapq

def build_graph():
    # Create the graph with all the given nodes and costs
    edges = aeroDistances
    graph = defaultdict(dict)
    for edge in edges.items():
        tuple1 = edge[0][0]
        tuple2 = edge[0][1]
        cost = edge[1]
        connection1 = {tuple2: cost}
        connection2 = {tuple1: cost}
        graph[tuple1].update(connection1)
        graph[tuple2].update(connection2)
    return dict(graph)
def dijkstra(graph, starting_vertex):
    # All the distances set to infinity
    distances = {vertex: float('infinity') for vertex in graph}
    # Distance from the starting vertex
    distances[starting_vertex] = 0
    # Priority queue
    pq = [(0, starting_vertex)]
    while len(pq) > 0:
        current_distance, current_vertex = heapq.heappop(pq)
        # Nodes can get added to the priority queue multiple times. We only
        # process a vertex the first time we remove it from the priority queue
        if current_distance > distances[current_vertex]:
            continue
        for neighbor, weight in graph[current_vertex].items():
            distance = current_distance + weight
            # Only consider this new path if it's better than any path we've
            # already found
            if distance < distances[neighbor]:
                distances[neighbor] = distance
                heapq.heappush(pq, (distance, neighbor))
    return distances, distance
numCidades = 0
numAeroportos = 0
numEstradas = 0
#custoAeroportos = {}
#custoEstradas = {}
#custoAeroportos = {(1, 2): 2, (1, 3): 4}
#custoEstradas = {(3, 1, 'E'): 2}
#custoAeroportos = {(1, 2): 1, (2, 3): 2, (3, 4): 1, (4, 1): 1}
custoAeroportos = {(1, 2): 1, (1, 3): 6, (2, 4): 2, (3, 4): 2}
custoEstradas = {(2, 3): 3}
listCidades = [1,2,3]
distances = []
indexValue = 0
indexKey = 0
currentIndex = 0
# Deconstruct the dict into a list of keys (tuples)
# Deconstruct the dict into a list of values
# Make it easier to sort the connections by creating a list of tuples and
# their respective weights and zip them together
distancesAeroKeys = list(custoAeroportos.keys())
distancesAeroValues = list(custoAeroportos.values())
aeroDistances = dict(map(list, zip(*[distancesAeroKeys, distancesAeroValues])))
print()
print("AeroDistances: " + str(aeroDistances))
graph = build_graph()
print()
print("Graph: " + str(graph))
print()
print("Dijkstra: " + str(dijkstra(graph, 1)))
The two graphs (dicts) I'm currently testing with are the ones assigned to custoAeroportos, and I can't seem to get the total minimum cost when all nodes are visited.
Here are the graphs; they are fairly simple:
The first one has a total cost of 5.
The second one has a total cost of 3.
The total cost I'm getting is wrong and I can't figure it out.
For the first graph:
AeroDistances: {(1, 2): 1, (1, 3): 6, (2, 4): 2, (3, 4): 2}
Graph: {1: {2: 1, 3: 6}, 2: {1: 1, 4: 2}, 3: {1: 6, 4: 2}, 4: {2: 2, 3: 2}}
Dijkstra: ({1: 0, 2: 1, 3: 5, 4: 3}, 7)
For the second graph, which somehow is correct:
AeroDistances: {(1, 2): 1, (2, 3): 2, (3, 4): 1, (4, 1): 1}
Graph: {1: {2: 1, 4: 1}, 2: {1: 1, 3: 2}, 3: {2: 2, 4: 1}, 4: {3: 1, 1: 1}}
Dijkstra: ({1: 0, 2: 1, 3: 2, 4: 1}, 3)
I really appreciate your help, thank you.
Your function returns the distance of the path from the starting vertex to whichever node happened to be relaxed last before the loop ended. This is not really what you want to return. Certainly when the search tree has multiple outgoing edges from some vertices, this path has little to do with the total distance.
Instead you need to accumulate the weights of the edges that are "accepted", i.e. those that are (implicitly) popped from the heap and improve the distance for that node.
So I would suggest extending the tuples on the heap with one more piece of information: the weight of the last edge that brought us to that node. When the node is accepted, this edge becomes part of the spanning tree, and its weight should then be added to an accumulating total.
Here is the adapted code. The changes have accompanying comments:
def dijkstra(graph, starting_vertex):
    distances = {vertex: float('infinity') for vertex in graph}
    distances[starting_vertex] = 0
    graph_distance = 0  # this will be returned
    pq = [(0, 0, starting_vertex)]  # middle value is edge weight
    while len(pq) > 0:
        current_distance, edge_weight, current_vertex = heapq.heappop(pq)
        if current_distance > distances[current_vertex]:
            continue
        graph_distance += edge_weight  # accumulate
        for neighbor, weight in graph[current_vertex].items():
            distance = current_distance + weight
            if distance < distances[neighbor]:
                distances[neighbor] = distance
                heapq.heappush(pq, (distance, weight, neighbor))  # include weight
    return distances, graph_distance  # ...return it
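For reference, running the adapted function on the first graph from the question should give the expected total of 5:

graph = {1: {2: 1, 3: 6}, 2: {1: 1, 4: 2}, 3: {1: 6, 4: 2}, 4: {2: 2, 3: 2}}
print(dijkstra(graph, 1))  # ({1: 0, 2: 1, 3: 5, 4: 3}, 5)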
Given the following function, what would be the correct and pythonic way of achieving the same (and faster) result?
My code is not efficient, and I believe I'm missing something that is staring me in the face.
The idea is to find a pattern of the form [[A,B],[A,C],[C,B]] without having to generate additional permutations (since this results in a higher processing time for the comparisons).
The dictionary fed into find_path in real life would contain approximately 10,000 keys, so having to iterate over that amount with the current code version below is not efficient.
from time import perf_counter
from typing import List, Generator, Dict

def find_path(data: Dict) -> Generator:
    for first_pair in data:
        pair1: List[str] = first_pair.split("/")
        for second_pair in data:
            pair2: List[str] = second_pair.split("/")
            if pair2[0] == pair1[0] and pair2[1] != pair1[1]:
                for third_pair in data:
                    pair3: List[str] = third_pair.split("/")
                    if pair3[0] == pair2[1] and pair3[1] == pair1[1]:
                        amount_pair_1: int = data.get(first_pair)["amount"]
                        id_pair_1: int = data.get(first_pair)["id"]
                        amount_pair_2: int = data.get(second_pair)["amount"]
                        id_pair_2: int = data.get(second_pair)["id"]
                        amount_pair_3: int = data.get(third_pair)["amount"]
                        id_pair_3: int = data.get(third_pair)["id"]
                        yield (
                            pair1,
                            amount_pair_1,
                            id_pair_1,
                            pair2,
                            amount_pair_2,
                            id_pair_2,
                            pair3,
                            amount_pair_3,
                            id_pair_3,
                        )
raw_data = {
    "EZ/TC": {"id": 1, "amount": 9},
    "LM/TH": {"id": 2, "amount": 8},
    "CD/EH": {"id": 3, "amount": 7},
    "EH/TC": {"id": 4, "amount": 6},
    "LM/TC": {"id": 5, "amount": 5},
    "CD/TC": {"id": 6, "amount": 4},
    "BT/TH": {"id": 7, "amount": 3},
    "BT/TX": {"id": 8, "amount": 2},
    "TX/TH": {"id": 9, "amount": 1},
}

processed_data = list(find_path(raw_data))

for i in processed_data:
    print(("The path to traverse is:", i))
>> ('The path to traverse is:', (['CD', 'TC'], 4, 6, ['CD', 'EH'], 7, 3, ['EH', 'TC'], 6, 4))
>> ('The path to traverse is:', (['BT', 'TH'], 3, 7, ['BT', 'TX'], 2, 8, ['TX', 'TH'], 1, 9))
>> ('Time to complete', 5.748599869548343e-05)
# Timing for a simple ref., as mentioned above, the raw_data is a dict containing about 10,000 keys
You can't do that efficiently with this representation of the graph: the current algorithm has O(|E|^3) time complexity. It is a better idea to store the edges as adjacency lists, where each list holds only the vertices adjacent to a given vertex. Then it is easy to do what you need, and fortunately you can re-represent the graph in O(|E|) time.
How to do that
We will store the graph as an array of vertices (in this case, because the vertex values are strings, we use a dictionary). We want to be able to look up all neighbours of a vertex, so for each vertex we store the list of its neighbours.
Now we just need to construct this structure from the set of edges (i.e. raw_data).
How do we add an edge to the graph? Easy! We find the "from" vertex in our structure and append the "to" vertex to its list of neighbours.
So the construct_graph function could look like:

from collections import defaultdict

def construct_graph(raw_data):   # here we change the representation
    graph = defaultdict(list)    # our graph
    for pair in raw_data:        # go through every edge
        u, v = pair.split("/")   # get the "from" and "to" vertices
        graph[u].append(v)       # and add this edge to our structure
    return graph                 # return our new graph to other functions
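With the raw_data from the question, the adjacency lists come out as follows (a quick sketch to illustrate the new representation):

graph = construct_graph(raw_data)
print(dict(graph))
# {'EZ': ['TC'], 'LM': ['TH', 'TC'], 'CD': ['EH', 'TC'],
#  'EH': ['TC'], 'BT': ['TH', 'TX'], 'TX': ['TH']}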
How to find a path of length 2
We will use DFS on our graph.

def dfs(g, u, dist):               # this is a simple dfs function
    if dist == 2:                  # we have walked 'dist' edges from our start
        return [u]                 # so if we already found an answer, return it
    for v in g.get(u, []):         # otherwise check all neighbours of the current vertex
        ans = dfs(g, v, dist + 1)  # run dfs on every neighbour with dist+1
        if ans:                    # and if that dfs found something
            ans.append(u)          # add the current vertex to our answer
            return ans             # and return it
    return []                      # otherwise we found nothing
And then we just try it for every vertex.
def main():
    graph = construct_graph(raw_data)
    for v in graph.keys():              # here we try to find a path
        ans = dfs(graph, v, 0)          # starting with dist 0
        if ans:                         # and if we found something
            print(list(reversed(ans)))  # print it; note the answer is built in reverse
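With the raw_data from the question, main() should print the two vertex chains corresponding to the pairs found by the original triple loop:

main()
# ['CD', 'EH', 'TC']
# ['BT', 'TX', 'TH']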
I have a file containing lines as follows. finalInjectionList is the input file:

[0, 2, 3]
[0, 2, 3, 4]
[0, 3]
[1, 2, 4]
[2, 3]
[2, 3, 4]

Here [0, 2, 3, 4] and [1, 2, 4] are the best supersets for my problem and I want to write them to an output file, because they are supersets of some other lines and NOT subsets of any line.
My code:
import ast
import itertools

def get_data(filename):
    with open(filename, 'r') as fi:
        data = fi.readlines()
    return data

def get_ast_set(line):
    return set(ast.literal_eval(line))

def check_infile(datafile, savefile):
    list1 = [get_ast_set(row) for row in get_data(datafile)]
    print(list1)
    outlist = []
    #for i in range(len(list1)):
    for a, b in itertools.combinations(list1, 2):
        if a.issuperset(b):
            with open(savefile, 'a') as fo:
                fo.writelines(str(a))

if __name__ == "__main__":
    datafile = str("./finalInjectionList")
    savefile = str("./filteredSets")
    check_infile(datafile, savefile)
My code writes all supersets, e.g. {2, 3, 4} as well. But {0, 2, 3, 4} already covers {2, 3, 4}, so I do not want to write {2, 3, 4} to the output file.
Are there any suggestions?
Your logic in the for loop with itertools.combinations is a bit flawed, as it would also create a combination ({2, 3, 4}, {2, 3}), where {2, 3, 4} is the superset.
I would approach the problem by removing items from the list if they are a subset of another item.
import itertools
import ast

with open(r"C:\Users\%USERNAME%\Desktop\test.txt", 'r') as f:
    data = f.readlines()

data = [d.replace('\n', '') for d in data]
data = [set(ast.literal_eval(d)) for d in data]
data.sort(key=len)
data1 = data

for d in data:
    flag = 0
    for d1 in data1:
        print(d, d1)
        if d == d1:
            print('both sets are same')
            continue
        if d.issubset(d1):
            print(str(d) + ' is a subset of ' + str(d1))
            flag = 1
            break
        else:
            print(str(d) + ' is not a subset of ' + str(d1))
    if flag == 1:
        # if the set is a subset of another set, remove it
        data1 = [d1 for d1 in data1 if d1 != d]

print('set: ', data1)  # data1 will contain your result at the end of the loop
With input:
0, 2, 3
0, 2, 3, 4
0, 3
1, 2, 4
2, 3
2, 3, 4
The output will be
[{1, 2, 4}, {0, 2, 3, 4}]
which can then be written to the output file, as sketched below.
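A minimal sketch for writing that result to a file (the file name here is just an example):

with open("filteredSets.txt", "w") as fo:
    for s in data1:
        fo.write(str(s) + "\n")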
Solved by modifying the routine check_infile:
import ast
import itertools
# A union-find style (parent chain) approach, adapted from a union by rank /
# path compression program for detecting cycles in a graph
from collections import defaultdict

def findparent(d, node):
    """Goes through the chain of parents until we reach a node which is its own parent,
    meaning no other set has been recorded as its superset"""
    if d[node] == node:
        return node
    else:
        return findparent(d, d[node])

def get_data(filename):
    with open(filename, 'r') as fi:
        data = fi.readlines()
    return data

def get_ast_set(line):
    return set(ast.literal_eval(line))

def check_infile(datafile, savefile):
    """Find the minimum number of supersets as follows:
    1) identify the superset of each set
    2) go through the superset chains (findparent) to find the sets which are roots, i.e. supersets"""
    list1 = [get_ast_set(row) for row in get_data(datafile)]
    print(list1)
    outlist = []
    n = len(list1)
    # Initially each node is its own parent (i.e. include self as superset)
    # Here parent means superset
    parents = {u: u for u in range(n)}
    for u in range(n):
        a = list1[u]
        for v in range(u + 1, n):
            b = list1[v]
            if a.issuperset(b):
                parents[v] = u  # index u is superset of v
            elif b.issuperset(a):
                parents[u] = v  # index v is superset of u
    # Write root nodes
    roots = set()
    for u in range(n):
        roots.add(findparent(parents, u))
    with open(savefile, 'w') as fo:
        for i in roots:
            fo.write(str(list1[i]))
            fo.write('\n')

if __name__ == "__main__":
    datafile = str("./finalInjectionList.txt")
    savefile = str("./filteredSets.txt")
    check_infile(datafile, savefile)
Test File (finalInjectionList.txt)
[0, 2, 3]
[0, 2, 3, 4]
[0, 3]
[1, 2, 4]
[2, 3]
[2, 3, 4]
Output File (filteredSets.txt)
{0, 2, 3, 4}
{1, 2, 4}
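As a quick sanity check (hypothetical, using the sets from the test file), every input line should be covered by one of the two written supersets:

kept = [{0, 2, 3, 4}, {1, 2, 4}]
inputs = [{0, 2, 3}, {0, 2, 3, 4}, {0, 3}, {1, 2, 4}, {2, 3}, {2, 3, 4}]
assert all(any(s <= k for k in kept) for s in inputs)  # every input set is a subset of a kept set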
I am reading about association analysis in the book Machine Learning in Action. The following explanation and code are given in the book:
The k-2 thing may be a little confusing. Let’s look at that a little
further. When you were creating {0,1} {0,2}, {1,2} from {0}, {1}, {2},
you just combined items. Now, what if you want to use {0,1} {0,2},
{1,2} to create a three-item set? If you did the union of every set,
you’d get {0, 1, 2}, {0, 1, 2}, {0, 1, 2}. That’s right. It’s the same
set three times. Now you have to scan through the list of three-item
sets to get only unique values. You’re trying to keep the number of
times you go through the lists to a minimum. Now, if you compared the
first element {0,1} {0,2}, {1,2} and only took the union of those that
had the same first item, what would you have? {0, 1, 2} just one time.
Now you don’t have to go through the list looking for unique values.
def aprioriGen(Lk, k):  # creates Ck
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            L1 = list(Lk[i])[:k-2]; L2 = list(Lk[j])[:k-2]  # join sets if first k-2 items are equal
            L1.sort(); L2.sort()
            if L1 == L2:
                retList.append(Lk[i] | Lk[j])
    return retList
Suppose I am calling the above function as follows:
Lk = [frozenset({2, 3}), frozenset({3, 5}), frozenset({2, 5}), frozenset({1, 3})]
k = 3
aprioriGen(Lk,3)
I am getting the following output:
[frozenset({2, 3, 5})]
I think there is a bug in the above logic, since we are missing other combinations like {1, 2, 3} and {1, 3, 5}. Is my understanding right?
I think you are following the link below. The output set depends on the minSupport value that we pass.
http://adataanalyst.com/machine-learning/apriori-algorithm-python-3-0/
If we reduce the minSupport value to 0.2, we get all sets.
Below is the complete code
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 31 16:57:26 2018

@author: rponnurx
"""
from numpy import *

def loadDataSet():
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]

def createC1(dataSet):
    C1 = []
    for transaction in dataSet:
        for item in transaction:
            if not [item] in C1:
                C1.append([item])
    C1.sort()
    return list(map(frozenset, C1))  # use frozenset so we can use it as a key in a dict

def scanD(D, Ck, minSupport):
    ssCnt = {}
    for tid in D:
        for can in Ck:
            if can.issubset(tid):
                if not can in ssCnt: ssCnt[can] = 1
                else: ssCnt[can] += 1
    numItems = float(len(D))
    retList = []
    supportData = {}
    for key in ssCnt:
        support = ssCnt[key] / numItems
        if support >= minSupport:
            retList.insert(0, key)
        supportData[key] = support
    return retList, supportData

dataSet = loadDataSet()
print(dataSet)
C1 = createC1(dataSet)
print(C1)

# D is the dataset in set form.
D = list(map(set, dataSet))
print(D)

L1, suppDat0 = scanD(D, C1, 0.5)
print(L1)

def aprioriGen(Lk, k):  # creates Ck
    retList = []
    print("Lk")
    print(Lk)
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            L1 = list(Lk[i])[:k-2]; L2 = list(Lk[j])[:k-2]
            L1.sort(); L2.sort()
            if L1 == L2:  # if first k-2 elements are equal
                retList.append(Lk[i] | Lk[j])  # set union
    return retList

def apriori(dataSet, minSupport=0.5):
    C1 = createC1(dataSet)
    D = list(map(set, dataSet))
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    while (len(L[k-2]) > 0):
        Ck = aprioriGen(L[k-2], k)
        Lk, supK = scanD(D, Ck, minSupport)  # scan DB to get Lk
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData

L, suppData = apriori(dataSet, 0.2)
print(L)
Output:
[[frozenset({5}), frozenset({2}), frozenset({4}), frozenset({3}), frozenset({1})], [frozenset({1, 2}), frozenset({1, 5}), frozenset({2, 3}), frozenset({3, 5}), frozenset({2, 5}), frozenset({1, 3}), frozenset({1, 4}), frozenset({3, 4})], [frozenset({1, 3, 5}), frozenset({1, 2, 3}), frozenset({1, 2, 5}), frozenset({2, 3, 5}), frozenset({1, 3, 4})], [frozenset({1, 2, 3, 5})], []]
I coded a non-recursive DFS solution, but I can't modify it to produce a topological sort:
def dfs(graph, start):
    path = []
    stack = [start]
    while stack != []:
        v = stack.pop()
        if v not in path: path.append(v)
        for w in reversed(graph[v]):
            if w not in path and not w in stack:
                stack.append(w)
    return path
Any ideas on how to modify it?
With the recursive version I can easily get the ordering:
def dfs_rec(graph, start, path):
    path = path + [start]
    for edge in graph[start]:
        if edge not in path:
            path = dfs_rec(graph, edge, path)
    print start
    return path
Input:
>>> graph = {
    1: [2, 3],
    2: [4, 5, 6],
    3: [4, 6],
    4: [5, 6],
    5: [6],
    6: []
}
>>> dfs_rec(graph,1,[])
6
5
4
2
3
1
[1, 2, 4, 5, 6, 3]
>>> dfs(graph,1)
[1, 2, 4, 5, 6, 3]
>>> graph = {
    1: [3],
    3: [5, 6],
    5: [4],
    4: [7],
    7: [],
    6: []
}
>>> print dfs_rec(graph,1,[])
7
4
5
6
3
1
[1, 3, 5, 4, 7, 6]
>>> print dfs(graph,1)
[1, 3, 5, 4, 7, 6]
So I need to get this ordering in the non-recursive version as well.
Non-recursive solution:
I think this could also be a solution; correct me if I am wrong.
def dfs(graph, start):
    path = []
    stack = [start]
    label = len(graph)
    result = {}
    while stack != []:
        # this for loop could be done in other ways also
        for element in stack:
            if element not in result:
                result[element] = label
                label = label - 1
        v = stack.pop()
        if v not in path: path.append(v)
        for w in reversed(graph[v]):
            if w not in path and not w in stack:
                stack.append(w)
    result = {v: k for k, v in result.items()}
    return path, result
Input:
graph = {1: [3], 3: [5, 6], 5: [4], 4: [7], 7: [], 6: []}
print dfs(graph, 1)
Output:
([1, 3, 5, 4, 7, 6], {1: 7, 2: 4, 3: 5, 4: 6, 5: 3, 6: 1})
        1
       /
      3
     / \
    5   6
   /
  4
 /
7
FWIW, here is some code I worked up for a non-recursive topological sort.
from collections import defaultdict, namedtuple
from itertools import islice

Results = namedtuple('Results', ['sorted', 'cyclic'])

def topological_sort(dependency_pairs):
    'Sort values subject to dependency constraints'
    num_heads = defaultdict(int)   # num arrows pointing in
    tails = defaultdict(list)      # list of arrows going out
    heads = []                     # unique list of heads in order first seen
    for h, t in dependency_pairs:
        num_heads[t] += 1
        if h in tails:
            tails[h].append(t)
        else:
            tails[h] = [t]
            heads.append(h)
    ordered = [h for h in heads if h not in num_heads]
    for h in ordered:
        for t in tails[h]:
            num_heads[t] -= 1
            if not num_heads[t]:
                ordered.append(t)
    cyclic = [n for n, heads in num_heads.items() if heads]
    return Results(ordered, cyclic)

if __name__ == '__main__':
    print( topological_sort('aa'.split()) )
    print( topological_sort('ah bg cf ch di ed fb fg hd he ib'.split()) )
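For reference, those two calls should print roughly the following (the first argument is a self-loop, so nothing can be sorted and 'a' is reported as cyclic):

Results(sorted=[], cyclic=['a'])
Results(sorted=['a', 'c', 'f', 'h', 'e', 'd', 'i', 'b', 'g'], cyclic=[])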
from collections import defaultdict, deque

class Graph:
    def __init__(self, directed=False, nodes=None, edges=None):
        self.graph = defaultdict(list)
        self.directed = directed
        self.add_nodes(nodes)
        self.add_edges(edges)

    @property
    def nodes(self):
        if not self.directed:
            return list(self.graph.keys())
        elif self.directed:
            nodes = set()
            nodes.update(self.graph.keys())
            for node in self.graph.keys():
                for neighbor in self.graph[node]:
                    nodes.add(neighbor)
            return list(nodes)

    def add_node(self, node):
        if node not in self.nodes:
            self.graph[node] = list()

    def add_nodes(self, nodes):
        if nodes is None:
            return None
        for node in nodes:
            self.add_node(node)

    @property
    def edges(self):
        edges = list()
        for source, neighbors in self.graph.items():
            for neighbor in neighbors:
                edges.append((source, neighbor))
        return edges

    def add_edge(self, edge):
        node1, node2 = edge
        self.graph[node1].append(node2)
        if not self.directed:
            self.graph[node2].append(node1)

    def add_edges(self, edges):
        if edges is None:
            return None
        for edge in edges:
            self.add_edge(edge)

    def topological_util(self, node, visited, label):
        visited[node] = True
        for edge in self.graph[node]:
            if not visited[edge]:
                self.topological_util(edge, visited, label)
        label.appendleft(node)

    def topological_sort(self):
        visited = dict.fromkeys(self.nodes, False)
        # store all nodes in topological order, the index is the position
        label = deque()
        for node in self.nodes:
            if not visited[node]:
                self.topological_util(node, visited, label)
        return label
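A minimal usage sketch for this class, on a small hypothetical DAG:

g = Graph(directed=True, edges=[(1, 2), (1, 3), (2, 4), (3, 4)])
print(list(g.topological_sort()))  # one valid ordering, e.g. [1, 3, 2, 4]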
# This algorithm gives the logic of topological sorting. If you want to run it,
# supply an adjacency matrix of your choice; it works on graph elements ranging from 0 to n.
a = [[0, 0, 1, 0, 0, 0],
     [0, 0, 1, 0, 0, 0],
     [0, 0, 0, 1, 1, 0],
     [0, 0, 0, 0, 1, 0],
     [0, 0, 0, 0, 0, 0],
     [0, 0, 1, 0, 0, 0]]
vis = [0 for i in range(0, len(a))]
s = []
orderstack = []  # stores the reverse order of the topologically sorted elements

def dfs_for_topological_sorting(a, vis, i):
    vis[i] = 1
    x = 0
    for j in range(0, len(a[0])):
        if (a[i][j] == 1 and vis[j] == 0):
            x = 1
            s.append(j)
            # print(s)
            dfs_for_topological_sorting(a, vis, j)
    if (x == 0 and len(s) != 0):
        orderstack.append(s[len(s) - 1])
        if (len(s) > 0):
            dfs_for_topological_sorting(a, vis, s.pop())

for i in range(0, len(a)):
    if (i not in orderstack):
        s.append(i)
        dfs_for_topological_sorting(a, vis, i)

print(orderstack[len(orderstack) - 1::-1])
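For the matrix a above (edges 0->2, 1->2, 2->3, 2->4, 3->4, 5->2), running the snippet as-is should print one valid topological order, for example:

[5, 1, 0, 2, 3, 4]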
from collections import defaultdict  # importing defaultdict

def topological_sort(graph, b, a):  # defining the function
    T = []
    visited = []
    in_degree = []
    for i in range(a + 1):
        in_degree.append(0)  # initialising the in-degree of each vertex to 0
        visited.append(0)    # initialising all the vertices as unvisited
    for i in range(1, a + 1):
        for j in graph[i]:
            in_degree[j] = in_degree[j] + 1  # computing the in-degree of each vertex
    Queue = []
    for i in range(1, a + 1):
        if in_degree[i] == 0:
            Queue.append(i)  # appending the vertices that have zero in-degree
            visited[i] = 1   # and marking them as visited
    while Queue:
        vertex = Queue.pop(Queue.index(min(Queue)))  # popping vertices in lexicographical order
        T.append(vertex)                             # and appending them to T
        for j in graph[vertex]:                      # for each neighbour of the popped vertex,
            if visited[j] == 0:                      # if it is not yet visited,
                in_degree[j] = in_degree[j] - 1      # decrease its in-degree,
                if in_degree[j] == 0:                # and when it reaches zero
                    Queue.append(j)                  # append it to the queue
                    visited[j] = 1                   # and mark it as visited
    return T

graph = defaultdict(list)
a, b = list(map(int, input().split()))  # a = number of vertices
for i in range(b):                      # b = number of edges
    p, q = list(map(int, input().split()))
    graph[p].append(q)                  # we take the input as a DAG

ss = topological_sort(graph, b, a)      # calling the function
for i in ss:
    print(i, end=" ")
'''Input
5 6
1 2
1 3
2 3
2 4
3 4
3 5
Your Code's Output
1 2 3 4 5
Expected Correct Output
1 2 3 4 5 '''