I want to optimise my model, which finds the highest possible overlap (assignment) of items (probes) against a sequence. I have the start and end positions of all items and can thus build up my model as follows:
import pyomo
import pyomo.environ as pe
import pyomo.opt as po
sequence = [0, 1, 2, 3, 4, 5]
probes = ["a", "b", "c"]
probe_starts = {"a": 0, "b": 2, "c": 3}
probe_ends = {"a": 2, "b": 4, "c": 5}
# Model definition
model = pe.ConcreteModel()
model.sequence = pe.Set(initialize=sequence)
model.probes = pe.Set(initialize=probes)
model.starts = pe.Param(model.probes, initialize=probe_starts)
model.ends = pe.Param(model.probes, initialize=probe_ends)
model.assignment = pe.Var(model.sequence, model.probes, domain=pe.Binary)
# Objective
expr = sum([model.assignment[j, i] for j in model.sequence for i in model.probes])
model.objective = pe.Objective(expr=expr, sense=pe.maximize)
I now have the following three constraints:
Items can only bind one at a time (no overlapping items)
Because items have a start and an end position, they can only be assigned at positions between their respective start and end
If items are assigned, they have to bind in their entirety, spanning from their start to their end
# One probe per sequence position
model.one_probe_bound = pe.ConstraintList()
for s in model.sequence:
    model.one_probe_bound.add(sum(model.assignment[s, p] for p in model.probes) <= 1)
# No assignment before/after start/end
model.define_length = pe.ConstraintList()
for s in model.sequence:
    for p in model.probes:
        if s < model.starts[p]:
            model.define_length.add(model.assignment[s, p] == 0)
        if s > model.ends[p]:
            model.define_length.add(model.assignment[s, p] == 0)
Both of the constraints above work without issue, but I can't find a way to express the logical or from my third condition. I tried to use a disjunction as described in this Stack Overflow answer:
# Only allow full assignment or none
import pyomo.gdp  # needed for pyomo.gdp.Disjunct / Disjunction below
def disjunct_rule(b, p, i):
    m = b.model()
    expr = sum(m.assignment[s, p] for s in m.sequence)
    if i:
        return expr == m.ends[p] - m.starts[p]
    else:
        return expr == 0
def disjunction_rule(m, p):
    return [m.disjunct[p, i] for i in [True, False]]
def xfrm(m):
    pe.TransformationFactory("gdp.bigm").apply_to(m)
model.disjunct = pyomo.gdp.Disjunct(model.probes, [True, False], rule=disjunct_rule)
model.disjunction = pyomo.gdp.Disjunction(model.probes, rule=disjunction_rule)
model.xfrm = pe.BuildAction(rule=xfrm)
Looking at the matrix representation of model.assignment with sequence along the columns and probes along the rows, I get the following:
array([[1, 1, 1, 0, 0, 0],
       [0, 0, 0, 1, 1, 0],
       [0, 0, 0, 0, 0, 1]])
As can be seen above, I get assignments that don't span the entire length of the item (e.g. c, the 3rd item, is only assigned at the last position whereas it should have to bind at the two previous ones too). The only valid solution I can see in this toy example is the following:
array([[1, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1, 1]])
Where items a and c are selected in their entirety and b isn't selected at all. This way all constraints are satisfied. The solver I used was glpk. Thanks in advance for any suggestions.
Here is a cut at this....
The other way I mentioned would be to introduce another binary variable for each point in the sequence and control it (somehow) via the assignment of probes. The method below should be much more efficient than that.
Try this out with a larger dataset... the current one has only 1 feasible solution. Also, I assumed that better solutions would use fewer probes, so I rewrote the objective to reflect that.
The basis of this solution is that you must connect to the start exactly once (constraint 1) and to the end exactly once (constraint 2), and any intermediate connections must be consistent (constraint 3).
I used some sub-setting in a few spots where needed.
Code:
# model to make contiguous connections across a sequence
# using as few connections (probes) as possible
import pyomo
import pyomo.environ as pe
sequence = [0, 1, 2, 3, 4, 5]
probes = ["a", "b", "c"]
probe_starts = {"a": 0, "b": 2, "c": 3}
probe_ends = {"a": 2, "b": 4, "c": 5}
# Model definition
model = pe.ConcreteModel()
model.sequence = pe.Set(initialize=sequence)
model.probes = pe.Set(initialize=probes)
model.starts = pe.Param(model.probes, initialize=probe_starts)
model.ends = pe.Param(model.probes, initialize=probe_ends)
model.assign = pe.Var(model.probes, domain=pe.Binary) # 1 if probe p is used...
# Objective
obj = sum(model.assign[p] for p in model.probes) # use as few as possible...?
model.objective = pe.Objective(expr=obj, sense=pe.minimize)
# Constraints
# must connect start once
model.C1 = pe.Constraint(expr=sum(model.assign[p] for p in model.probes
                                  if model.starts[p] == sequence[0]) == 1)
# must connect end once
model.C2 = pe.Constraint(expr=sum(model.assign[p] for p in model.probes
                                  if model.ends[p] == sequence[-1]) == 1)
# must connect any intermediate connections...
# if probe p1 is selected, must select an eligible p2 follow-on
def connect(model, p1):
    # create a subset on the fly of legal follow-on connections
    # assumption here that sequence is a sequential list of ints, hence the "+1"
    p2s = [p for p in model.probes if model.starts[p] == model.ends[p1] + 1]
    if not p2s:
        return pe.Constraint.Skip
    return sum(model.assign[p2] for p2 in p2s) == model.assign[p1]
non_completing_probes = [p for p in model.probes if model.ends[p] != sequence[-1]]
model.C3 = pe.Constraint(non_completing_probes, rule=connect)
solver = pe.SolverFactory('glpk')
result = solver.solve(model)
print(result)
model.display()
Yields:
Problem:
- Name: unknown
  Lower bound: 2.0
  Upper bound: 2.0
  Number of objectives: 1
  Number of constraints: 4
  Number of variables: 4
  Number of nonzeros: 5
  Sense: minimize
Solver:
- Status: ok
  Termination condition: optimal
  Statistics:
    Branch and bound:
      Number of bounded subproblems: 0
      Number of created subproblems: 0
  Error rc: 0
  Time: 0.006771087646484375
Solution:
- number of solutions: 0
  number of solutions displayed: 0

Model unknown

  Variables:
    assign : Size=3, Index=probes
        Key : Lower : Value : Upper : Fixed : Stale : Domain
          a :     0 :   1.0 :     1 : False : False : Binary
          b :     0 :   0.0 :     1 : False : False : Binary
          c :     0 :   1.0 :     1 : False : False : Binary

  Objectives:
    objective : Size=1, Index=None, Active=True
        Key  : Active : Value
        None :   True :   2.0

  Constraints:
    C1 : Size=1
        Key  : Lower : Body : Upper
        None :   1.0 :  1.0 :   1.0
    C2 : Size=1
        Key  : Lower : Body : Upper
        None :   1.0 :  1.0 :   1.0
    C3 : Size=1
        Key : Lower : Body : Upper
          a :   0.0 :  0.0 :   0.0
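If you still need the position-by-probe matrix from the original question, it can be recovered from the solved probe-level variables. A minimal post-processing sketch (assuming the model above has just been solved):
import numpy as np
# rebuild the probe x sequence-position matrix from the selected probes
matrix = np.zeros((len(probes), len(sequence)), dtype=int)
for row, p in enumerate(probes):
    if pe.value(model.assign[p]) > 0.5:  # probe p was selected
        matrix[row, probe_starts[p]:probe_ends[p] + 1] = 1
print(matrix)
# [[1 1 1 0 0 0]
#  [0 0 0 0 0 0]
#  [0 0 0 1 1 1]]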
I am trying to write a program which finds duplicate coordinates (x, y, z) in a 3D array. The script should mark one or multiple duplicate points within a given tolerance - one point could have more than one duplicate. I found lots of different approaches, some of which use sorting.
To try the code I created the following test data set:
21.9799629872016 57.4044376777929 0
22.7807110172432 57.6921361034533 0
28.660840151287 61.5676757599822 0
28.6608401512 61.56767575998 0
30.6654296288019 56.2221038199424 0
20.3752036442253 49.1392209993897 0
32.8036584048178 43.927288357851 0
35.8105426210901 51.9456462679106 0
40.8888359641279 58.6944308422108 0
40.88883596412 70.6944308422108 0
41.0892949118794 58.1598736482068 0
39.6860822776189 64.775018924006 0
39.1515250836149 64.8418385732565 0
8.21402748063493 63.5054455882466 0
8.2140275006 63.5074455882 0
8.21404548063493 63.5064455882466 0
8.2143214806 63.5084455882 0
The code I came up with is:
import numpy as np

# nodes holds the test data above as an (N, 3) numpy array
# given tolerance
tol = 0.01
# initialize empty list for the found duplicates
duplicates = []
# loop over all nodes
for i in range(len(nodes)):
    # current node
    curr_node = nodes[i]
    # create difference vector
    diff = nodes - curr_node
    # get all duplicate indices (the node itself is found as well)
    condition = np.where((abs(diff[:,0])<tol) & (abs(diff[:,1])<tol) & (abs(diff[:,2])<tol))
    # check if more than one entry is present. If larger than 1, duplicate points exist
    if len(condition[0]) > 1:
        # loop over all found duplicate points
        for j in range(len(condition[0])):
            # add duplicate if not already marked as duplicate
            if j > 0 and condition[0][j] not in duplicates:
                duplicates.append(condition[0][j])
This code returns what I am expecting:
duplicates = [3, 14, 15, 16]
However, the code is very slow. For 300,000 points it takes about 10 minutes. I am wondering if there is any faster way to implement this.
You can place points in a grid of tolerance-sized cubes. Then, for each point, you only need to check the points from the same cube + 26 adjacent ones instead of all other points.
# compute the grid: map each tolerance-sized cube to the indices of the points in it
from collections import defaultdict
from itertools import product

grid = defaultdict(list)
for i, p in enumerate(points):
    cube = (
        int(p[0] / tolerance),
        int(p[1] / tolerance),
        int(p[2] / tolerance))
    grid[cube].append(i)

# check: compare each point only against points in its own and adjacent cubes
def adjacent_cubes(cube):
    # the cube itself plus its 26 neighbours
    for dx, dy, dz in product((-1, 0, 1), repeat=3):
        yield (cube[0] + dx, cube[1] + dy, cube[2] + dz)

duplicates = set()
for i, p in enumerate(points):
    cube = (int(p[0] / tolerance), int(p[1] / tolerance), int(p[2] / tolerance))
    for adj in adjacent_cubes(cube):
        for j in grid.get(adj, ()):
            if j > i and all(abs(p[k] - points[j][k]) < tolerance for k in range(3)):
                duplicates.add(j)
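With roughly uniformly distributed data this reduces the work from O(n²) pairwise comparisons to near O(n), since each point is only compared against the few points in its own cube and the 26 neighbouring ones.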
You could sort the nodes upfront to reduce the number of loops needed:
import timeit
import random
nodes = [
[21.9799629872016, 57.4044376777929, 0],
[22.7807110172432, 57.6921361034533, 0],
[28.660840151287, 61.5676757599822, 0], [28.6608401512, 61.56767575998, 0],
[30.6654296288019, 56.2221038199424, 0],
[20.3752036442253, 49.1392209993897, 0],
[32.8036584048178, 43.927288357851, 0],
[35.8105426210901, 51.9456462679106, 0],
[40.8888359641279, 58.6944308422108, 0],
[40.88883596412, 70.6944308422108, 0],
[41.0892949118794, 58.1598736482068, 0],
[39.6860822776189, 64.775018924006, 0],
[39.1515250836149, 64.8418385732565, 0],
[8.21402748063493, 63.5054455882466, 0], [8.2140275006, 63.5074455882, 0],
[8.21404548063493, 63.5064455882466, 0], [8.2143214806, 63.5084455882, 0]
]
duplicates = [3, 14, 15, 16]
assertList = [n for i, n in enumerate(nodes) if i in duplicates]
def new(nodes, tol=0.01):
    print(f"Searching duplicates in {len(nodes)} nodes")
    coordinateLen = range(len(nodes[0]))
    nodes.sort()
    last = nodes[0]
    duplicates = []
    for i, node in enumerate(nodes[1:]):
        if not all(0 <= node[idx] - last[idx] < tol for idx in coordinateLen):
            last = node
        else:
            duplicates.append(node)
    print(f"Found: {len(duplicates)} duplicates")
    return duplicates
# generate random numbers!
randomNodes = [
    [random.uniform(0, 100),
     random.uniform(0, 100),
     random.uniform(0, 1)] for _ in range(300000)
]
# make sure there are at least the same 4 duplicates!
randomNodes += nodes
for i, lst in enumerate((nodes, randomNodes)):
    for func in ("new", ):
        t1 = timeit.Timer(f"{func}({lst})", f"from __main__ import {func}")
        # verify values of found duplicates are [3, 14, 15, 16] !!
        if i == 0:
            print(all(x for x in new(nodes) if x in assertList))
        print(f"{func} took: {t1.timeit(number=10)} seconds")
        print("")
Out:
Searching duplicates in 17 nodes
Found: 4 duplicates
True
....
new took: 0.00034904800000001845 seconds
Searching duplicates in 300017 nodes
Found: 4 duplicates
...
new took: 14.316181525000001 seconds
I'm having an issue with this function question:
x = [[0,0,0,0,0],[0,0,0,0,1],[0,1,0,0,0]]
Function: Book(seat) #assuming the seat is A5
The function assumes the seat is valid, with a row letter A, B or C followed by a seat digit. It needs to transform the letter part of seat to an integer (A = 0, B = 1, C = 2). The string digit also needs to be mapped: "1" → 0, "2" → 1, "3" → 2, "4" → 3 and "5" → 4. These two indices can then be used to check whether the chair in the x list-of-lists is already booked (1) or not (0). If it is not booked, it should be changed to booked and the function should return True; otherwise it should return False.
My solution is
a = {"A":[0,0,0,0,0], "B":[0,0,0,0,1], "C":[0,1,0,0,],}
rowIndex = ["A","B","C"]
columnIndex = [1,2,3,4,5]
def book(seat):
    row = seat[0]
    column = seat[1]
    while row in rowIndex and column in columnIndex:
        if x[row][column-1] == 0:
            return True
        else:
            return False
It outputs False (seat already booked) irrespective of the seat I book. I think there is an issue with my code but I can't seem to figure it out.
There are a number of problems with your code for the function:
There is no x variable defined — you called it a in the line:
a = {"A":[0,0,0,0,0], "B":[0,0,0,0,1], "C":[0,1,0,0,],}
After the assignments:
row = seat[0]
column = seat[1]
you then test the values in the following:
while row in rowIndex and column in columnIndex:
which will prevent any of the rest of the code from executing unless it's True — and it never is, because column holds the character "5" rather than the integer 5, so it is never found in columnIndex.
What you need inside of the while to iterate through all the possibilities would require two for loops, one nested inside the other. However…
You don't need to loop at all as illustrated below.
BOOKED = 1
x = [[0,0,0,0,0], [0,0,0,0,1], [0,1,0,0,0]]
letter_to_index = {"A": 0, "B": 1, "C": 2}
digit_to_index = {"1": 0, "2": 1, "3": 2, "4": 3, "5": 4}
def book(seat):
    # Convert each seat character to an integer index.
    row = letter_to_index[seat[0]]
    col = digit_to_index[seat[1]]
    if x[row][col] == BOOKED:
        return False
    else:
        # Book the seat and return True.
        x[row][col] = BOOKED
        return True

if __name__ == '__main__':
    print(book('A5'))  # -> True
    # Try doing it again.
    print(book('A5'))  # -> False
Here is a simpler implementation of your code. You don't need to use loops: you have a dictionary, and you can look it up in a much simpler way.
a = {"A":[0,0,0,0,0], "B":[0,0,0,0,1], "C":[0,1,0,0,],}
def book(seat):
    r, c = seat  # this allows A to be r and 5 to be c
    # check if the value of r is in the keys of a
    # also check if the seat number is within the length of seats for that key
    if r in a and int(c) <= len(a[r]):
        # if a valid request, check whether the seat is already booked
        # if not, set the seat to booked by setting its value to 1 and return True
        # if already booked, return False
        if a[r][int(c)-1] == 0:
            a[r][int(c)-1] = 1
            return True
        else:
            return False
    # if not a valid request, send an appropriate message
    else:
        return 'invalid request'

print('A5', book('A5'))
print('C2', book('C2'))
print('A7', book('A7'))
print(a)
Output of this will be:
A5 True
C2 False
A7 invalid request
{'A': [0, 0, 0, 0, 1], 'B': [0, 0, 0, 0, 1], 'C': [0, 1, 0, 0]}
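Note that this version mixes return types: True/False for a processed request but the string 'invalid request' for a malformed one, so a caller needs to check for the string before treating the result as a boolean.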
I have a problem with formulating constraints for 3d indexed variables with Pyomo:
I have the following variable list:
1 Var Declarations
    E : Size=6, Index=N
        Key       : Lower : Value : Upper : Fixed : Stale : Domain
        (0, 0, 2) :     0 :    10 :  None : False : False : NonNegativeReals
        (0, 0, 3) :     0 :    10 :  None : False : False : NonNegativeReals
        (0, 2, 0) :     0 :    10 :  None : False : False : NonNegativeReals
        (0, 3, 0) :     0 :    10 :  None : False : False : NonNegativeReals
        (1, 3, 1) :     0 :    10 :  None : False : False : NonNegativeReals
        (1, 4, 1) :     0 :    10 :  None : False : False : NonNegativeReals
they are essentially values in a 3D array (2x5x5) which can take a value other than 0. I try to optimize them, with some constraints:
- the sum value of the variables in a row should be maximized
- the sum value of the variables in a column should take a certain value
My question relates to both constraints. I tried to formulate the first constraint like this:
def max_perf_rule(model, a, b):
    return sum(model.E[a,b,c] for c in range(5) if (a,b,c) in model.N) <= H[a,b]

model.max_perf = Constraint(range(2), range(5), rule=max_perf_rule)
, where model.N= [(0, 0, 2), (0, 0, 3), (0, 2, 0), (0, 3, 0), (1, 3, 1), (1, 4, 1)]
The variables are originally given by model.N (list of 3d tuples) but I need the two "range(2)" and "range(5)" as inputs in this constraint in order to be able to refer to the proper row.
No matter what I try I cannot create the desired constraints. They should look something like this:
max_perf : Size=10, Index=max_perf_index, Active=True
    (0, 0) : E[0,0,2] + E[0,0,3] : <= 0.0
    (0, 2) : E[0,2,0] : <= 2688.0
    (0, 3) : E[0,3,0] : <= 896.0
    (1, 3) : E[1,3,1] : <= 448.0
    (1, 4) : E[1,4,1] : <= 9999999.0
...but I keep getting the following error:
"ERROR: Constructing component 'max_perf' from data=None failed: ValueError:
Invalid constraint expression. The constraint expression resolved to a
trivial Boolean (True) instead of a Pyomo object. Please modify your rule
to return Constraint.Feasible instead of True.
Error thrown for Constraint 'max_perf[0,1]'"
I have no idea what this is; I even tried to reproduce the situation with a dictionary instead of model.E and it worked nicely.
Do you have any solution for the problem?
Thanks in advance!
The problem is that with the way you set up your indexing sets, not every combination of range(2) x range(5) x range(5) appears in model.N. For some combinations the sum in the constraint rule will not have any terms and therefore evaluate to a constant value of 0. The easiest way to get around this would be to add a check in your constraint rule to make sure the sum is not empty:
def max_perf_rule(model, a, b):
    temp = sum(model.E[a,b,c] for c in range(5) if (a,b,c) in model.N)
    if type(temp) is int:  # This will be true if the sum was empty
        return Constraint.Skip
    return temp <= H[a,b]

model.max_perf = Constraint(range(2), range(5), rule=max_perf_rule)
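As a quick check after construction, model.max_perf.pprint() will list the constraints that were actually built and confirm that the empty (a, b) combinations were skipped.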
Suppose I have a matrix that is 100000 x 100
import numpy as np
mat = np.random.randint(2, size=(100000,100))
I wish to go through this matrix, and if a row consists entirely of 1s or entirely of 0s, I wish to change a state variable to that value. If the state is not changed, I wish to set the entire row to the value of state. The initial value of state is 0.
Naively in a for loop this can be done as follows
state = 0
for row in mat:
    if set(row) == {1}:
        state = 1
    elif set(row) == {0}:
        state = 0
    else:
        row[:] = state
However, when the size of the matrix increases this takes an impractical amount of time. Could someone point me in the right direction on how to leverage numpy to vectorize this loop and speed it up?
So for a sample input
array([[0, 1, 0],
       [0, 0, 1],
       [1, 1, 1],
       [0, 0, 1],
       [0, 0, 1]])
The expected output in this case would be
array([[0, 0, 0],
       [0, 0, 0],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1]])
Approach #1: NumPy-Vectorized
Here's a vectorized one -
def check_all(a, state):  # a is input matrix/array
    # Get zeros and ones all masks
    zm = (a==0).all(1)
    om = (a==1).all(1)
    # "Attach" boundaries with False values at the start of these masks.
    # These will be used to detect rising edges (as indices) on these masks.
    zma = np.r_[False,zm]
    oma = np.r_[False,om]
    omi = np.flatnonzero(oma[:-1] < oma[1:])
    zmi = np.flatnonzero(zma[:-1] < zma[1:])
    # Group the indices and the signatures (values as 1s and -1s)
    ai = np.r_[omi,zmi]
    av = np.r_[np.ones(len(omi),dtype=int),-np.ones(len(zmi),dtype=int)]
    # Sort the grouped indices, so we know the positions of these group
    # starts. Then index into the signatures/values and indices with those,
    # giving us the information on how these signatures occur through the
    # length of the input
    sidx = ai.argsort()
    val,aidx = av[sidx],ai[sidx]
    # The identical consecutive signatures are to be removed
    mask = np.r_[True,val[:-1]!=val[1:]]
    v,i = val[mask],aidx[mask]
    # Also, note that we are assigning all 1s as +1 signature and all 0s as -1.
    # So, in case the starting signature is a 0, assign a value of 0
    if v[0]==-1:
        v[0] = 0
    # Initialize 1D o/p array, which stores the signatures as +1s and -1s.
    # The bigger-level idea is that performing cumsum at the end gives us the
    # desired 1D output
    out1d = np.zeros(len(a),dtype=a.dtype)
    # Assign the values at i positions
    out1d[i] = v
    # Finally cumsum to get desired output
    out1dc = out1d.cumsum()
    # Correct the starting positions based on starting state value
    out1dc[:i[0]] = state
    # Convert to 2D view for mem. and perf. efficiency
    out = np.broadcast_to(out1dc[:,None],a.shape)
    return out
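A quick sanity check of check_all on the sample input from the question:
import numpy as np
a = np.array([[0, 1, 0],
              [0, 0, 1],
              [1, 1, 1],
              [0, 0, 1],
              [0, 0, 1]])
print(check_all(a, state=0))
# [[0 0 0]
#  [0 0 0]
#  [1 1 1]
#  [1 1 1]
#  [1 1 1]]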
Approach #2: Numba-based
Here's another numba-based one for memory and hence perf. efficiency -
from numba import njit

@njit(parallel=True)
def func1(zm, om, out, start_state, cur_state):
    # This outputs the 1D version of the required output.
    # Start off with the given starting state
    newval = start_state
    # Loop through the zipped zeros-all and ones-all masks and, in essence:
    # switch between zeros and ones based on whether the other ones
    # are occurring or not, prior to the current state
    for i,(z,o) in enumerate(zip(zm,om)):
        if z and cur_state:
            cur_state = ~cur_state
            newval = 0
        if o and ~cur_state:
            cur_state = ~cur_state
            newval = 1
        out[i] = newval
    return out
def check_all_numba(a, state):
    # Get zeros and ones all masks
    zm = (a==0).all(1)
    om = (a==1).all(1)
    # Decide the starting state
    cur_state = zm.argmax() < om.argmax()
    # Initialize 1D o/p array with the given state value
    out1d = np.full(len(a), fill_value=state)
    func1(zm, om, out1d, state, cur_state)
    # Broadcast into the 2D view for memory and perf. efficiency
    return np.broadcast_to(out1d[:,None],a.shape)
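It can be exercised the same way as Approach #1, e.g. check_all_numba(a, 0) on the question's sample input; note that the @njit decorator requires numba to be installed.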
You can do this without any loops by leveraging np.maximum.accumulate:
import numpy as np

R = 5      # 100000
C = 3      # 100
mat = np.random.randint(2, size=(R,C))
print(mat)                                   # original matrix
state = np.zeros((1,C))                      # or np.ones((1,C))
mat = np.concatenate([state,mat])            # insert state row
zRows = np.isin(np.sum(mat,1),[0,C])         # all zeroes or all ones
iRows = np.arange(R+1) * zRows.astype(int)   # base indexes
mat = mat[np.maximum.accumulate(iRows)][1:]  # indirection, remove state row
print(mat)                                   # modified
#original
[[0 0 1]
[1 1 1]
[1 0 1]
[0 0 0]
[1 0 1]]
# modified
[[0 0 0]
[1 1 1]
[1 1 1]
[0 0 0]
[0 0 0]]
The way it works is by preparing an indirection array for rows that need to be changed. This is done from an np.arange of row indexes in which we set to zero the indexes that will need replacement. Accumulating the maximum index will map each replaced row to an all-zero or all-one row before it.
For example:
[ 0, 1, 2, 3, 4, 5 ] # row indexes
[ 0, 1, 0, 0, 1, 0 ] # rows that are all zeroes or all ones (zRows)
[ 0, 1, 0, 0, 4, 0 ] # multiplied (iRows)
[ 0, 1, 1, 1, 4, 4 ] # np.maximum.accumulate
This gives us a list of indexes where row content should be taken from.
The state is represented by an extra row inserted at the beginning of the matrix before performing the operation and removed afterward.
This solution will be marginally slower for very small matrices (5x3) but it can give you a 20x speed boost for larger ones (100000x100: 0.7 second vs 14 seconds).
Here is a simple and fast numpy method:
import numpy as np
def pp():
    m,n = a.shape
    A = a.sum(axis=1)
    A = np.where((A==0)|(A==n))[0]
    if not A.size:
        return np.ones_like(a) if state else np.zeros_like(a)
    st = np.concatenate([np.arange(A[0]!=0), A, [m]])
    v = a[st[:-1],0]
    if A[0]:
        v[0] = state
    return np.broadcast_to(v.repeat(st[1:]-st[:-1])[:,None],(m,n))
I made some timings using this
state=0
a = (np.random.random((100000,100))<np.random.random((100000,1))).astype(int)
simple test case:
0.8655898020006134 # me
4.089095343002555 # Alain T.
2.2958932030014694 # Divakar 1
2.2178015549980046 # & 2
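A quick check of pp() on the question's sample (note that pp() reads a and state from the enclosing scope):
state = 0
a = np.array([[0, 1, 0],
              [0, 0, 1],
              [1, 1, 1],
              [0, 0, 1],
              [0, 0, 1]])
print(pp())
# [[0 0 0]
#  [0 0 0]
#  [1 1 1]
#  [1 1 1]
#  [1 1 1]]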
Like a problem I had earlier, I am trying to create a breadth-first search algorithm that takes a graph and outputs the vertex visit order. It takes an adjacency list (representing the graph) as its input, and here is what I have so far.
import sys
import Queue
# Input has to be adjacency matrix or list
graphAL2 = {0 : [1,2,3],
1 : [0,3,4],
2 : [0,4,5],
3 : [0,1,5],
4 : [1,2],
5 : [2,3] }
# NEED TO FIX:
# - Final graphAL2v print is only displaying key values as 1, not iterating
# through graph and visiting each vertex
def main():
    count = 0
    graphAL2v = {}
    for key, value in graphAL2.items():
        graphAL2v[key] = 0
    print(graphAL2v)
    for key in graphAL2v:            # each vertex v in V
        if graphAL2v[key] == 0:      # is marked with 0
            bfs(key, count, graphAL2, graphAL2v)
    print(graphAL2v)

def bfs(v, count, graphal, graphv):
    count = count + 1
    print('Visiting', v)
    # Mark v with count and initialize queue with v
    graphv[v] = count
    visited = Queue.Queue()
    while not visited.empty():       # queue not empty
        print('queue is not empty')
        for element in graphal[v]:   # each vertex w in V adjacent to front vertex
            if element == 0:
                count = count + 1
                # mark w with count
                graphal[v] = count
                visited.put()
                visited.get()

if __name__ == '__main__':
    sys.exit(main())
The problem that I am running into is that my output
{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
('Visiting', 0)
('Visiting', 1)
('Visiting', 2)
('Visiting', 3)
('Visiting', 4)
('Visiting', 5)
{0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1}
displays the visit order as 1 for all vertices in the list when it should be displaying the visit order as a different number for each vertex as it traverses the "graph." I believe that this error is stemming from within the while loop of the bfs() function. Any suggestions for trying to fix the code so I can achieve the desired output? I'm also not that familiar with queues in Python so any help is appreciated.
There are lots of issues in your code -
First of all, you are never putting anything into the Queue that you create, so it is always empty; you need to put v inside the queue before the while loop - that is the starting point.
Secondly, in the for loop, you are checking element == 0, which is wrong; you need to check whether graphv[element] == 0, that is, whether the element has already been visited.
Thirdly, in the for loop, you need to set graphv[element] = count, which signifies that you visited element.
You are not putting anything inside the queue with visited.put(); you need to pass the element to put inside the Queue as a parameter.
When getting an element back from the Queue, you need to assign it back to v, otherwise v would never change; v signifies the current element being iterated.
Example code -
import sys
import Queue
# Input has to be adjacency matrix or list
graphAL2 = {0 : [1,2,3],
1 : [0,3,4],
2 : [0,4,5],
3 : [0,1,5],
4 : [1,2],
5 : [2,3] }
def main():
    count = 0
    graphAL2v = {}
    for key, value in graphAL2.items():
        graphAL2v[key] = 0
    print(graphAL2v)
    for key in graphAL2v:            # each vertex v in V
        if graphAL2v[key] == 0:      # is marked with 0
            bfs(key, count, graphAL2, graphAL2v)
    print(graphAL2v)

def bfs(v, count, graphal, graphv):
    count = count + 1
    print('Visiting', v)
    # Mark v with count and initialize queue with v
    graphv[v] = count
    visited = Queue.Queue()
    visited.put(v)
    while not visited.empty():       # queue not empty
        print('queue is not empty')
        for element in graphal[v]:   # each vertex w in V adjacent to front vertex
            if graphv[element] == 0:
                count = count + 1
                # mark w with count
                graphv[element] = count
                visited.put(element)
        v = visited.get()
    return count

if __name__ == '__main__':
    sys.exit(main())
Demo (after above changes) -
{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
Visiting 0
queue is not empty
queue is not empty
queue is not empty
queue is not empty
queue is not empty
queue is not empty
{0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6}
You can use one of the graph libraries that provide search algorithms, such as NetworkX:
from networkx import *
graphAL2 = {0 : [1,2,3],
1 : [0,3,4],
2 : [0,4,5],
3 : [0,1,5],
4 : [1,2],
5 : [2,3] }
g = Graph()
# create graph
for node in graphAL2:
    g.add_node(node)
    for target_node in graphAL2[node]:
        g.add_edge(node, target_node)
print bfs_successors(g, 0)
This will get the successors of each node; exporting the search order from that should be a piece of cake.
Output:
{0: [1, 2, 3], 1: [4], 2: [5]}
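As a small sketch of that export step (flattening the successors dict printed above into a visit order):
# flatten the successors mapping into a visit order, root first
succ = {0: [1, 2, 3], 1: [4], 2: [5]}  # the bfs_successors(g, 0) output above
order = [0]
for node in order:                     # extends the list while iterating it
    order.extend(succ.get(node, []))
print(order)                           # [0, 1, 2, 3, 4, 5]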