delete random edges of a node till degree = 1 networkx

delete random edges of a node till degree = 1 networkx - python

If I create a bipartite graph G using random geomtric graph where nodes are connected within a radius. I then want to make sure all nodes have a particular degree (i.e. only one or two edges).
My main aim is to take one of the node sets (i.e node type a) and for each node make sure it has a maximum degree set by me. So for instance if a take node i that has a degree of 4, delete random edges of node i until its degree is 1.
I wrote the following code to run in the graph generator after generating edges. It deletes edges but not until all nodes have the degree of 1.
for n in G:
mu = du['G.degree(n)']
while mu > 1:
G.remove_edge(u,v)
if mu <=1:
break
return G
full function below:
import networkx as nx
import random
def my_bipartite_geom_graph(a, b, radius, dim):
G=nx.Graph()
G.add_nodes_from(range(a+b))
for n in range(a):
G.node[n]['pos']=[random.random() for i in range(0,dim)]
G.node[n]['type'] = 'A'
for n in range(a, a+b):
G.node[n]['pos']=[random.random() for i in range(0,dim)]
G.node[n]['type'] = 'B'
nodesa = [(node, data) for node, data in G.nodes(data=True) if data['type'] == 'A']
nodesb = [(node, data) for node, data in G.nodes(data=True) if data['type'] == 'B']
while nodesa:
u,du = nodesa.pop()
pu = du['pos']
for v,dv in nodesb:
pv = dv['pos']
d = sum(((a-b)**2 for a,b in zip(pu,pv)))
if d <= radius**2:
G.add_edge(u,v)
for n in nodesa:
mu = du['G.degree(n)']
while mu > 1:
G.remove_edge(u,v)
if mu <=1:
break
return G
Reply to words like jared. I tried using you code plus a couple changes I had to make:
def hamiltPath(graph):
maxDegree = 2
remaining = graph.nodes()
newGraph = nx.Graph()
while len(remaining) > 0:
node = remaining.pop()
neighbors = [n for n in graph.neighbors(node) if n in remaining]
if len(neighbors) > 0:
neighbor = neighbors[0]
newGraph.add_edge(node, neighbor)
if len(newGraph.neighbors(neighbor)) >= maxDegree:
remaining.remove(neighbor)
return newGraph
This ends up removing nodes from the final graph which I had hoped it would not.

Suppose we have a Bipartite graph. If you want each node to have degree 0, 1 or 2, one way to do this would be the following. If you want to do a matching, either look up the algorithm (I don't remember it), or change maxDegree to 1 and I think it should work as a matching instead. Regardless, let me know if this doesn't do what you want.
def hamiltPath(graph):
"""This partitions a bipartite graph into a set of components with each
component consisting of a hamiltonian path."""
# The maximum degree
maxDegree = 2
# Get all the nodes. We will process each of these.
remaining = graph.vertices()
# Create a new empty graph to which we will add pairs of nodes.
newGraph = Graph()
# Loop while there's a remaining vertex.
while len(remaining) > 0:
# Get the next arbitrary vertex.
node = remaining.pop()
# Now get its neighbors that are in the remaining set.
neighbors = [n for n in graph.neighbors(node) if n in remaining]
# If this list of neighbors is non empty, then add (node, neighbors[0])
# to the new graph.
if len(neighbors) > 0:
# If this is not an optimal algorithm, I suspect the selection
# a vertex in this indexing step is the crux. Improve this
# selection and the algorthim might be optimized, if it isn't
# already (optimized in result not time or space complexity).
neighbor = neighbors[0]
newGraph.addEdge(node, neighbor)
# "node" has already been removed from the remaining vertices.
# We need to remove "neighbor" if its degree is too high.
if len(newGraph.neighbors(neighbor)) >= maxDegree:
remaining.remove(neighbor)
return newGraph
class Graph:
"""A graph that is represented by pairs of vertices. This was created
For conciseness, not efficiency"""
def __init__(self):
self.graph = set()
def addEdge(self, a, b):
"""Adds the vertex (a, b) to the graph"""
self.graph = self.graph.union({(a, b)})
def neighbors(self, node):
"""Returns all of the neighbors of a as a set. This is safe to
modify."""
return (set(a[0] for a in self.graph if a[1] == node).
union(
set(a[1] for a in self.graph if a[0] == node)
))
def vertices(self):
"""Returns a set of all of the vertices. This is safe to modify."""
return (set(a[1] for a in self.graph).
union(
set(a[0] for a in self.graph)
))
def __repr__(self):
result = "\n"
for (a, b) in self.graph:
result += str(a) + "," + str(b) + "\n"
# Remove the leading and trailing white space.
result = result[1:-1]
return result
graph = Graph()
graph.addEdge("0", "4")
graph.addEdge("1", "8")
graph.addEdge("2", "8")
graph.addEdge("3", "5")
graph.addEdge("3", "6")
graph.addEdge("3", "7")
graph.addEdge("3", "8")
graph.addEdge("3", "9")
graph.addEdge("3", "10")
graph.addEdge("3", "11")
print(graph)
print()
print(hamiltPath(graph))
# Result of this is:
# 10,3
# 1,8
# 2,8
# 11,3
# 0,4

I don't know if it is your problem but my wtf detector is going crazy when I read those two final blocks:
while nodesa:
u,du = nodesa.pop()
pu = du['pos']
for v,dv in nodesb:
pv = dv['pos']
d = sum(((a-b)**2 for a,b in zip(pu,pv)))
if d <= radius**2:
G.add_edge(u,v)
for n in nodesa:
mu = du['G.degree(n)']
while mu > 1:
G.remove_edge(u,v)
if mu <=1:
break
you never go inside the for loop, since nodesa needs to be empty to reach it
even if nodesa is not empty, if mu is an int, you have an infinite loop in in your last nested while since you never modify it.
even if you manage to break from this while statement, then you have mu > 1 == False. So you immediatly break out of your for loop
Are you sure you are doing what you want here? can you add some comments to explain what is going on in this part?

Related

Dijkstra variation works most of the time, but gives wrong output on a test that I do not have access to

first of all let me pretext this by the fact that this is for an university homework, so I only want hints and not solutions.
The problem consists of finding the path from s to t that has the smallest maximum amount of snow on one of the edges, while choosing shortest distance for tie breaking. (i. e. if multiple edges with the same snow amount are considered, we take the one with the shortest length). For any two vertices, there is at most one edge that connects them.
n - number of vertices
m - number of edges
s - source
t - target
a - list of edge beginnings
b - list of edge ends
w - list of lengths corresponding to the edges
c - list of amounts of snow corresponding to the edges
I would really appreciate the help as I've been racking my head over this for a long time.
I tried this.
import heapq
# # intersections, # roads, start list, end list, len list, snow list
def snowy_road(n, m, s, t, a, b, w, c):
# Initialize distances to all nodes as infinite except for the source node, which has distance 0
distancesc = [float("inf")] * n
distancesc[s-1] = 0
distancesw = [float("inf")] * n
distancesw[s-1] = 0
# Create a set to store the nodes that have been visited
visited = set()
# Create a priority queue to store the nodes that still need to be processed
# We will use the distance to the source node as the priority
# (i.e. the next node to process will be the one with the smallest distance to the source)
pq = []
pq.append((0, 0, s-1))
while pq:
# Pop the node with the smallest distance from the priority queue
distc, distw, u = heapq.heappop(pq)
# Skip the node if it has already been visited
if u in visited:
continue
# Mark the node as visited
visited.add(u)
# Update the distances to all adjacent nodes
for i in range(m):
if a[i] == u+1:
v = b[i]-1
elif b[i] == u+1:
v = a[i]-1
else:
continue
altc = c[i]
if distc != float("inf"):
altc = max(distc, altc)
altw = distw + w[i]
if altc < distancesc[v]:
distancesc[v] = altc
distancesw[v] = altw
heapq.heappush(pq, (altc, altw, v))
elif altc == distancesc[v] and altw < distancesw[v]:
distancesw[v] = altw
heapq.heappush(pq, (altc, altw, v))
# Return the distance to the target node
return distancesc[t-1], distancesw[t-1]

If anyone is wondering, I solved this using two consequent Dijkstras. The first one finds the value of the shortest bottleneck path, the second one takes this value into account and doest a shortest path normal Dijkstra on the graph using w, while considering only edges that have a snow coverage <= than the bottleneck we found. Thank you to everyone that responded!

Efficient algorithm to generate random multigraph (undirected) given nodes and degree

I wrote a simple algorithm to generate 1000 random graphs given total nodes and associated node degree. Graph is undirected with multiple edges and no self-loops. My current algorithm is very slow:
Graph properties: no self-loops, undirected, multiple edges (i.e. many edges b/w same pair of vertices)
Pick 2 random node ids from node list using np.random
Check if node degree is full, if not connect 2 nodes and add to dictionary with connections
If a certain node has reached node degree removed it from list so it is not picked again.
randomness is graph generation is crucial as graphs form part of a statistical test
Algorithm reaches exception, where only 1 node is left out (mostly the node with large degree compared to other nodes) In such a case I take the intermediate generated graph and start breaking random edges and connecting these edges to the left out node until all nodes reach their original node degree.
This above is the bottleneck I suppose as I generate a graph and degenerate it later due to the exception.
every random graph in 1000 iterations must be different that older ones. I check this later in the code by appending to a list. If any 2 graphs are exactly same regenerate
all graphs are realisable given above conditions as I already have original graphs with these configs to start with, I am just generating 1000 random versions of these original graphs
from collections import defaultdict
for iteration in range(1000): # 1000 random graphs
all_stop = False
all_stop_2 = False
count = count +1
success_dict = defaultdict(list)
popped_dict = defaultdict(dict)
grid_dens_dict = defaultdict(set) # final graph being built
# pfxlinks = [edge1,edge2,edge3,edge4,........edgen] # just a placeholder edge list to help me identify which node connected with which node in success_dict
pfxlinks = [i for i in range(842814)] # total 842814 edges each connected to a node in grid_ids_withlinks , so if we add all node degrees in grid_ids_withlinks we get (842814 * 2)
grid_ids_withlinks = {1107415: 751065,1125583: 15256,1144686: 108969,1115625: 17038,1081048: 6749,1103814: 6476,1108340: 107431,1111992: 45946,1117451: 3594,1093803: 10860,1117452: 2126,1089226: 52518,1082859: 21211,1105613: 94587,1092862: 43891,1083786: 17073,1092899: 999,1141954: 4347,1106506: 2072,1094690: 119736,1116547: 3284,1104705: 2404,1135637: 3815,1121070:16598,1087417: 4514,1103777: 310,1114682: 4265,1091948: 5468,1093788: 2176, 1098316: 2067,1105597: 19090,1141055: 8454,1097427: 3041,1092875: 4159,1086500: 2204,1095619: 9732,1087430: 2041,1112884: 2167,1097413: 17056,1107414: 34769,1111088: 2025,1083768: 2176,1130180: 1886, 1144699: 988,1146499: 6818,1111081: 12509,1104687: 6186,1092866: 4272,1091037: 3,1121044: 39,1098333: 294,1118359: 27,1151091: 21,1107441: 10766,1141094: 3523,1102898: 53,1115634: 2199,1100140: 4347,1086515: 3029,1116505: 238,1082883: 4070,1118366:2065,1102866: 1590,1115631: 4345,1091990: 2131,1144703: 4053,1075589: 19,1081062: 2124,1097425: 11,1133804: 8,1112864: 158,1088307: 112,1138312: 112,1127446: 6245,1108356: 155,1082874: 6315,1115640: 3978,1107432: 2234,1131077: 2032,1115590: 2672,1094696: 13,1136502: 52,1094683: 20,1110183: 2,1113821: 56,1106515: 6,1120183: 11,1083765: 23,1101079: 6,1091944: 12,1085599: 10,1083783: 25,1148339: 6}
# dict with node_id : node degree (total nodes: 93)
for pfxpair in pfxlinks:
start_put = False
end_put = False
if all_stop_2 == True:
break
while True:
if all_stop == True:
all_stop_2 = True
break
try:
grid_start_id, grid_end_id = (np.random.choice(list(grid_ids_withlinks.keys()),size = 2, replace = False)) # get 2 random node ids
grid_start_id = int(grid_start_id)
grid_end_id = int(grid_end_id)
if start_put == False:
start_value = grid_dens_dict.get(grid_start_id) # if node id exists in my dict and node degree hasnt reached capacity
start_process_condition = (not start_value) or ( (start_value) and (len(grid_dens_dict[grid_start_id]) < grid_ids_withlinks[grid_start_id]) )
if start_process_condition:
grid_dens_dict[grid_start_id].add(pfxpair)
start_put = True
if len(grid_dens_dict[grid_start_id]) == grid_ids_withlinks[grid_start_id]: # node degree for node full, remove from dict
try:
#print('deleted key: ',grid_start_id, 'with size:',grid_dens_dict[grid_start_id],'Capacity:',grid_ids_withlinks[grid_start_id])
popped_dict[grid_start_id] = {'orig_capacity': grid_ids_withlinks[grid_start_id],'size':len(grid_dens_dict[grid_start_id]) }
grid_ids_withlinks.pop(grid_start_id)
except:
print('already popped')
else:
print('check')
if end_put == False:
end_value = grid_dens_dict.get(grid_end_id)
if (not end_value) or (end_value and (len(grid_dens_dict[grid_end_id]) < grid_ids_withlinks[grid_end_id])):
grid_dens_dict[grid_end_id].add(pfxpair)
end_put = True
if len(grid_dens_dict[grid_end_id]) == grid_ids_withlinks[grid_end_id]:
try:
#print('deleted key: ',grid_end_id, 'with size:',grid_dens_dict[grid_end_id],'Capacity:',grid_ids_withlinks[grid_end_id])
popped_dict[grid_end_id] = {'orig_capacity': grid_ids_withlinks[grid_end_id],'size':len(grid_dens_dict[grid_end_id]) }
grid_ids_withlinks.pop(grid_end_id)
except: # only 1 node left with large degree, start breaking edges
print('already popped')
else:
print('check')
if (start_put == False and end_put == True): # only end while when both nodes have been assigned a link
grid_dens_dict[grid_end_id].discard(pfxpair)
end_put = False
if (start_put == True and end_put == False):
grid_dens_dict[grid_start_id].discard(pfxpair)
start_put = False
if start_put == True and end_put == True:
success_dict[pfxpair].append((grid_start_id,grid_end_id))
break
except:
#print('In except block')
grid2grid = defaultdict(list)
for k,v in success_dict.items():
grid2grid[v[0][0]].append(v[0][1])
grid2grid[v[0][1]].append(v[0][0])
# ppick 2 random gridids
while True:
pop_id1, pop_id2 = (np.random.choice(list(grid_dens_dict.keys()),size = 2, replace = False)) # get 2 random node ids for popping
pop_id1 = int(pop_id1)
pop_id2 = int(pop_id2)
if (pop_id1 != list(grid_ids_withlinks.keys())[0] and pop_id2 != list(grid_ids_withlinks.keys())[0] and (pop_id1 in grid2grid[pop_id2] and pop_id2 in grid2grid[pop_id1])): ##have an assigned link
grid2grid[pop_id1].remove(pop_id2)
grid2grid[pop_id2].remove(pop_id1)
grid2grid[list(grid_ids_withlinks.keys())[0]].append(pop_id1)
grid2grid[list(grid_ids_withlinks.keys())[0]].append(pop_id2)
grid2grid[pop_id1].append(list(grid_ids_withlinks.keys())[0])
grid2grid[pop_id2].append(list(grid_ids_withlinks.keys())[0])
if len(grid2grid[list(grid_ids_withlinks.keys())[0]]) == grid_ids_withlinks[list(grid_ids_withlinks.keys())[0]]:
for k,v in grid2grid.items():
grid2grid[k] = Counter(v)
if len(all_itr_list) != 0:
# check if current grpah same as any previous
for graph in all_itr_list:
#same_counter = 0
#for a,b in graph.items():
shared_items = {k: graph[k] for k in graph if k in grid2grid and graph[k] == grid2grid[k]}
len(shared_items)
if len(shared_items) == grid_ids_withlinks: # total no of grids
#print('no of same nodes: ',len(shared_items))
break
all_itr_list.append(grid2grid)
filename = 'path'
with open(filename,'wb') as handle:
pickle.dump(grid2grid, handle)
all_stop = True
break
print('iteration no:',count)
if all_stop == False:
grid2grid = defaultdict(list)
for k,v in success_dict.items():
grid2grid[v[0][0]].append(v[0][1])
grid2grid[v[0][1]].append(v[0][0])
for k,v in grid2grid.items():
grid2grid[k] = Counter(v)
all_itr_list.append(grid2grid)
filename = 'path'
with open(filename,'wb') as handle:
pickle.dump(grid2grid, handle)
> ##from answer
import pickle
from collections import defaultdict
import numpy as np
for iteration in range(1000):
graph = defaultdict(list)
filename2 = r'path'
filename3 = r'path'
with open(filename2,'rb') as handle:
pfxlinks = pickle.load(handle,encoding ='latin-1')
with open(filename3,'rb') as handle:
grid_ids_withlinks = pickle.load(handle,encoding ='latin-1')
nodes = list(grid_ids_withlinks.keys())
degrees = list(grid_ids_withlinks.values())
while len(nodes) > 0:
# Get a random index from current nodes
node_idx = np.random.randint(0, len(nodes)-1)
# Store the node and its corresponding degree
node = nodes[node_idx]
degree = degrees[node_idx]
# Swap that node and its degree with the last node/degree and pop
# This helps us to remove them in O(1) time
# We don't need them anymore since we are going to exhaust the required edges
# for this particular node.
# This also prevents self-edges.
nodes[node_idx], nodes[-1] = nodes[-1], nodes[node_idx]
nodes.pop()
degrees[node_idx], degrees[-1] = degrees[-1], degrees[node_idx]
degrees.pop()
for _ in range(degree): # this is the amount of edges this node needs
# To make sure we don't get out of bounds.
# This could potentially happen unless
# there is a guarantee that the degrees and number of nodes
# are made such that they fit exactly
if len(nodes) == 0:
break
neighbor_idx = np.random.randint(0, len(nodes)-1)
graph[node].append(nodes[neighbor_idx])
graph[nodes[neighbor_idx]].append(node)
degrees[neighbor_idx] -= 1
if degrees[neighbor_idx] == 0:
# we need to remove the neighbor node if it has its maximum edges already
nodes[neighbor_idx], nodes[-1] = nodes[-1], nodes[neighbor_idx]
nodes.pop()
degrees[neighbor_idx], degrees[-1] = degrees[-1], degrees[neighbor_idx]
degrees.pop()
print('done')

Since you have also posted parts of the code that are unrelated to the core algorithm, it makes going through the code and finding bottlenecks relatively difficult.
Here's an algorithm that is faster from what I've seen in your code. It runs in O(n * m) for creating each graph, where n is the number of nodes, and m is the max degree that any of the nodes can have. In other words it's O(V + E) where V is the number of vertices and E the number of edges.
Create a list for the nodes, called nodes, like [1, 2, ..., n].
Create a corresponding list for degrees, called degrees, where degrees[i] is the degree of nodes[i].
Create a store for your graph however you like it. Adjacency list, matrix. Just make sure that adding edges to the graph is of O(1) complexity. Let's call this graph. A defaultdict(list) from collections in python would make a good adjacency list. For this algorithm I assume graph is a defaultdict(list).
Run a while loop on nodes. while len(nodes) > 0: and do as follows:
# Get a random index from current nodes
node_idx = random.randint(0, len(nodes)-1)
# Store the node and its corresponding degree
node = nodes[node_idx]
degree = degrees[node_idx]
# Swap that node and its degree with the last node/degree and pop
# This helps us to remove them in O(1) time
# We don't need them anymore since we are going to exhaust the required edges
# for this particular node.
# This also prevents self-edges.
nodes[node_idx], nodes[-1] = nodes[-1], nodes[node_idx]
nodes.pop()
degrees[node_idx], degrees[-1] = degrees[-1], degrees[node_idx]
degrees.pop()
for _ in degree: # this is the amount of edges this node needs
# To make sure we don't get out of bounds.
# This could potentially happen unless
# there is a guarantee that the degrees and number of nodes
# are made such that they fit exactly
if len(nodes) == 0:
break
neighbor_idx = random.randint(0, len(nodes)-1)
graph[node].append(nodes[neighbor_idx])
graph[nodes[neighbor_idx]].append(node)
degrees[neighbor_idx] -= 1
if degrees[neighbor_idx] == 0:
# we need to remove the neighbor node if it has its maximum edges already
nodes[neighbor_idx], nodes[-1] = nodes[-1], nodes[neighbor_idx]
nodes.pop()
degrees[neighbor_idx], degrees[-1] = degrees[-1], degrees[neighbor_idx]
degrees.pop()
This algorithm potentially leaves one node at the end that has not all its required edges, but this isn't a shortcoming of the algorithm but can happen if the number of edges for the nodes dosn't work out. I'm not sure how to express is mathematically though.
Also note that this algorithm could produce multiple edges between two nodes. It isn't clear to me if this is allowed or not for the particular graph you are looking for. If so, the code can be ammended such that it avoids such edges without sacrificing the time complexity. But it has the potential to leave multiple nodes with less edges than required. This wouldn't be a shortcoming of the algorithm but a result of how the degrees for particular nodes are defined.

Dijkstra algorithm in python using dictionaries

Dear computer science enthusiasts,
I have stumbled upon an issue when trying to implement the Dijkstra-algorithm to determine the shortest path between a starting node and all other nodes in a graph.
To be precise I will provide you with as many code snippets and information as I consider useful to the case. However, should you miss anything, please let me know.
I implemented a PQueue class to handle Priority Queues of each individual node and it looks like this:
class PQueue:
def __init__(self):
self.items = []
def push(self, u, value):
self.items.append((u, value))
# insertion sort
j = len(self.items) - 1
while j > 0 and self.items[j - 1][1] > value:
self.items[j] = self.items[j - 1] # Move element 1 position backwards
j -= 1
# node u now belongs to position j
self.items[j] = (u, value)
def decrease_key(self, u, value):
for i in range(len(self.items)):
if self.items[i][0] == u:
self.items[i][1] = value
j = i
break
# insertion sort
while j > 0 and self.items[j - 1][1] > value:
self.items[j] = self.items[j - 1] # Move element 1 position backwards
j -= 1
# node u now belongs to position j
self.items[j] = (u, value)
def pop_min(self):
if len(self.items) == 0:
return None
self.items.__delitem__(0)
return self.items.index(min(self.items))
In case you're not too sure about what the Dijkstra-algorithm is, you can refresh your knowledge here.
Now to get to the actual problem, I declared a function dijkstra:
def dijkstra(self, start):
# init
totalCosts = {} # {"node"= cost,...}
prevNodes = {} # {"node"= prevNode,...}
minPQ = PQueue() # [[node, cost],...]
visited = set()
# start init
totalCosts[str(start)] = 0
prevNodes[str(start)] = start
minPQ.push(start, 0)
# set for all other nodes cost to inf
for node in range(self.graph.length): # #nodes
if node != start:
totalCosts[str(node)] = np.inf
while len(minPQ.items) != 0: # Main loop
# remove smallest item
curr_node = minPQ.items[0][0] # get index/number of curr_node
minPQ.pop_min()
visited.add(curr_node)
# check neighbors
for neighbor in self.graph.adj_list[curr_node]:
# check if visited
if neighbor not in visited:
# check cost and put it in totalCost and update prev node
cost = self.graph.val_edges[curr_node][neighbor] # update cost of curr_node -> neighbor
minPQ.push(neighbor, cost)
totalCosts[str(neighbor)] = cost # update cost
prevNodes[str(neighbor)] = curr_node # update prev
# calc alternate path
altpath = totalCosts[str(curr_node)] + self.graph.val_edges[curr_node][neighbor]
# val_edges is a adj_matrix with values for the connecting edges
if altpath < totalCosts[str(neighbor)]: # check if new path is better
totalCosts[str(neighbor)] = altpath
prevNodes[str(neighbor)] = curr_node
minPQ.decrease_key(neighbor, altpath)
Which in my eyes should solve the problem mentioned above (optimal path for a starting node to every other node). But it does not. Can someone help me clean up this mess that I have been trying to debug for a while now. Thank you in advance!
Assumption:
In fact I realized that my dictionaries used to store the previously visited nodes (prevNodes) and the one where I save the corresponding total cost of visiting a node (totalCosts) are unequally long. And I do not understand why.

How to structure a program to work with minesweeper configurations

EDIT: This was a while ago and I've since got it working, if you'd like to see the code it's included at github.com/LewisGaul/minegaulerQt.
I'm trying to write a program to calculate probabilities for the game minesweeper, and have had some difficulty working out how best to structure it. While it may seem quite simple at first with the example below, I would like to know the best way to allow for more complex configurations. Note I am not looking for help with how to calculate probabilities - I know the method, I just need to implement it!
To make it clear what I'm trying to calculate, I will work through a simple example which can be done by hand. Consider a minesweeper configuration
# # # #
# 1 2 #
# # # #
where # represents an unclicked cell. The 1 tells us there is exactly 1 mine in the leftmost 7 unclicked cells, the 2 tells us there are exactly 2 in the rightmost 7. To calculate the probability of each individual cell containing a mine, we need to determine all the different cases (only 2 in this simple case):
1 mine in leftmost 3 cells, 2 mines in rightmost 3 cells (total of 3 mines, 3x3=9 combinations).
1 mine in center 4 cells, 1 mine in rightmost 3 cells (total of 2 mines, 4x3=12 combinations).
Given the probability of a mine being in a random cell is about 0.2, it is (in a random selection of cells) about 4 times more likely there is a total of 2 mines rather than a total of 3, so the total number of mines in a configuration matters, as well as the number of combinations of each configuration. So in this case the probability of case 1 is 9/(9+4x12)=0.158, and the probability of there being a mine in a given leftmost cell is therefore about 0.158/3=0.05, as those cells are effectively equivalent (they share exactly the same revealed neighbours).
I have created a GUI with Tkinter which allows me to easily enter configurations such as the one in the example, which stores the grid as a numpy array. I then made a NumberGroup class which isolates each of the clicked/numbered cells, storing the number and a set of the coordinates of its unclicked neighbours. These can be subtracted to get equivalence groups... Although this would not be as straightforward if there were three or more numbers instead of just two. But I am unsure how to go from here to getting the different configurations. I toyed with making a Configuration class, but am not hugely familiar with how different classes should work together. See working code below (numpy required).
Note: I am aware I could have attempted to use a brute force approach, but if possible I would like to avoid that, keeping the equivalent groups separate (in the above example there are 3 equivalence groups, the leftmost 3, the middle 4, the rightmost 3). I would like to hear your thoughts on this.
import numpy as np
grid = np.array(
[[0, 0, 0, 0],
[0, 2, 1, 0],
[0, 0, 0, 0]]
)
dims = (3, 4) #Dimensions of the grid
class NumberGroup(object):
def __init__(self, mines, coords, dims=None):
"""Takes a number of mines, and a set of coordinates."""
if dims:
self.dims = dims
self.mines = mines
self.coords = coords
def __repr__(self):
return "<Group of {} cells with {} mines>".format(
len(self.coords), self.mines)
def __str__(self):
if hasattr(self, 'dims'):
dims = self.dims
else:
dims = (max([c[0] for c in self.coords]) + 1,
max([c[1] for c in self.coords]) + 1)
grid = np.zeros(dims, int)
for coord in self.coords:
grid[coord] = 1
return str(grid).replace('0', '.').replace('1', '#')
def __sub__(self, other):
if type(other) is NumberGroup:
return self.coords - other.coords
elif type(other) is set:
return self.coords - other.coords
else:
raise TypeError("Can only subtract a group or a set from another.")
def get_neighbours(coord, dims):
x, y = coord
row = [u for u in range(x-1, x+2) if u in range(dims[0])]
col = [v for v in range(y-1, y+2) if v in range(dims[1])]
return {(u, v) for u in row for v in col}
groups = []
all_coords = [(i, j) for i in range(dims[0])
for j in range(dims[1])]
for coord, nr in [(c, grid[c]) for c in all_coords if grid[c] > 0]:
empty_neighbours = {c for c in get_neighbours(coord, dims)
if grid[c] == 0}
if nr > len(empty_neighbours):
print "Error: number {} in cell {} is too high.".format(nr, coord)
break
groups.append(NumberGroup(nr, empty_neighbours, dims))
print groups
for g in groups:
print g
print groups[0] - groups[1]
UPDATE:
I have added a couple of other classes and restructured a bit (see below for working code), and it is now capable of creating and displaying the equivalence groups, which is a step in the right direction. However I still need to work out how to iterate through all the possible mine-configurations, by assigning a number of mines to each group in a way that creates a valid configuration. Any help is appreciated.
For example,
# # # #
# 2 1 #
# # # #
There are three equivalence groups G1: the left 3, G2: the middle 4, G3: the right 3. I want the code to loop through, assigning groups with mines in the following way:
G1=2 (max the first group) => G2=0 => G3=1 (this is all configs with G1=2)
G1=1 (decrease by one) => G2=1 => G3=0 (this is all with G1=1)
G1=0 => G2=2 INVALID
So we arrive at both configurations. This needs to work for more complicated setups!
import numpy as np
def get_neighbours(coord, dims):
x, y = coord
row = [u for u in range(x-1, x+2) if u in range(dims[0])]
col = [v for v in range(y-1, y+2) if v in range(dims[1])]
return {(u, v) for u in row for v in col}
class NrConfig(object):
def __init__(self, grid):
self.grid = grid
self.dims = grid.shape # Dimensions of grid
self.all_coords = [(i, j) for i in range(self.dims[0])
for j in range(self.dims[1])]
self.numbers = dict()
self.groups = []
self.configs = []
self.get_numbers()
self.get_groups()
self.get_configs()
def __str__(self):
return str(self.grid).replace('0', '.')
def get_numbers(self):
for coord, nr in [(c, self.grid[c]) for c in self.all_coords
if self.grid[c] > 0]:
empty_neighbours = {c for c in get_neighbours(
coord, self.dims) if self.grid[c] == 0}
if nr > len(empty_neighbours):
print "Error: number {} in cell {} is too high.".format(
nr, coord)
return
self.numbers[coord] = Number(nr, coord, empty_neighbours,
self.dims)
def get_groups(self):
coord_neighbours = dict()
for coord in [c for c in self.all_coords if self.grid[c] == 0]:
# Must be a set so that order doesn't matter!
coord_neighbours[coord] = {self.numbers[c] for c in
get_neighbours(coord, self.dims) if c in self.numbers}
while coord_neighbours:
coord, neighbours = coord_neighbours.popitem()
equiv_coords = [coord] + [c for c, ns in coord_neighbours.items()
if ns == neighbours]
for c in equiv_coords:
if c in coord_neighbours:
del(coord_neighbours[c])
self.groups.append(EquivGroup(equiv_coords, neighbours, self.dims))
def get_configs(self):
pass # WHAT GOES HERE?!
class Number(object):
"""Contains information about the group of cells around a number."""
def __init__(self, nr, coord, neighbours, dims):
"""Takes a number of mines, and a set of coordinates."""
self.nr = nr
self.coord = coord
# A list of the available neighbouring cells' coords.
self.neighbours = neighbours
self.dims = dims
def __repr__(self):
return "<Number {} with {} empty neighbours>".format(
int(self), len(self.neighbours))
def __str__(self):
grid = np.zeros(self.dims, int)
grid[self.coord] = int(self)
for coord in self.neighbours:
grid[coord] = 9
return str(grid).replace('0', '.').replace('9', '#')
def __int__(self):
return self.nr
class EquivGroup(object):
"""A group of cells which are effectively equivalent."""
def __init__(self, coords, nrs, dims):
self.coords = coords
# A list of the neighbouring Number objects.
self.nr_neighbours = nrs
self.dims = dims
if self.nr_neighbours:
self.max_mines = min(len(self.coords),
max(map(int, self.nr_neighbours)))
else:
self.max_mines = len(coords)
def __repr__(self):
return "<Equivalence group containing {} cells>".format(
len(self.coords))
def __str__(self):
grid = np.zeros(self.dims, int)
for coord in self.coords:
grid[coord] = 9
for number in self.nr_neighbours:
grid[number.coord] = int(number)
return str(grid).replace('0', '.').replace('9', '#')
grid = np.array(
[[0, 0, 0, 0],
[0, 2, 1, 0],
[0, 0, 0, 0]]
)
config = NrConfig(grid)
print config
print "Number groups:"
for n in config.numbers.values():
print n
print "Equivalence groups:"
for g in config.groups:
print g

If you don't want to brute-force it, you could model the process as a decision tree. Suppose we start with your example:
####
#21#
####
If we want to start placing mines in a valid configuration, we at this point essentially have eight choices. Since it doesn't really matter which square we pick within an equivalence group, we can narrow that down to three choices. The tree branches. Let's go down one branch:
*###
#11#
####
I placed a mine in G1, indicated by the asterisk. Also, I've updated the numbers (just one number in this case) associated with this equivalence group, to indicate that these numbered squares can now border one fewer mines.
This hasn't reduced our freedom of choice for the following step, we can still place a mine in any of the equivalence groups. Let's place another one in G1:
*XX#
*01#
XXX#
Another asterisk marks the new mine, and the numbered square has again been lowered by one. It has now reached zero, meaning it cannot border any more mines. That means that for our next choice of mine placement, all the equivalence groups dependent upon this numbered square are ruled out. Xs mark squares where we can now not place any mine. We can only make one choice now:
*XX*
*00X
XXXX
Here the branch ends and you've found a valid configuration. By running along all the branches in this tree in this manner, you should find all of them. Here we found your first configuration. Of course, there's more than one way to get there. If we had started by placing a mine in G3, we would have been forced to place the other two in G1. That branch leads to the same configuration, so you should check for duplicates. I don't see a way to avoid this redundancy right now.
The second configuration is found by either starting with G2, or placing one mine in G1 and then the second in G2. In either case you again end up at a branch end:
**XX
X00X
XXXX
Invalid configurations like your example with zero mines in G1 do not pop up. There are no valid choices along the tree that lead you there. Here is the whole tree of valid choices.
Choice 1: 1 | 2 | 3
Choice 2: 1 2 3 | 1 | 1
Choice 3: 3 1 | |1
Valid configurations are the branch ends at which no further choice is possible, i.e.
113
12
131
21
311
which obviously fall into two equivalent classes if we disregard the order of the numbers.

Python 3.3.2 - 'Grouping' System with Characters

I have a fun little problem.
I need to count the amount of 'groups' of characters in a file. Say the file is...
..##.#..#
##..####.
.........
###.###..
##...#...
The code will then count the amount of groups of #'s. For example, the above would be 3. It includes diagonals. Here is my code so far:
build = []
height = 0
with open('file.txt') as i:
build.append(i)
height += 1
length = len(build[0])
dirs = {'up':(-1, 0), 'down':(1, 0), 'left':(0, -1), 'right':(0, 1), 'upleft':(-1, -1), 'upright':(-1, 1), 'downleft':(1, -1), 'downright':(1, 1)}
def find_patches(grid, length):
queue = []
queue.append((0, 0))
patches = 0
while queue:
current = queue.pop(0)
line, cell = path[-1]
if ## This is where I am at. I was making a pathfinding system.

Here’s a naive solution I came up with. Originally I just wanted to loop through all the elements once an check for each, if I can put it into an existing group. That didn’t work however as some groups are only combined later (e.g. the first # in the second row would not belong to the big group until the second # in that row is processed). So I started working on a merge algorithm and then figured I could just do that from the beginning.
So how this works now is that I put every # into its own group. Then I keep looking at combinations of two groups and check if they are close enough to each other that they belong to the same group. If that’s the case, I merge them and restart the check. If I completely looked at all possible combinations and could not merge any more, I know that I’m done.
from itertools import combinations, product
def canMerge (g, h):
for i, j in g:
for x, y in h:
if abs(i - x) <= 1 and abs(j - y) <= 1:
return True
return False
def findGroups (field):
# initialize one-element groups
groups = [[(i, j)] for i, j in product(range(len(field)), range(len(field[0]))) if field[i][j] == '#']
# keep joining until no more joins can be executed
merged = True
while merged:
merged = False
for g, h in combinations(groups, 2):
if canMerge(g, h):
g.extend(h)
groups.remove(h)
merged = True
break
return groups
# intialize field
field = '''\
..##.#..#
##..####.
.........
###.###..
##...#...'''.splitlines()
groups = findGroups(field)
print(len(groups)) # 3

I'm not exactly sure what your code is trying to do. Your with statement opens a file, but all you do is append the file object to a list before the with ends and it gets closed (without its contents ever being read). I suspect his is not what you intend, but I'm not sure what you were aiming for.
If I understand your problem correctly, you are trying to count the connected components of a graph. In this case, the graph's vertices are the '#' characters, and the edges are wherever such characters are adjacent to each other in any direction (horizontally, vertically or diagonally).
There are pretty simple algorithms for solving that problem. One is to use a disjoint set data structure (also known as a "union-find" structure, since union and find are the two operations it supports) to connect groups of '#' characters together as they're read in from the file.
Here's a fairly minimal disjoint set I wrote to answer another question a while ago:
class UnionFind:
def __init__(self):
self.rank = {}
self.parent = {}
def find(self, element):
if element not in self.parent: # leader elements are not in `parent` dict
return element
leader = self.find(self.parent[element]) # search recursively
self.parent[element] = leader # compress path by saving leader as parent
return leader
def union(self, leader1, leader2):
rank1 = self.rank.get(leader1,1)
rank2 = self.rank.get(leader2,1)
if rank1 > rank2: # union by rank
self.parent[leader2] = leader1
elif rank2 > rank1:
self.parent[leader1] = leader2
else: # ranks are equal
self.parent[leader2] = leader1 # favor leader1 arbitrarily
self.rank[leader1] = rank1+1 # increment rank
And here's how you can use it for your problem, using x, y tuples for the nodes:
nodes = set()
groups = UnionFind()
with open('file.txt') as f:
for y, line in enumerate(f): # iterate over lines
for x, char in enumerate(line): # and characters within a line
if char == '#':
nodes.add((x, y)) # maintain a set of node coordinates
# check for neighbors that have already been read
neighbors = [(x-1, y-1), # up-left
(x, y-1), # up
(x+1, y-1), # up-right
(x-1, y)] # left
for neighbor in neighbors:
if neighbor in nodes:
my_group = groups.find((x, y))
neighbor_group = groups.find(neighbor)
if my_group != neighbor_group:
groups.union(my_group, neighbor_group)
# finally, count the number of unique groups
number_of_groups = len(set(groups.find(n) for n in nodes))

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.