I am finished with the case when the node that i want to remove is the root node or the leaf node, but i need to be able to remove also when it has siblings or children, which i am finding very hard.
class Node:
def __init__(self, key=None, data=None):
self.key = key
self.data = data
self.left = None
self.right = None
class BST:
def __init__(self):
self.root = None
self.size = 0
def remove(self, key):
self.root = self._remove(key, self.root)
def insert(self, key, data):
self.root = self._insert(self.root, key, data)
def _insert(self, root, key, data):
if root == None:
self.size += 1
return Node(key, data)
if root.key == key:
return root
elif root.key > key:
root.left = self._insert(root.left, key, data)
else:
root.right = self._insert(root.right, key, data)
return root
def _remove(self, key, node):
if node == None:
return None
if key == node.key:
if node.left != None and node.right == None: # if trying to remove root and right side is empty
return node.left
elif node.left == None and node.right != None: # if trying to remove root and left side is empty
return node.right
elif node.left == None and node.right == None: # if trying to remove leaf
return node
# two more cases to check when it has siblings
# iterates recursively in the bst
elif key <= node.key:
node.left = self._remove(key, node.left)
else:
node.right = self._remove(key, node.right)
return node
I posted the whole code so if anyone wants to test in their machine is welcome to do so, or someone can use it for educational purpose.
In the future try to perform some debugging and provide some sample output and expected output.
Consider below
def _remove(self, key, node):
if node == None:
return None
if key == node.key:
if node.left and not node.right: # only left
return node.left
elif node.right and not node.left: # only right
return node.right
elif not node.right and not node.left: # neither
return None
else : # both
inorder_successor = node.right
while inorder_successor.left:
inorder_successor = inorder_successor.left
# remember to replace inorder_successor with it's right child
...
...
return inorder_successor
# iterates recursively in the bst
elif key <= node.key:
node.left = self._remove(key, node.left)
else:
node.right = self._remove(key, node.right)
return node
A few observations about what changed
You check for None using, is != None which is a very non Pythonic way to it. Just check for is and is not instead
The right way to replace a node in a BST that has both children is with the inorder successor (left most descendant of the right child of the deleted node)
Related
I am currently working on implementing a Binary search tree but I have a problem with creating the add_node() function. I tried to make it in a recursive way, but the result was weird.
For example- If I run add_node(1), add_node(5), and add_node(0), my tree has only 1, no 5 and 0.
I would be happy if you tell me what the problem is.
def add_node(self, value: int) -> None:
if self.root == None:
self.root = Node(value)
return
else:
return self.add_recursion(self.root, value)
def add_recursion(self, node: Node, value: int) -> None:
if node == None:
node = Node(value)
return
elif value < node.value:
return self.add_recursion(node.left, value)
else:
return self.add_recursion(node.right, value)
When a None value is passed into a function, it is passed by value, not by reference, since... there is no reference.
elif value < node.value:
return self.add_recursion(node.left, value)
else:
return self.add_recursion(node.right, value)
When node.left or node.right is None, a Node ends up being created but not attached to node.
So what you could do is handle the cases where they are None separately.
def add_recursion(self, node: Node, value: int) -> None:
if node == None:
node = Node(value)
elif value < node.value:
if node.left == None:
node.left = Node(value)
else:
self.add_recursion(node.left, value)
else:
if node.right == None:
node.right = Node(value)
else:
self.add_recursion(node.right, value)
While this is workable, it becomes quite ugly. Look at the answer by Locke to see a better way to structure your code.
Also, an additional tip for readability, avoid using return where they aren't necessary such as at the end of a flow.
The issue is how you handle if node.left or node.right does not exist. Since the arguments are copied for each call, setting the value of node in add_recursion has no effect on the tree.
def foo(val):
print("foo start:", val)
val = 5
print("foo end:", val)
bar = 3
foo(bar) # Value of bar is copied for call
print("after foo:", bar) # Prints bar is still 3
I think you might also be getting confused due to how the root node is handled. Here is some starter code on how can handle the initial call to add_recursive.
class Node:
def __init__(self, value: int):
self.value = value
self.left = None
self.right = None
def add_recursive(self, value: int):
# TODO: recursively add to left or right
# For example, here is how you could recursively make a linked list
if self.right is None:
self.right = Node(value)
else:
self.right.add_recursive(value)
class BinarySearchTree:
def __init__(self):
self.root = None
def add_node(self, value: int):
if self.root is None:
self.root = Node(value)
else:
# Call add_recursive on root instead of with root.
self.root.add_recursive(value)
tree = BinarySearchTree()
tree.add_node(1)
tree.add_node(5)
tree.add_node(0)
I have problem with infinite loop in getMinimal() method. It works in this way :
1)Take node,
2)If node has other node on the left - go to other one.
3)Repeat as far as node has sth on the left side
4)Return the minimal node.
But sometimes it works in infinite loop for example from 1000 to 400, then to 4 then..to 1000! I have no ide where I make mistake. I reviewed this code many times,every single "pointer" to parent/left/right node is okay! Please - help.
Algorithm works okay to "handwritten" trees - ~20nodes. I wanted to test it in better cases - 2500nodes,generated by random lib (from -10k to 10k).
import random
class Node:
def __init__(self, val):
self.val = val
self.parent = None
self.right = None
self.left = None
# Class of node.
def str(self):
return str(self.val)
class MyTree:
def __init__(self, node):
self.root = node
def insert(self, node):
current = self.root
a = True
while a:
if node.val > current.val:
if current.right is not None:
current = current.right
continue
else:
current.right = node
node.parent = current
a = False
if node.val <= current.val:
if current.left is not None:
current = current.left
continue
else:
current.left = node
node.parent = current
a = False
def search(self, node):
current = self.root
while node.val != current.val:
if node.val > current.val:
current = current.right
continue
elif node.val <= current.val:
current = current.left
continue
if node.val == current.val:
return current
else:
print("There is no such node!")
def delete(self, node):
if isinstance(node, (float, int)):
node = self.search(node)
if node is self.root:
self.__deleteRoot()
return
else:
if node.right is None and node.left is None:
self.__deleteNN(node)
return
if node.right is None and node.left is not None:
self.__deleteLN(node)
return
if node.right is not None and node.left is None:
self.__deleteNR(node)
return
if node.right is not None and node.left is not None:
self.__deleteLR(node)
return
def __deleteNN(self, node):
if node.parent.left is node:
node.parent.left = None
if node.parent.right is node:
node.parent.right = None
def __deleteLN(self, node):
parent = node.parent
son = node.left
# parent replaced
if parent.left is node:
parent.left = son
if parent.right is node:
parent.right = son
son.parent = parent
def __deleteNR(self,node):
parent = node.parent
son = node.right
# replace parent
if parent.left is node:
parent.left = son
if parent.right is node:
parent.right = son
son.parent = parent
def __deleteLR(self, node):
minimal = self.getMinimal(node.right)
if minimal.parent.left is minimal:
minimal.parent.left = None
if minimal.parent.right is minimal:
minimal.parent.right = None
# parent of minimal done..
if node.parent.left is node:
node.parent.left = minimal
if node.parent.right is node:
node.parent.right = minimal
minimal.right = node.right
minimal.left = node.left
def getMinimal(self, node):
k = node
while k.left is not None:
k = k.left
return k
def getMax(self):
current = self.root
while current.right:
current = current.right
return current
def __trav(self, node):
if not node:
return
print(node.val)
self.__trav(node.left)
self.__trav(node.right)
def printTrav(self):
self.__trav(self.root)
def __deleteRoot(self):
if self.root.left is None and self.root.right is None:
self.root = None
return
if self.root.left is None and self.root.right is not None:
# left empty,right full
self.root.right.parent = None
self.root = self.root.right
return
if self.root.left is not None and self.root.right is None:
# right empty, left full
self.root.left.parent = None
self.root = self.root.left
return
# node has both children
if self.root.left is not None and self.root.right is not None:
temp = self.getMinimal(self.root.right) # minimal from right subtree
# sometimes it could be like this..
# r
# \
# x
if temp.parent.left is temp:
temp.parent.left = None
else:
temp.parent.right = None
self.root.left.parent = temp
self.root.right.parent = temp
temp.right = self.root.right
temp.left = self.root.left
self.root = temp
self.root.parent = None
return
def search(self, val):
node = self.root
if node.val == val:
return node
if val > node.val and node.right is not None:
node = node.right
if val < node.val and node.left is not None:
node = node.left
else:
print("There's no such value!")
return
def printMax(self):
print(self.getMax().val)
def printMin(self):
print(self.getMinimal(self.root).val)
arr=[None]*2500
for x in range(2500):
arr[x]=Node(random.randint(-10000,10000))
myTree = MyTree(arr[0])
for x in range(1,2500):
myTree.insert(arr[x])
for x in range(2500):
myTree.delete(arr[x])
It is suspicious that you define search twice.
Still that said, here is how I would debug this. I would modify your program to read from a file, try to run, and then detect an endless loop and bail out. Now write random files until you have one that causes you to crash.
Once you have a random file that shows the bug, the next step is to make it minimal. Here is a harness that can let you do that.
import itertools
flatten = itertools.chain.from_iterable
# bug_found should be a function that takes a list of elements and runs your test.
# example should be an array that demonstrates the bug.
def find_minimal (bug_found, example):
parts = [example]
while 1 < max(len(part) for part in parts):
i = 0
while i < len(parts):
if 1 == len(parts[i]):
i = i + 1
else:
part = parts.pop(i)
# Divide in 2.
mid = len(part)/2
part1 = part[0:mid]
part2 = part[mid:len(part)]
# Do we need part1?
parts.insert(i, part1)
if bug_found(flatten(parts)):
i = i + 1
parts.insert(i, part2)
else:
parts[i] = part2
# Do we need part2?
if bug_found_func(flatten(parts)):
i = i + 1
else:
parts.pop(i)
return list(flatten(parts))
Just let it run, and after some time it is likely to find a small example. Which will greatly aid in debugging.
So - I found 2 serious bugs in code. Both in LR ("standard" node and root). As I suspected - bugs were in pointers. Now tree is working (tested few times for 20k,30k and 100k nodes). Solved.
class Node:
def __init__(self, val):
self.val = val
self.left = None
self.right = None
class BST:
def __init__(self, root=None):
self.root = root
def remove(self, val):
if self.root == None:
return root
else:
self._remove(val, self.root)
def _remove(self, val, node):
if node == None:
return node # Item not found
if val < node.val:
self._remove(val, node.left)
elif val > node.val:
self._remove(val, node.right)
else:
# FOUND NODE TO REMOVE
if node.left != None and node.right != None: # IF TWO CHILDREN
node.val = self._find_min(node.right)
node.right = self._remove(node.val, node.right)
else: # ZERO OR ONE CHILD
if node.left == None: # COVERS ZERO CHILD CASE
node = node.right
elif node.right == None:
node = node.left
return node
Cannot figure out why this function will not delete some values. I debugged with print statements and can see that if I try to remove a value, the function will enter the else block and successfully remove a node with two children. However, when attempting to remove a node with one or zero children, the codes executes with no errors, but when I print the tree to view its contents the node is still there.
The node to be removed will have at least one None child, and it seems straightforward to set the node equal to its right (or left) child, which I assume sets the node to None.
I have some experience with Java, but fairly new to Python, and I sometimes run into trouble with the "self" protocol, but I don't think that is the case here.
I build a binary tree with python code, now I could print it in order with testTree.printInorder(testTree.root). I have tried to lookup some node ,and the function findNode doesn't work anymore . print testTree.findNode(testTree.root,20) whatever I put in just return None.
class TreeNode:
def __init__(self, value):
self.left = None;
self.right = None;
self.data = value;
class Tree:
def __init__(self):
self.root = None
def addNode(self,node,value):
if node == None:
self.root = TreeNode(value)
else:
if value < node.data:
if node.left == None:
node.left = TreeNode(value)
else:
self.addNode(node.left,value)
else:
if node.right == None:
node.right = TreeNode(value)
else:
self.addNode(node.right,value)
def printInorder(self,node):
if node != None:
self.printInorder(node.left)
print node.data
self.printInorder(node.right)
def findNode(self,node,value):
if self.root != None:
if value == node.data:
return node.data
elif value < node.data and node.left != None:
self.findNode(node.left,value)
elif value > node.data and node.right != None:
self.findNode(node.right,value)
else:
return None
testTree = Tree()
testTree.addNode(testTree.root, 200)
testTree.addNode(testTree.root, 300)
testTree.addNode(testTree.root, 100)
testTree.addNode(testTree.root, 30)
testTree.addNode(testTree.root, 20)
#testTree.printInorder(testTree.root)
print testTree.findNode(testTree.root,20)
Any function without an explicit return will return None.
You have not returned the recursive calls within findNode. So, here.
if value == node.data:
return node.data
elif value < node.data and node.left != None:
return self.findNode(node.left,value)
elif value > node.data and node.right != None:
return self.findNode(node.right,value)
Now, I can't help but thinking this is a bit noisy. You'll always start adding from the root, yes?
testTree.addNode(testTree.root, 200)
You could rather do this
testTree.addNode(200)
And to do that, you basically implement your methods on the TreeNode class instead. So, for the addNode.
You could also "return up" from the recursion, rather than "pass down" the nodes as parameters.
class TreeNode:
def __init__(self, value):
self.left = None
self.right = None
self.data = value
def addNode(self,value):
if self.data == None: # Ideally, should never end-up here
self.data = value
else:
if value < self.data:
if self.left == None:
self.left = TreeNode(value)
else:
self.left = self.left.addNode(value)
else:
if self.right == None:
self.right = TreeNode(value)
else:
self.right = self.right.addNode(value)
return self # Return back up the recursion
Then, in the Tree class, just delegate the addNode responsibility to the root
class Tree:
def __init__(self):
self.root = None
def addNode(self,value):
if self.root == None:
self.root = TreeNode(value)
else:
self.root = self.root.addNode(value)
When you recurse to children in findNode you need to return the result, otherwise the function will implicitly return None:
def findNode(self,node,value):
if self.root != None:
if value == node.data:
return node.data
elif value < node.data and node.left != None:
return self.findNode(node.left,value) # Added return
elif value > node.data and node.right != None:
return self.findNode(node.right,value) # Added return
else:
return None
I am trying to practice BST tree implementation with python, following is my code,
import pdb
class Node():
def __init__(self, parent=None, key=None):
self.parent = parent if parent != None else None
self.left = None
self.right = None
self.key = key if key != None else None
class BST():
def __init__(self):
self.root = Node()
def insertKey (self, key):
#pdb.set_trace()
# transverse till we find empty position
if (self.root.key == None):
self.root.key = key
else:
node = self.root
while (node.left != None and node.right != None):
if node.key < key:
node = node.right
else:
node = node.left
#we have node either left or right is empty
if node.key < key:
node.right = Node (node, key)
else:
node.left = Node (node, key)
def inOrder (self, node):
#pdb.set_trace()
if node != None:
self.inOrder (node.left)
print node.key
self.inOrder (node.right)
def printLeft (self, node):
if node != None:
self.printLeft (node)
print node.key
def debugAll (self):
self.inOrder (self.root)
#self.printLeft (self.root)
def fromArray (self, numbers):
srt = sorted(numbers)
print srt
length = len(srt)
mid = length/2
rootEle = srt[mid]
self.insertKey (rootEle)
for i in range (1, mid+1):
try:
#pdb.set_trace()
self.insertKey (srt[mid-i])
self.insertKey (srt[mid+i])
except IndexError:
pass
bst = BST()
bst.fromArray ([1,2,4,3,6,5,10,8,9])
bst.debugAll ()
However the result of the inOrder tree walk is unexpected
1
4
5
6
10
I tried to debug through the pdb while inserting the keys, the keys are properly inserted, but when transversing the tree, some Node are skipped because they're marked as 'NoneType'. May be I am missing out on some language specifics here.
For a start, the code you have below isn't right:
while (node.left != None and node.right != None):
if node.key < key:
node = node.right
else:
node = node.left
It will stop descending if either the left or the right node doesn't exist.
EDIT: If you modify the loop like this, it works. Could be better optimized, but it's a start...
class Node():
def __init__(self, parent=None, key=None):
self.parent = parent if parent != None else None
self.left = None
self.right = None
self.key = key if key != None else None
class BST():
def __init__(self):
self.root = Node()
def insertKey (self, key):
#pdb.set_trace()
# transverse till we find empty position
if (self.root.key == None):
self.root.key = key
else:
node = self.root
while 1:
if node.key < key:
if node.right is None:
node.right = Node(node, key)
break
else:
node = node.right
else:
if node.left is None:
node.left = Node(node, key)
break
else:
node = node.left
def inOrder (self, node):
#pdb.set_trace()
if node != None:
self.inOrder (node.left)
print node.key
self.inOrder (node.right)
def printLeft (self, node):
if node != None:
self.printLeft (node)
print node.key
def debugAll (self):
self.inOrder (self.root)
#self.printLeft (self.root)
def fromArray (self, numbers):
srt = sorted(numbers)
print srt
length = len(srt)
mid = length/2
rootEle = srt[mid]
self.insertKey (rootEle)
for i in range (1, mid+1):
try:
#pdb.set_trace()
self.insertKey (srt[mid-i])
self.insertKey (srt[mid+i])
except IndexError:
pass
bst = BST()
bst.fromArray ([1,2,4,3,6,5,10,8,9])
bst.debugAll ()