question on serialization and deserialization in binary tree - python

There is this problem asked by Google to design an algorithim to serialize and deserialize binary tree. I found one of the solutions online. The part i don't really understand is why the condition is necessary at line 20, where "if node == None:", self.root = Node(value) ? Because afterall this program will prompt the user to input nodes in the form eg: 1,3,5 in order for the program to work so therefore there won't be a case where node =none because user input is necessary? Am I misunderstanding something else here?
class Node:
def __init__(self, value):
self.left = None
self.right = None
self.value = value
class Tree:
def __init__(self):
self.root = None
def addNode(self, node, value): #Why when node==None, self.root= Node(value)?
if node == None:
self.root = Node(value)
else:
if value < node.value:
if not node.left:
node.left = Node(value)
else:
self.addNode(node.left, value)
else:
if not node.right:
node.right = Node(value)
else:
self.addNode(node.right, value)
def serialize(root):
values = []
def serializer(node):
if not node:
values.append('?')
else:
values.append(str(node.value))
serializer(node.left)
serializer(node.right)
serializer(root)
return ','.join(values)
def deserialize(s):
values = iter(s.split(','))
def deserializer():
val = next(values)
if val == '?':
return None
else:
node = Node(int(val))
node.left = deserializer()
node.right = deserializer()
return node
return deserializer()
if __name__ == '__main__':
# Read input, numbers separated by commas
numbers = [int(n) for n in input().split(',')]
theTree = Tree()
for number in numbers:
theTree.addNode(theTree.root, number)
s1 = serialize(theTree.root)
s2 = serialize(deserialize(s1))
print(s1)
print(s2)
assert s1 == s2

In this line, when first number is entered in the tree, root will be None
for number in numbers:
theTree.addNode(theTree.root, number)
Hence, line 20 is needed.

Related

Trouble balancing a binary search tree using AVL

I'm unable to identify where I'm going wrong with my AVL implementation for balancing an existing binary search tree. I'm not getting any errors but my binary search tree does not come out to be properly balanced. After insertion, my binary search tree looks like (it would be prudent here to mention that my display_keys method gives us a visualization that is rotated by 90 degrees):
∅
The-Dreamers
∅
Saint-Laurent
∅
Pierrot-le-Fou
∅
Contempt
Cold-War
Before-Sunrise
∅
Basic-Instinct
∅
This is correct as it seems be to be following the rules for a BST.
But after calling the BalanceTree() method on my binary search tree, I seem to get:
∅
The-Dreamers
Saint-Laurent
Pierrot-le-Fou
Contempt
Cold-War
Before-Sunrise
Basic-Instinct
∅
which as you can see, is not Balanced. But wait, and here's the catch, if I call BalanceTree() again, the tree comes out to be perfectly balanced and isBSTBalanced returns True also. After the second call to BalanceTree(), our tree looks like:
∅
The-Dreamers
Saint-Laurent
Pierrot-le-Fou
Contempt
Cold-War
Before-Sunrise
Basic-Instinct
∅
I am adding the complete source code for my BST class for clarity and if somebody wants to execute the code, but I have added a comment (#Addition of new methods for AVL starts here) in the BST class to indicate where the methods for AVL start. You need only concern yourself with them. I would like for you help me pinpoint what exactly is going wrong in my code.
class BST:
class TreeNode:
def __init__(self, key, value, left=None, right=None, parent=None):
self.key = key
self.value = value
self.left = left
self.right = right
self.parent = parent
self.height = 1
def __init__(self):
self.root = None
self.size = 0
def __len__(self):
return self.size
def insert(self, key, value):
if self.root == None:
self.root = self.TreeNode(key, value)
else:
self._insert(key, value, self.root)
self.size += 1
def _insert(self, key, value, curr_node):
if key < curr_node.key:
if curr_node.left is not None:
self._insert(key, value, curr_node.left)
else:
curr_node.left = self.TreeNode(key, value, parent=curr_node)
elif key > curr_node.key:
if curr_node.right is not None:
self._insert(key, value, curr_node.right)
else:
curr_node.right = self.TreeNode(key, value, parent=curr_node)
def search(self, key):
if self.root:
found = self._search(key, self.root)
if found:
return found.value
else:
return None
else:
return None
def _search(self, key, curr_node):
if not curr_node:
return None
elif curr_node.key == key:
return curr_node
elif key < curr_node.key:
return self._search(key, curr_node.left)
else:
return self._search(key, curr_node.right)
def find_min(self):
curr = self.root
while curr.left is not None:
curr = curr.left
return curr
def find(self, node):
curr = node
while curr.left is not None:
curr = curr.left
return curr
def delete(self, key):
node_to_remove = self._search(key, self.root)
if node_to_remove.left is None and node_to_remove.right is None:
#Then we identify this as a leaf node
if node_to_remove is node_to_remove.parent.left:
#Setting the parent's reference to this to None
node_to_remove.parent.left = None
elif node_to_remove is node_to_remove.parent.right:
node_to_remove.parent.right = None
#2nd Case --> Two child
elif node_to_remove.left and node_to_remove.right:
minimum = self.find(node_to_remove.right)
self.delete(minimum.key) #We will still have a ref to this node afterwards
node_to_remove.key, node_to_remove.value = minimum.key, minimum.value
#3rd Case -> One child
else:
if node_to_remove.left:
node_to_remove.left.parent = node_to_remove.parent
node_to_remove.parent.left = node_to_remove.left
elif node_to_remove.right:
node_to_remove.right.parent = node_to_remove.parent
node_to_remove.parent.right = node_to_remove.right
def traversal(self, root):
res = []
if root:
res = self.traversal(root.left)
res.append(root)
res = res + self.traversal(root.right)
return res
def inorder_traversal(self, root):
if root:
self.inorder_traversal(root.left)
print(root.key)
self.inorder_traversal(root.right)
#Addition of new methods for AVL starts here
def display_keys(self, node, space='\t', level=0):
"""
Allows us to visualize the tree (albiet rotated by 90 degrees)
"""
# print(node.key if node else None, level)
# If the node is empty
if node is None:
print(space*level + '∅')
return
# If the node is a leaf
if node.left is None and node.right is None:
print(space*level + str(node.key))
return
# If the node has children
self.display_keys(node.right, space, level+1)
print(space*level + str(node.key))
self.display_keys(node.left,space, level+1)
def height(self):
return self._height(self.root)
def _height(self, curr_node):
if curr_node is None:
return -1 #since we are counting number of edges, we will return -1
else:
return 1 + max(self._height(curr_node.left), self._height(curr_node.right))
def isBSTBalanced(self):
return self._isBSTBalanced(self.root)
def _isBSTBalanced(self, curr_node):
if curr_node is None:
return True
hleft_subtree = self._height(curr_node.left)
hright_subtree = self._height(curr_node.right)
if hleft_subtree - hright_subtree in [-1,0,1]:
return self._isBSTBalanced(curr_node.left) and self._isBSTBalanced(curr_node.right)
else:
return False
def balance_factor(self):
if self.root is not None:
return self._balance_factor(self.root)
else:
return 0
def _balance_factor(self, curr_node):
if curr_node is None:
return
hleft_subtree = self._height(curr_node.left)
hright_subtree = self._height(curr_node.right)
b_factor = hleft_subtree - hright_subtree
return b_factor
def BalanceTree(self):
if self.isBSTBalanced() == False:
return self._rebalance(self.root)
def _rebalance(self, curr_node):
if curr_node is None:
return None
curr_node.left = self._rebalance(curr_node.left)
curr_node.right = self._rebalance(curr_node.right)
curr_node.height = 1 + max(self._height(curr_node.left), self._height(curr_node.right))
#print(curr_node.height)
if self._balance_factor(curr_node) > 1 and self._balance_factor(curr_node.left) >= 0:
#left heavy subtree
return self._rotate_right(curr_node)
if self._balance_factor(curr_node) < -1 and self._balance_factor(curr_node.right) <= 0:
#right heavy subtree
return self._rotate_left(curr_node)
if self._balance_factor(curr_node) < 0 and self._balance_factor(curr_node.right) > 0:
self._rotate_right(curr_node.right)
return self._rotate_left(curr_node)
if self._balance_factor(curr_node) > 0 and self._balance_factor(curr_node.left) < 0:
self._rotate_left(curr_node.left)
return self._rotate_right(curr_node)
return curr_node
def _rotate_left(self, oldRoot):
newRoot = oldRoot.right #the newRoot is the right child of the previous root
oldRoot.right = newRoot.left #replacing right child of the old root with the left child of the new
if newRoot.left is not None:
newRoot.left.parent = oldRoot
newRoot.parent = oldRoot.parent
if oldRoot == self.root:
self.root = newRoot
else:
if oldRoot.parent.left is oldRoot: #Checking isLeftChild
oldRoot.parent.left = newRoot
else:
oldRoot.parent.right = newRoot
newRoot.left = oldRoot
oldRoot.parent = newRoot
oldRoot.height = 1 + max(self._height(oldRoot.left), self._height(oldRoot.right))
newRoot.height = 1 + max(self._height(newRoot.left), self._height(newRoot.right))
return newRoot
def _rotate_right(self, oldRoot):
newRoot = oldRoot.left #the newRoot is the left child of the previous root
oldRoot.left = newRoot.right #replacing left child of the old root with the right child of the new
if newRoot.right is not None:
newRoot.right.parent = oldRoot
newRoot.parent = oldRoot.parent
if oldRoot == self.root:
self.root = newRoot
else:
if oldRoot.parent.right is oldRoot: #Checking isRightChild
oldRoot.parent.right = newRoot
else:
oldRoot.parent.left = newRoot
newRoot.right = oldRoot
oldRoot.parent = newRoot
oldRoot.height = 1 + max(self._height(oldRoot.left), self._height(oldRoot.right))
newRoot.height = 1 + max(self._height(newRoot.left), self._height(newRoot.right))
return newRoot
if __name__ == '__main__':
obj = BST()
obj.insert('Basic-Instinct', 0)
obj.insert('The-Dreamers', 1)
obj.insert('Saint-Laurent', 2)
obj.insert('Pierrot-le-Fou', 3)
obj.insert('Contempt', 4)
obj.insert('Before-Sunrise', 5)
obj.insert('Cold-War', 8)
obj.display_keys(obj.root) #displays a visual representation of our tree, albeit rotated by 90 degrees
print()
print("isBSTBalanced:", obj.isBSTBalanced())
obj.BalanceTree()
print("isBSTBalanced:", obj.isBSTBalanced()) #After executing BalanceTree(), isBSTBalanced still returns False
print()
obj.display_keys(obj.root)
Progress: Revamped _isBSTBalanced method so that it visits every node recursively and not just the root node. The final outcome, however, remains the same.
Progress: I was able to identify one of the major issues being that while I was calling _rotate_left and _rotate_right methods in the _rebalance method, I was not returning them. In addition to this, I was not recursively visiting the left and right subtrees of curr_node, which was initially set to the root of the tree, to be able to traverse the tree in a bottom up manner. I have resolved this too. I have added a display_keys method which allows us to visualize the tree, albeit rotated by 90 degrees. I'm updating the code and prompt in this post accordingly. The problem that still remains is that I have to call the BalanceTree() method more than once in some cases for isBSTBalanced to return True.

Question on argument in functions for deserializing the string back into tree in Python

The question that I am asking here is based on www.dailycodingproblem.com
question 3:
"Given the root to a binary tree, implement serialize(root), which serializes the tree into a string, and deserialize(s), which deserializes the string back into the tree."
The code below is one of the solutions that I found and the question I want to ask is:-
For the deserialize function in line 42, when I pass an argument such as
def deserializer(node): which is followed by return deserializer(node), the output says "node not defined error" if I input any numbers eg: 1,3,2.
However, it works when I leave out the argument part empty that is def deserializer() followed by return deserializer()?
Your help would be much appreciated!
#must create a constructor every time we create a class eg: def __init__(self)
class Node:
def __init__(self, v):
self.left = None #none = empty state
self.right = None
self.value = v
class Tree:
def __init__(self):
self.root = None
def addNode(self, node, v1):
if node == None:
self.root = Node(v1)
#argument in Node need not have to be v
else:
if v1 < node.value:
#if 2nd value less than 1st value for example?
if not node.left:
node.left = Node(v1) #will not update value
else:
#if it is node.left,update value
self.addNode(node.left, v1)
else:
if not node.right:
node.right = Node(v1)
else:
self.addNode(node.right, v1)
def deserialize(s):
values = iter(s.split(','))
def deserializer(): #why putting def deserializer(node) gives me "node undefined error"?)
val = next(values)
if val == '?':
return None
else:
node = Node(int(val))
node.left = deserializer()
node.right = deserializer()
return node
return deserializer() #why putting return deserializer(node) gives me "node undefined error"?)
if __name__ == '__main__':
# Read input, numbers separated by commas
numbers = [int(n) for n in input().split(',')]
theTree = Tree()
for number in numbers:
theTree.addNode(theTree.root, number)
s2 = serialize(deserialize(s1))
print(s2)

BST with connection to parent - loop

I have problem with infinite loop in getMinimal() method. It works in this way :
1)Take node,
2)If node has other node on the left - go to other one.
3)Repeat as far as node has sth on the left side
4)Return the minimal node.
But sometimes it works in infinite loop for example from 1000 to 400, then to 4 then..to 1000! I have no ide where I make mistake. I reviewed this code many times,every single "pointer" to parent/left/right node is okay! Please - help.
Algorithm works okay to "handwritten" trees - ~20nodes. I wanted to test it in better cases - 2500nodes,generated by random lib (from -10k to 10k).
import random
class Node:
def __init__(self, val):
self.val = val
self.parent = None
self.right = None
self.left = None
# Class of node.
def str(self):
return str(self.val)
class MyTree:
def __init__(self, node):
self.root = node
def insert(self, node):
current = self.root
a = True
while a:
if node.val > current.val:
if current.right is not None:
current = current.right
continue
else:
current.right = node
node.parent = current
a = False
if node.val <= current.val:
if current.left is not None:
current = current.left
continue
else:
current.left = node
node.parent = current
a = False
def search(self, node):
current = self.root
while node.val != current.val:
if node.val > current.val:
current = current.right
continue
elif node.val <= current.val:
current = current.left
continue
if node.val == current.val:
return current
else:
print("There is no such node!")
def delete(self, node):
if isinstance(node, (float, int)):
node = self.search(node)
if node is self.root:
self.__deleteRoot()
return
else:
if node.right is None and node.left is None:
self.__deleteNN(node)
return
if node.right is None and node.left is not None:
self.__deleteLN(node)
return
if node.right is not None and node.left is None:
self.__deleteNR(node)
return
if node.right is not None and node.left is not None:
self.__deleteLR(node)
return
def __deleteNN(self, node):
if node.parent.left is node:
node.parent.left = None
if node.parent.right is node:
node.parent.right = None
def __deleteLN(self, node):
parent = node.parent
son = node.left
# parent replaced
if parent.left is node:
parent.left = son
if parent.right is node:
parent.right = son
son.parent = parent
def __deleteNR(self,node):
parent = node.parent
son = node.right
# replace parent
if parent.left is node:
parent.left = son
if parent.right is node:
parent.right = son
son.parent = parent
def __deleteLR(self, node):
minimal = self.getMinimal(node.right)
if minimal.parent.left is minimal:
minimal.parent.left = None
if minimal.parent.right is minimal:
minimal.parent.right = None
# parent of minimal done..
if node.parent.left is node:
node.parent.left = minimal
if node.parent.right is node:
node.parent.right = minimal
minimal.right = node.right
minimal.left = node.left
def getMinimal(self, node):
k = node
while k.left is not None:
k = k.left
return k
def getMax(self):
current = self.root
while current.right:
current = current.right
return current
def __trav(self, node):
if not node:
return
print(node.val)
self.__trav(node.left)
self.__trav(node.right)
def printTrav(self):
self.__trav(self.root)
def __deleteRoot(self):
if self.root.left is None and self.root.right is None:
self.root = None
return
if self.root.left is None and self.root.right is not None:
# left empty,right full
self.root.right.parent = None
self.root = self.root.right
return
if self.root.left is not None and self.root.right is None:
# right empty, left full
self.root.left.parent = None
self.root = self.root.left
return
# node has both children
if self.root.left is not None and self.root.right is not None:
temp = self.getMinimal(self.root.right) # minimal from right subtree
# sometimes it could be like this..
# r
# \
# x
if temp.parent.left is temp:
temp.parent.left = None
else:
temp.parent.right = None
self.root.left.parent = temp
self.root.right.parent = temp
temp.right = self.root.right
temp.left = self.root.left
self.root = temp
self.root.parent = None
return
def search(self, val):
node = self.root
if node.val == val:
return node
if val > node.val and node.right is not None:
node = node.right
if val < node.val and node.left is not None:
node = node.left
else:
print("There's no such value!")
return
def printMax(self):
print(self.getMax().val)
def printMin(self):
print(self.getMinimal(self.root).val)
arr=[None]*2500
for x in range(2500):
arr[x]=Node(random.randint(-10000,10000))
myTree = MyTree(arr[0])
for x in range(1,2500):
myTree.insert(arr[x])
for x in range(2500):
myTree.delete(arr[x])
It is suspicious that you define search twice.
Still that said, here is how I would debug this. I would modify your program to read from a file, try to run, and then detect an endless loop and bail out. Now write random files until you have one that causes you to crash.
Once you have a random file that shows the bug, the next step is to make it minimal. Here is a harness that can let you do that.
import itertools
flatten = itertools.chain.from_iterable
# bug_found should be a function that takes a list of elements and runs your test.
# example should be an array that demonstrates the bug.
def find_minimal (bug_found, example):
parts = [example]
while 1 < max(len(part) for part in parts):
i = 0
while i < len(parts):
if 1 == len(parts[i]):
i = i + 1
else:
part = parts.pop(i)
# Divide in 2.
mid = len(part)/2
part1 = part[0:mid]
part2 = part[mid:len(part)]
# Do we need part1?
parts.insert(i, part1)
if bug_found(flatten(parts)):
i = i + 1
parts.insert(i, part2)
else:
parts[i] = part2
# Do we need part2?
if bug_found_func(flatten(parts)):
i = i + 1
else:
parts.pop(i)
return list(flatten(parts))
Just let it run, and after some time it is likely to find a small example. Which will greatly aid in debugging.
So - I found 2 serious bugs in code. Both in LR ("standard" node and root). As I suspected - bugs were in pointers. Now tree is working (tested few times for 20k,30k and 100k nodes). Solved.

Python Serializing/Deserializing a binary tree

I'm trying to implement a serializing/deserializing algorithm in python for binary trees.
Here's my code:
class Node:
count = 1
def __init__(self, value):
self.value = value
self.left = None
self.right = None
def insert(self, value):
if self.value > value:
if self.left is None:
self.left = Node(value)
Node.count += 1
else:
self.left.insert(value)
else:
if self.right is None:
self.right = Node(value)
Node.count += 1
else:
self.right.insert(value)
# Using preorder
def serialize(root, serial):
if root != None:
serial.append(root.value)
serialize(root.left, serial)
serialize(root.right, serial)
else:
serial.append('x')
def deserialize(newRoot, serial):
if serial[0] == 'x':
serial.pop(0)
else:
if len(serial) > 0:
newRoot = Node(serial.pop(0))
print(newRoot.value)
deserialize(newRoot.left, serial)
deserialize(newRoot.right, serial)
print("This program serializes a tree\n")
root = Node(3)
root.insert(1)
root.insert(2)
root.insert(4)
root.insert(5)
root.insert(0)
# Serialize
serial = []
serialize(root, serial)
print(serial)
# Deserialize
newRoot = Node(None)
deserialize(newRoot, serial)
print(newRoot.value)
The problem is, newRoot doesn't get updated by deserialize because python passes it by value. How do I get around this, preferably in the most elegant way? In C/C++, I would just pass a pointer to newRoot and it should get updated accordingly. Thanks!
You can return the newly created nodes and assign them as left and right nodes. Also poping the first element of a list is more costly than poping the last element, so reverseing the list at the beginning and then using it in the recursion will be more performant in your case. So the code will become something like:
def deserialize(serial):
serial.reverse()
return _deserialize(serial)
def _deserialize(serial):
if not serial:
return None
node = None
value = serial.pop()
if value != 'x':
node = Node(value)
node.left = _deserialize(serial)
node.right = _deserialize(serial)
return node
root = deserialize(serial)
print(root.value)
You can create left and right subtree within deserialize function and return the root.
Here is my code:
node_list = []
MARKER = -1
class Node:
def __init__(self, val):
self.val = val
self.left = None
self.right = None
def serialize(root):
if root is None:
node_list.append(MARKER)
return
node_list.append(root.val)
serialize(root.left)
serialize(root.right)
def deserialize(root, node_list):
if node_list:
val = node_list.pop(0)
else:
return
if val == MARKER:
return
# Create root, left and right recursively
root = Node(val)
root.left = deserialize(root.left, node_list)
root.right = deserialize(root.right, node_list)
return root
def inorder_traversal(root):
if root:
inorder_traversal(root.left)
print(root.val, end=' ')
inorder_traversal(root.right)
if __name__=="__main__":
# Create tree
root = Node(20)
root.left = Node(8)
root.right = Node(22)
root.left.left = Node(4)
root.left.right = Node(12)
root.left.right.left = Node(10)
root.left.right.right = Node(14)
print("Inorder traversal before serialization..")
inorder_traversal(root)
print('')
# serialize the tree and insert elements into a list
serialize(root)
print(node_list)
root1 = None
root1 = deserialize(root1, node_list)
print("Inorder traversal after deserialization..")
inorder_traversal(root1)
print('')

Cofusing about lookup node with binary tree

I build a binary tree with python code, now I could print it in order with testTree.printInorder(testTree.root). I have tried to lookup some node ,and the function findNode doesn't work anymore . print testTree.findNode(testTree.root,20) whatever I put in just return None.
class TreeNode:
def __init__(self, value):
self.left = None;
self.right = None;
self.data = value;
class Tree:
def __init__(self):
self.root = None
def addNode(self,node,value):
if node == None:
self.root = TreeNode(value)
else:
if value < node.data:
if node.left == None:
node.left = TreeNode(value)
else:
self.addNode(node.left,value)
else:
if node.right == None:
node.right = TreeNode(value)
else:
self.addNode(node.right,value)
def printInorder(self,node):
if node != None:
self.printInorder(node.left)
print node.data
self.printInorder(node.right)
def findNode(self,node,value):
if self.root != None:
if value == node.data:
return node.data
elif value < node.data and node.left != None:
self.findNode(node.left,value)
elif value > node.data and node.right != None:
self.findNode(node.right,value)
else:
return None
testTree = Tree()
testTree.addNode(testTree.root, 200)
testTree.addNode(testTree.root, 300)
testTree.addNode(testTree.root, 100)
testTree.addNode(testTree.root, 30)
testTree.addNode(testTree.root, 20)
#testTree.printInorder(testTree.root)
print testTree.findNode(testTree.root,20)
Any function without an explicit return will return None.
You have not returned the recursive calls within findNode. So, here.
if value == node.data:
return node.data
elif value < node.data and node.left != None:
return self.findNode(node.left,value)
elif value > node.data and node.right != None:
return self.findNode(node.right,value)
Now, I can't help but thinking this is a bit noisy. You'll always start adding from the root, yes?
testTree.addNode(testTree.root, 200)
You could rather do this
testTree.addNode(200)
And to do that, you basically implement your methods on the TreeNode class instead. So, for the addNode.
You could also "return up" from the recursion, rather than "pass down" the nodes as parameters.
class TreeNode:
def __init__(self, value):
self.left = None
self.right = None
self.data = value
def addNode(self,value):
if self.data == None: # Ideally, should never end-up here
self.data = value
else:
if value < self.data:
if self.left == None:
self.left = TreeNode(value)
else:
self.left = self.left.addNode(value)
else:
if self.right == None:
self.right = TreeNode(value)
else:
self.right = self.right.addNode(value)
return self # Return back up the recursion
Then, in the Tree class, just delegate the addNode responsibility to the root
class Tree:
def __init__(self):
self.root = None
def addNode(self,value):
if self.root == None:
self.root = TreeNode(value)
else:
self.root = self.root.addNode(value)
When you recurse to children in findNode you need to return the result, otherwise the function will implicitly return None:
def findNode(self,node,value):
if self.root != None:
if value == node.data:
return node.data
elif value < node.data and node.left != None:
return self.findNode(node.left,value) # Added return
elif value > node.data and node.right != None:
return self.findNode(node.right,value) # Added return
else:
return None

Categories