How to make a BST from a .txt file - python

This is my BST code:
class BinarySearchTree(BinaryTree):
def insert(self, value):
parent = None
x = self.root
while(x):
parent = x
if value < x.data:
x = x.left
else:
x = x.right
if parent is None:
self.root = Node(value)
elif value < parent.data:
parent.left = Node(value)
else:
parent.right = Node(value)
def search(self, value):
return self._search(value, self.root)
def _search(self, value, node):
if node is None:
return node
if node.data == value:
return BinarySearchTree(node)
if value < node.data:
return self._search(value, node.left)
return self._search(value, node.right)
and i want it to open a .txt file that have words like this
apple
grape
banana
lemon
and only show the first 4 levels

Related

Binary Search Tree Deletion in Python Recursive

I know this question has been asked hundreds of times, but I haven't been able to find a question exactly similar to my implementation of binary search trees. I have implemented a basic binary search tree class in Python; the only function that currently is not complete is the delete function. I understand the algorithm for how it is done, and the three different deletion cases. The one restriction I have is that I don't want to use parent pointers, and I am unsure how to keep track of the previous node so that I can set it appropriately based on deletion.
For example, for the case where the node to be deleted has one child, I don't know how I can keep track of the previous node (parent node) so that I can update its left or right child to the children of the node that was deleted.
I don't want to use parent pointers for each node in this deletion function
def delete(self, value):
if self.root:
self.__delete(value, self.root)
def __delete(self, value, curr_node):
#How do I keep track of previous (parent) node here?
if curr_node:
if value < curr_node.value:
self.__delete(value, curr_node.left)
elif value > curr_node.value:
self.__delete(value, curr_node.right)
elif value == curr_node.value:
#Case 1: Node is leaf (no children)
#Case 2: Node has one child: the parent node has its child updated to the deleted node's children
#Case 3: Node has two children
pass
Full code
class Node:
def __init__(self, value):
self.value = value
self.left = None
self.right = None
def __str__(self):
return str(self.value)
class BST:
def __init__(self):
self.root = None
def insert(self, value):
if not self.root:
self.root = Node(value)
else:
self.__insert(value, self.root)
def __insert(self, value, curr_node):
if value < curr_node.value:
if not curr_node.left:
curr_node.left = Node(value)
else:
self.__insert(value, curr_node.left)
elif value > curr_node.value:
if not curr_node.right:
curr_node.right = Node(value)
else:
self.__insert(value, curr_node.right)
else:
print("Node already exists!")
def lookup(self, value):
if value == self.root.value:
return self.root
else:
self.__lookup(value, self.root)
def __lookup(self, value, curr_node):
if value < curr_node.value:
if value == curr_node.left.value:
return curr_node.left
else:
self.__lookup(value, curr_node.left)
elif value > curr_node.value:
if value == curr_node.right.value:
return curr_node.right
else:
self.__lookup(value, curr_node.right)
else:
print("Value doesn't exist!")
def height(self):
if not self.root:
return 0
else:
return self.__height(self.root)
def __height(self, curr_node):
if not curr_node:
return 0
return 1 + max(self.__height(curr_node.left), self.__height(curr_node.right))
def min_value(self):
if self.root:
return self.__min_value(self.root)
def __min_value(self, curr_node):
if not curr_node.left:
return curr_node
return self.__min_value(curr_node.left)
def max_value(self):
if self.root:
return self.__max_value(self.root)
def __max_value(self, curr_node):
if not curr_node.right:
return curr_node
return self.__max_value(curr_node.right)
def count_nodes(self):
if not self.root:
return 0
else:
return self.__count_nodes(self.root)
def __count_nodes(self, curr_node):
if not curr_node:
return 0
return 1 + self.__count_nodes(curr_node.left) + self.__count_nodes(curr_node.right)
def inorder_traversal(self):
if self.root:
return self.__inorder_traversal(self.root)
def __inorder_traversal(self, curr_node):
path = []
if curr_node:
path.extend(self.__inorder_traversal(curr_node.left))
path.append(curr_node.value)
path.extend(self.__inorder_traversal(curr_node.right))
return path
def __num_children(self, curr_node):
if not curr_node.left and not curr_node.right:
return 0
elif curr_node.right or curr_node.left:
return 1
elif curr_node.right and curr_node.left:
return 2
def delete(self, value):
if self.root:
self.__delete(value, self.root)
def __delete(self, value, curr_node):
#How do I keep track of previous (parent) node here?
if curr_node:
if value < curr_node.value:
self.__delete(value, curr_node.left)
elif value > curr_node.value:
self.__delete(value, curr_node.right)
elif value == curr_node.value:
#Case 1: Node is leaf (no children)
#Case 2: Node has one child: the parent node has its child updated to the deleted node's children
#Case 3: Node has two children
pass

BST with connection to parent - loop

I have problem with infinite loop in getMinimal() method. It works in this way :
1)Take node,
2)If node has other node on the left - go to other one.
3)Repeat as far as node has sth on the left side
4)Return the minimal node.
But sometimes it works in infinite loop for example from 1000 to 400, then to 4 then..to 1000! I have no ide where I make mistake. I reviewed this code many times,every single "pointer" to parent/left/right node is okay! Please - help.
Algorithm works okay to "handwritten" trees - ~20nodes. I wanted to test it in better cases - 2500nodes,generated by random lib (from -10k to 10k).
import random
class Node:
def __init__(self, val):
self.val = val
self.parent = None
self.right = None
self.left = None
# Class of node.
def str(self):
return str(self.val)
class MyTree:
def __init__(self, node):
self.root = node
def insert(self, node):
current = self.root
a = True
while a:
if node.val > current.val:
if current.right is not None:
current = current.right
continue
else:
current.right = node
node.parent = current
a = False
if node.val <= current.val:
if current.left is not None:
current = current.left
continue
else:
current.left = node
node.parent = current
a = False
def search(self, node):
current = self.root
while node.val != current.val:
if node.val > current.val:
current = current.right
continue
elif node.val <= current.val:
current = current.left
continue
if node.val == current.val:
return current
else:
print("There is no such node!")
def delete(self, node):
if isinstance(node, (float, int)):
node = self.search(node)
if node is self.root:
self.__deleteRoot()
return
else:
if node.right is None and node.left is None:
self.__deleteNN(node)
return
if node.right is None and node.left is not None:
self.__deleteLN(node)
return
if node.right is not None and node.left is None:
self.__deleteNR(node)
return
if node.right is not None and node.left is not None:
self.__deleteLR(node)
return
def __deleteNN(self, node):
if node.parent.left is node:
node.parent.left = None
if node.parent.right is node:
node.parent.right = None
def __deleteLN(self, node):
parent = node.parent
son = node.left
# parent replaced
if parent.left is node:
parent.left = son
if parent.right is node:
parent.right = son
son.parent = parent
def __deleteNR(self,node):
parent = node.parent
son = node.right
# replace parent
if parent.left is node:
parent.left = son
if parent.right is node:
parent.right = son
son.parent = parent
def __deleteLR(self, node):
minimal = self.getMinimal(node.right)
if minimal.parent.left is minimal:
minimal.parent.left = None
if minimal.parent.right is minimal:
minimal.parent.right = None
# parent of minimal done..
if node.parent.left is node:
node.parent.left = minimal
if node.parent.right is node:
node.parent.right = minimal
minimal.right = node.right
minimal.left = node.left
def getMinimal(self, node):
k = node
while k.left is not None:
k = k.left
return k
def getMax(self):
current = self.root
while current.right:
current = current.right
return current
def __trav(self, node):
if not node:
return
print(node.val)
self.__trav(node.left)
self.__trav(node.right)
def printTrav(self):
self.__trav(self.root)
def __deleteRoot(self):
if self.root.left is None and self.root.right is None:
self.root = None
return
if self.root.left is None and self.root.right is not None:
# left empty,right full
self.root.right.parent = None
self.root = self.root.right
return
if self.root.left is not None and self.root.right is None:
# right empty, left full
self.root.left.parent = None
self.root = self.root.left
return
# node has both children
if self.root.left is not None and self.root.right is not None:
temp = self.getMinimal(self.root.right) # minimal from right subtree
# sometimes it could be like this..
# r
# \
# x
if temp.parent.left is temp:
temp.parent.left = None
else:
temp.parent.right = None
self.root.left.parent = temp
self.root.right.parent = temp
temp.right = self.root.right
temp.left = self.root.left
self.root = temp
self.root.parent = None
return
def search(self, val):
node = self.root
if node.val == val:
return node
if val > node.val and node.right is not None:
node = node.right
if val < node.val and node.left is not None:
node = node.left
else:
print("There's no such value!")
return
def printMax(self):
print(self.getMax().val)
def printMin(self):
print(self.getMinimal(self.root).val)
arr=[None]*2500
for x in range(2500):
arr[x]=Node(random.randint(-10000,10000))
myTree = MyTree(arr[0])
for x in range(1,2500):
myTree.insert(arr[x])
for x in range(2500):
myTree.delete(arr[x])
It is suspicious that you define search twice.
Still that said, here is how I would debug this. I would modify your program to read from a file, try to run, and then detect an endless loop and bail out. Now write random files until you have one that causes you to crash.
Once you have a random file that shows the bug, the next step is to make it minimal. Here is a harness that can let you do that.
import itertools
flatten = itertools.chain.from_iterable
# bug_found should be a function that takes a list of elements and runs your test.
# example should be an array that demonstrates the bug.
def find_minimal (bug_found, example):
parts = [example]
while 1 < max(len(part) for part in parts):
i = 0
while i < len(parts):
if 1 == len(parts[i]):
i = i + 1
else:
part = parts.pop(i)
# Divide in 2.
mid = len(part)/2
part1 = part[0:mid]
part2 = part[mid:len(part)]
# Do we need part1?
parts.insert(i, part1)
if bug_found(flatten(parts)):
i = i + 1
parts.insert(i, part2)
else:
parts[i] = part2
# Do we need part2?
if bug_found_func(flatten(parts)):
i = i + 1
else:
parts.pop(i)
return list(flatten(parts))
Just let it run, and after some time it is likely to find a small example. Which will greatly aid in debugging.
So - I found 2 serious bugs in code. Both in LR ("standard" node and root). As I suspected - bugs were in pointers. Now tree is working (tested few times for 20k,30k and 100k nodes). Solved.

Cofusing about lookup node with binary tree

I build a binary tree with python code, now I could print it in order with testTree.printInorder(testTree.root). I have tried to lookup some node ,and the function findNode doesn't work anymore . print testTree.findNode(testTree.root,20) whatever I put in just return None.
class TreeNode:
def __init__(self, value):
self.left = None;
self.right = None;
self.data = value;
class Tree:
def __init__(self):
self.root = None
def addNode(self,node,value):
if node == None:
self.root = TreeNode(value)
else:
if value < node.data:
if node.left == None:
node.left = TreeNode(value)
else:
self.addNode(node.left,value)
else:
if node.right == None:
node.right = TreeNode(value)
else:
self.addNode(node.right,value)
def printInorder(self,node):
if node != None:
self.printInorder(node.left)
print node.data
self.printInorder(node.right)
def findNode(self,node,value):
if self.root != None:
if value == node.data:
return node.data
elif value < node.data and node.left != None:
self.findNode(node.left,value)
elif value > node.data and node.right != None:
self.findNode(node.right,value)
else:
return None
testTree = Tree()
testTree.addNode(testTree.root, 200)
testTree.addNode(testTree.root, 300)
testTree.addNode(testTree.root, 100)
testTree.addNode(testTree.root, 30)
testTree.addNode(testTree.root, 20)
#testTree.printInorder(testTree.root)
print testTree.findNode(testTree.root,20)
Any function without an explicit return will return None.
You have not returned the recursive calls within findNode. So, here.
if value == node.data:
return node.data
elif value < node.data and node.left != None:
return self.findNode(node.left,value)
elif value > node.data and node.right != None:
return self.findNode(node.right,value)
Now, I can't help but thinking this is a bit noisy. You'll always start adding from the root, yes?
testTree.addNode(testTree.root, 200)
You could rather do this
testTree.addNode(200)
And to do that, you basically implement your methods on the TreeNode class instead. So, for the addNode.
You could also "return up" from the recursion, rather than "pass down" the nodes as parameters.
class TreeNode:
def __init__(self, value):
self.left = None
self.right = None
self.data = value
def addNode(self,value):
if self.data == None: # Ideally, should never end-up here
self.data = value
else:
if value < self.data:
if self.left == None:
self.left = TreeNode(value)
else:
self.left = self.left.addNode(value)
else:
if self.right == None:
self.right = TreeNode(value)
else:
self.right = self.right.addNode(value)
return self # Return back up the recursion
Then, in the Tree class, just delegate the addNode responsibility to the root
class Tree:
def __init__(self):
self.root = None
def addNode(self,value):
if self.root == None:
self.root = TreeNode(value)
else:
self.root = self.root.addNode(value)
When you recurse to children in findNode you need to return the result, otherwise the function will implicitly return None:
def findNode(self,node,value):
if self.root != None:
if value == node.data:
return node.data
elif value < node.data and node.left != None:
return self.findNode(node.left,value) # Added return
elif value > node.data and node.right != None:
return self.findNode(node.right,value) # Added return
else:
return None

Inorder, Preorder, Postorder traversal not working

I am trying to implement binary search tree in pythonand trying to print the nodes of a tree in inorder, preorder and postorder but unfortunately my results are not correct.
Here is my code:
class Node:
def __init__(self, val):
self.v = val
self.l = None
self.r = None
class BinarySearchTree:
def __init__(self):
self.root = None
def get_root(self):
return self.root
def insert(self, val):
if self.root is None:
self.root = Node(val)
else:
self._add(val, self.root)
def _add(self, val, node):
if val < node.l:
if node.l is None:
node.l = Node(val)
else:
self._add(val, node.l)
else:
if node.r is None:
node.r = Node(val)
else:
self._add(val, node.r)
def find(self, val):
if self.root is None:
return None
else:
self._find(val, self.root)
def _find(self, val, node):
if val == node.v:
return Node
else:
if val < node.v and node is not None:
self._find(val, node.l)
if val > node.v and node is not None:
self._find(val, node.r)
def delete_tree(self):
self.root = None
def print_in_order(self): # Left, Node, Right
if self.root is None:
return None
else:
self._in_order(self.root)
def _in_order(self, node):
if node is not None:
self._in_order(node.l)
print str(node.v) + ' '
self._in_order(node.r)
def print_pre_order(self): # Node, Left, Right
if self.root is None:
return None
else:
self._pre_order(self.root)
def _pre_order(self, node):
if node is not None:
print str(node.v) + ' '
self._pre_order(node.l)
self._pre_order(node.r)
def print_post_order(self): # Left, Right, Node
if self.root is None:
return None
else:
self._post_order(self.root)
def _post_order(self, node):
if node is not None:
self._post_order(node.l)
self._post_order(node.r)
print str(node.v) + ' '
if __name__ == '__main__':
t = BinarySearchTree()
t.insert(20)
t.insert(10)
t.insert(30)
t.insert(5)
t.insert(15)
t.insert(25)
t.insert(35)
print 'In Order Traversal: \n', t.print_in_order()
print '\nPre Order Traversal: \n', t.print_pre_order()
print '\nPost Order Traversal:\n', t.print_post_order()
Can someone please tell me what am I doing wrong?
My output is in the following: Inorder and Preorder is returning the same output.
In Order Traversal:
20
10
30
5
15
25
35
None
Pre Order Traversal:
20
10
30
5
15
25
35
None
Post Order Traversal:
35
25
15
5
30
10
20
None
Traversal functions are okay. But in _add, the following comparison:
if val < node.l:
...
should be replaced with:
if val < node.v:
...
to compare new value with current node value, instead of the left node which cause wrong comparison result; results in wrong tree structure.

Inorder tree walk not working

I am trying to practice BST tree implementation with python, following is my code,
import pdb
class Node():
def __init__(self, parent=None, key=None):
self.parent = parent if parent != None else None
self.left = None
self.right = None
self.key = key if key != None else None
class BST():
def __init__(self):
self.root = Node()
def insertKey (self, key):
#pdb.set_trace()
# transverse till we find empty position
if (self.root.key == None):
self.root.key = key
else:
node = self.root
while (node.left != None and node.right != None):
if node.key < key:
node = node.right
else:
node = node.left
#we have node either left or right is empty
if node.key < key:
node.right = Node (node, key)
else:
node.left = Node (node, key)
def inOrder (self, node):
#pdb.set_trace()
if node != None:
self.inOrder (node.left)
print node.key
self.inOrder (node.right)
def printLeft (self, node):
if node != None:
self.printLeft (node)
print node.key
def debugAll (self):
self.inOrder (self.root)
#self.printLeft (self.root)
def fromArray (self, numbers):
srt = sorted(numbers)
print srt
length = len(srt)
mid = length/2
rootEle = srt[mid]
self.insertKey (rootEle)
for i in range (1, mid+1):
try:
#pdb.set_trace()
self.insertKey (srt[mid-i])
self.insertKey (srt[mid+i])
except IndexError:
pass
bst = BST()
bst.fromArray ([1,2,4,3,6,5,10,8,9])
bst.debugAll ()
However the result of the inOrder tree walk is unexpected
1
4
5
6
10
I tried to debug through the pdb while inserting the keys, the keys are properly inserted, but when transversing the tree, some Node are skipped because they're marked as 'NoneType'. May be I am missing out on some language specifics here.
For a start, the code you have below isn't right:
while (node.left != None and node.right != None):
if node.key < key:
node = node.right
else:
node = node.left
It will stop descending if either the left or the right node doesn't exist.
EDIT: If you modify the loop like this, it works. Could be better optimized, but it's a start...
class Node():
def __init__(self, parent=None, key=None):
self.parent = parent if parent != None else None
self.left = None
self.right = None
self.key = key if key != None else None
class BST():
def __init__(self):
self.root = Node()
def insertKey (self, key):
#pdb.set_trace()
# transverse till we find empty position
if (self.root.key == None):
self.root.key = key
else:
node = self.root
while 1:
if node.key < key:
if node.right is None:
node.right = Node(node, key)
break
else:
node = node.right
else:
if node.left is None:
node.left = Node(node, key)
break
else:
node = node.left
def inOrder (self, node):
#pdb.set_trace()
if node != None:
self.inOrder (node.left)
print node.key
self.inOrder (node.right)
def printLeft (self, node):
if node != None:
self.printLeft (node)
print node.key
def debugAll (self):
self.inOrder (self.root)
#self.printLeft (self.root)
def fromArray (self, numbers):
srt = sorted(numbers)
print srt
length = len(srt)
mid = length/2
rootEle = srt[mid]
self.insertKey (rootEle)
for i in range (1, mid+1):
try:
#pdb.set_trace()
self.insertKey (srt[mid-i])
self.insertKey (srt[mid+i])
except IndexError:
pass
bst = BST()
bst.fromArray ([1,2,4,3,6,5,10,8,9])
bst.debugAll ()

Categories