Building a Binary Search Tree from a file - python

I have a text file of lines in the format
2 0 0
7 0 0
4 1 1
10 0 0
9 0 1
8 1 1
These lines represent the data in a binary search tree where the first element is the node data, the second is whether or not a left child exists ( 0 if no, 1 if yes) and the third is whether or not a right child exists (0 if no, 1 if yes)
I have a class called "BinarySearchTree" which has the following initialization function
def __init__(self, value=None):
# Initializes the tree with a value node, a left child and a right child
self.leftChild = None
self.rightChild = None
self.height = 1
self.value = value
I also have a stack class with the following "push" and "pop" functions:
def push(self, item):
# Adds an item to the beginning of the stack
ending = self.stack
self.stack = [item] + [ending]
def pop(self):
# Removes and returns the first element from the stack
if self.isEmpty():
return None
top_element = self.stack[0]
self.stack = self.stack[1:]
return top_element
I am trying to create a binary search tree instance from the lines in the text file and using the stack class. So far I have:
def loadTreeFromFile(filename):
binarySearchTree = stack.Stack()
with open(filename) as file:
# gets a list containing only the elements in the txt file
for level in file.readlines():
nodeInfo = level.rstrip().split()
data, lc, rc = int(nodeInfo[0]), int(nodeInfo[1]), int(nodeInfo[2])
print(data, lc, rc)
if rc == 1:
right_tree = binarySearchTree.pop()
if lc == 1:
left_tree = binarySearchTree.pop()
newTree = BinarySearchTree(data)
if rc == 1:
newTree.rightChild = right_tree
if lc == 1:
newTree.leftChild = left_tree
binarySearchTree.push(newTree)
return newTree
I am running into the problem when I try to display the BST, I get 8: [[[<__main__.BinarySearchTree object at 0x1033e4390>, []]], 9: [None, 10: [None, None]]] (I have a display function written for the BST class so this is not the problem) AND when I try to do anything with this newly created BST (such as get the depth, search it, etc), I get errors. Any help is much appreciated, thanks .

Related

Why does my binary tree behave differently given different byte strings?

I have been practicing recursion with python and currently am attempting to stop recursing all the way down to single bytes and instead stop at a certain byte size. In this example I choose 2, so in my code if a either of the potential children to be spawned is less than 2, it won't recurse and will just return the current node. It works fine with the first byte string, but fails with the next two. Why is this happening and how can I fix it?
Correct output for 1st b: stops recursing/creating children at size 3, because next generation of children have at least 1 child smaller than
size 2
b'\x00\x01\x00\x02\x00\x03'
b'\x00\x01\x00'
b'\x02\x00\x03'
Incorrect output for 2nd b: Appears to be recursing until single bytes
b'L_]ju\x87\xd4\x14j\x1b> \xc52'
b'L_]ju\x87\xd4'
b'L_]'
b'ju\x87\xd4'
b'ju'
b'\x87\xd4'
b'\x14j\x1b> \xc52'
b'\x14j\x1b'
b'> \xc52'
b'> '
b'\xc52'
from random import randbytes
class Node:
def __init__(self, value):
self.value = value
self.children = []
self.parent = None
self.bytesize = len(value)
def make_children(self, child):
child.parent = self
self.children.append(child)
def print_tree(self):
print(self.value)
if len(self.children) > 0: # leaf node case
for child in self.children:
child.print_tree()
def build_tree(value_list):
root = Node(value_list)
#if len(value_list) == 1:
if len(value_list) / 2 < 2: # MODIFY TO STOP RECURSING IF SIZE OF CHILDREN WILL BE BELOW 2
return root
mid_point = len(value_list) // 2
left_half = value_list[:mid_point]
right_half = value_list[mid_point:]
child1 = build_tree(left_half)
root.make_children(child1)
child2 = build_tree(right_half)
root.make_children(child2)
return root
if __name__ == '__main__':
#list1 = [12, 7, 8, 15, 9]
b = b'\x00\x01\x00\x02\x00\x03'
#b = b'\x4c\x5f\x5d\x6a\x75\x87\xd4\x14\x6a\x1b\x3e\x20\xc5\x32'
#b = randbytes(6)
file = build_tree(b)
file.print_tree()
print(len(b))
Your code is actually working as intended. The two byte strings you mention both have 2 bytes, not 1.
Here is one way to display a bytestring that might make it more clear:
def print_string(s):
print(' '.join(map('{:#2x}'.format, s)))
print_string(b'> ')
# 0x3e 0x20
print_string(b'\xc52')
# 0xc5 0x32

Stuck at an algo which searches indexes of a list

The function searches the index of a value and if it is called again then it returns the next index of that value. Here is what i tried, where I'm getting error is that my list is not getting overridden and after every call I am getting same result.
Note: I have not included linked list classes here
If anyone knows a better way to make this function please suggest
class Node:
def __init__(self,value):
self.value = value
self.next = None
self.previous = None
class LinkedList:
def __init__(self):
self.head = None
self.tail = None
class DynamicSharpSearch(LinkedList):
def __init__(self, data):
super().__init__()
self.data = data
self.count = 0
self.lim = 0 # using in search function
for i in self.data:
self.InsertatEnd(i)
self.count += 1
self.demlst = [-1 for i in range(self.count)] # using in Search function only
self.seclist = self.demlst
def Search(self,n):
x = self.head
#demlst = [-1 for i in range(self.count)]
y = 0
tst = -1 # checks value if not in our data
##############################
for i in range(self.count):
if n == x.value:
# demlst.append(i)
self.demlst[y] = i
tst = 0
x = x.next
y += 1
##############################
if tst == -1:
return -1
else:
pass
"""Demlist containes "indexes" """
for i in range(self.count):
if self.seclist[i] >= 0:
#y = self.seclist[i]
self.seclist[i] = -1
self.lim = i
return i
obj = DynamicSharpSearch([53,4,52,7,5,4,5,5,5,6,4,2,4,5,459]) # c = 6
print(obj.Search(5))
print(obj.Search(5))
print(obj.Search(5))
What output I am getting:
4
4
4
I would use a dictionary to keep track of the last returned index for each value.
When the Search method is called, it looks to see if it's looked for that value before: start_index = self.last_returned.get(n, 0). If start_index is >0, then you fast-forward to that index before initiating the search. Once you've found what you're looking for, update the dictionary: self.last_returned[n] = returned_index + 1. Why "+1"? Because otherwise you'll get what you're getting right now, the same index returned over and over. You need to start searching after the last index you returned. (You'd want to make sure you're not storing a value which would result in an IndexError, however.)

Binary Search Tree Frequency Counter

I need to read a text file, strip the unnecessary punctuation, lowercase the words and use binary search tree function to make a word binary search tree that consists of the words in the file.
We are asked to count the frequency of recurring words and asked for a total word count and total unique word count.
So far I've got the punctuation resolved, file read done, lowercase done, binary search tree basically done and I just need to figure out how to implement the "frequency" counter in the code.
My code is as follows:
class BSearchTree :
class _Node :
def __init__(self, word, left = None, right = None) :
self._word = word
self._count = 0
self._left = left
self._right = right
def __init__(self) :
self._root = None
self._wordc = 0
self._each = 0
def isEmpty(self) :
return self._root == None
def search(self, word) :
probe = self._root
while (probe != None) :
if word == probe._word :
return probe
if word < probe._value :
probe = probe._left
else :
probe = probe._right
return None
def insert(self, word) :
if self.isEmpty() :
self._root = self._Node(word)
self._root._freq += 1 <- is this correct?
return
parent = None #to keep track of parent
#we need above information to adjust
#link of parent of new node later
probe = self._root
while (probe != None) :
if word < probe._word : # go to left tree
parent = probe # before we go to child, save parent
probe = probe._left
elif word > probe._word : # go to right tree
parent = probe # before we go to child, save parent
probe = probe._right
if (word < parent._word) : #new value will be new left child
parent._left = self._Node(word)
else : #new value will be new right child
parent._right = self._Node(word)
cause formatting is killing me, this is the latter part of it.
class NotPresent(Exception) :
pass
def main():
t=BST()
file = open("sample.txt")
line = file.readline()
file.close()
#for word in line:
# t.insert(word)
# Line above crashes program because there are too many
# words to add. Lines on bottom tests BST class
t.insert('all')
t.insert('high')
t.insert('fly')
t.insert('can')
t.insert('boars')
#t.insert('all') <- how do i handle duplicates by making
t.inOrder() #extras add to the nodes frequency?
Thank you for helping/trying to help!
Firstly, it's better to initialize a Node's _freq by 1 than doing that in in BST's insert()
(1 more: In python coding convention, white spaces in writing default argument values are not recommended.)
def __init__(self, word, left=None, right=None) :
self._word = word
self._freq = 1
self._left = left
self._right = right
and just add the last 3 lines:
probe = self._root
while (probe != None) :
if word < probe._word : # go to left tree
parent = probe # before we go to child, save parent
probe = probe._left
elif word > probe._word : # go to right tree
parent = probe # before we go to child, save parent
probe = probe._right
else:
probe._freq += 1
return

Inserting into n-child tree in Python

I am trying to implement a tree for the travelling salesperson problem. My particular tree has 5 destinations which are fully connected to each other.
One of the destinations is guaranteed to always be the starting destination and that you are only allowed to visit each destination once with the exception of the starting destination which you have to return to (ie if you have [1,2,3,4,5] with 1 the starting destination, a possible sequence of moves would be 1-3-5-2-4-1)
I tried implementing a tree in python with the following code (I brute forced it since I know the maximum depth is going to be 5).
class Node(object):
def __init__(self,value, city, children = [None, None, None, None]):
self.value = value
self.city = city
self.children = children
class Tree(object):
def __init__(self):
self.root = None
def insert(self,value,city):
newNode = Node(value,city)
if self.root is None:
self.root = newNode
else:
self._insert(1, newNode)
def _insert(self,depth, newNode):
if depth is 1:
for x in range(0,4):
if self.root.children[x] is None:
self.root.children[x] = newNode
return
elif self.root.children[3] is not None:
self._insert(2, newNode)
return
if depth is 2:
for x in range(0,4):
for y in range(0,3):
if self.root.children[x].children[y] is None:
self.root.children[x].children[y] = newNode
return
elif self.root.children[3].children[2] is not None:
self._insert(3, newNode)
return
if depth is 3:
for w in range(0,4):
for x in range(0,3):
for y in range(0,2):
if self.root.children[w].children[x].children[y] is None:
self.root.children[w].children[x].children[y] = newNode
return
elif self.root.children[3].children[2].children[1] is not None:
self._insert(4,newNode)
return
if depth is 4:
for w in range(0,4):
for x in range(0,3):
for y in range(0,2):
for z in range(0,1):
if self.root.children[w].children[x].children[y].children[z] is None:
self.root.children[w].children[x].children[y].children[z] = newNode
return
elif self.root.children[3].children[2].children[1].children[0] is not None:
self._insert(5,newNode)
return
if depth is 5:
for w in range(0,4):
for x in range(0,3):
for y in range(0,2):
for z in range(0,1):
for u in range(0,1):
if self.root.children[w].children[x].children[y].children[z].children[u] is None:
self.root.children[w].children[x].children[y].children[z].children[u] = newNode
return
elif self.root.children[3].children[2].children[1].children[0].children[0] is not None and w is 3 and x is 2 and y is 1 and z is 0 and u is 0:
print "The table is full"
def delete(self):
self.root = None
x = Tree()
x.insert(0, "Pretoria")
x.insert(60, "Johannesburg")
x.insert(1200, "Cape Town")
x.insert (600, "Durban")
x.insert(400, "Bloemfontein")
x.insert(1400, "Port Elizabeth")
My root and first level populate correctly but all the children nodes of the second, third, fourth and fifth level all populate exactly the same as the first level. When I checked their memory, they all populated the exact same memory space and I have no idea why. This happens when the following line of code runs:
x.insert(1400, "Port Elizabeth")
The tree for some reason is fully populated at this point despite only having 5 entries.
I tried using pointers at first but the same issue crops up.
Long story short, how would one go about inserting into an n-ary tree with decreasing n as you increase in depth?
This particular tree has the following attributes:
Root: 4 children per node (1 node with 4 children)
Level 1: 3 children per node (4 nodes with 3 children)
Level 2: 2 children per node (12 nodes with 2 children)
level 3: 1 child per node (24 nodes with 1 child)
level 4: 1 child per node (24 nodes with 1 child) (this is final destination in the TSP)

Implementing Binary Search Tree (Python)

I have the task to perform some basic operations on Binary Search Trees and I'm not sure what is the clever way to do it.
I know that the usual way would be to write a class for the nodes and one for the tree so that I can build up my tree from given values and perform certain tasks on it. The thing is, I'm already getting the tree as a list and since BSTs are not unique, there won't come any good from it if I take each value and build the tree myself.
So... I'm getting a list like this:
11 9 2 13 _, 4 18 2 14 _, 2 10 _ 11 4, 14 16 4 _ _, 13 0 11 _ _ | 10 | 7
which means:
key value parent left right, ... | value1 | value2
So as you see the BST is given explicitly. My tasks are to do a level-print of the tree, return the path from root to value1, do a rotate-right operation on the subtree that has value1, then delete value1 and then insert value2.
What would be an efficient way to tackle this problem?
Here is one possible way of implementing the tree. Hope it helps. Though this contains insertions and popular traversals, not rotations or deletions.
Reference: http://www.thelearningpoint.net/computer-science/learning-python-programming-and-data-structures/learning-python-programming-and-data-structures--tutorial-20--graphs-breadth-and-depth-first-search-bfsdfs-dijkstra-algorithm-topological-search
'''
Binary Search Tree is a binary tree(that is every node has two branches),
in which the values contained in the left subtree is always less than the
root of that subtree, and the values contained in the right subtree is
always greater than the value of the root of the right subtree.
For more information about binary search trees, refer to :
http://en.wikipedia.org/wiki/Binary_search_tree
'''
#Only for use in Python 2.6.0a2 and later
from __future__ import print_function
class Node:
# Constructor to initialize data
# If data is not given by user,its taken as None
def __init__(self, data=None, left=None, right=None):
self.data = data
self.left = left
self.right = right
# __str__ returns string equivalent of Object
def __str__(self):
return "Node[Data = %s]" % (self.data,)
class BinarySearchTree:
def __init__(self):
self.root = None
'''
While inserting values in a binary search tree, we first check
whether the value is greater than, lesser than or equal to the
root of the tree.
We initialize current node as the root.
If the value is greater than the current node value, then we know that
its right location will be in the right subtree. So we make the current
element as the right node.
If the value is lesser than the current node value, then we know that
its right location will be in the left subtree. So we make the current
element as the left node.
If the value is equal to the current node value, then we know that the
value is already contained in the tree and doesn't need to be reinserted.
So we break from the loop.
'''
def insert(self, val):
if (self.root == None):
self.root = Node(val)
else:
current = self.root
while 1:
if (current.data > val):
if (current.left == None):
current.left = Node(val)
break
else:
current = current.left
elif (current.data < val):
if (current.right == None):
current.right = Node(val)
break
else:
current = current.right
else:
break
'''
In preorder traversal, we first print the current element, then
move on to the left subtree and finally to the right subree.
'''
def preorder(self, node):
if (node == None):
return
else:
print(node.data, end=" ")
self.preorder(node.left)
self.preorder(node.right)
'''
In inorder traversal, we first move to the left subtree, then print
the current element and finally move to the right subtree.
'''
#Important : Inorder traversal returns the elements in sorted form.
def inorder(self, node):
if (node == None):
return
else:
self.inorder(node.left)
print(node.data, end=" ")
self.inorder(node.right)
'''
In postorder traversal, we first move to the left subtree, then to the
right subtree and finally print the current element.
'''
def postorder(self, node):
if (node == None):
return
else:
self.postorder(node.left)
self.postorder(node.right)
print(node.data, end=" ")
tree = BinarySearchTree()
tree.insert(1)
tree.insert(9)
tree.insert(4)
tree.insert(3)
tree.insert(5)
tree.insert(7)
tree.insert(10)
tree.insert(0)
print ("Preorder Printing")
tree.preorder(tree.root)
print("\n\nInorder Printing")
tree.inorder(tree.root)
print("\n\nPostOrder Printing")
tree.postorder(tree.root)
Here is the implementation of Binary Search Tree with it's basic operations like insert node, find node
class Node:
def __init__(self,data):
self.left = None
self.right = None
self.data = data
class BST:
def __init__(self):
self.root = None
def set_root(self,data):
self.root = Node(data)
def insert_node(self,data):
if self.root is None:
self.set_root(data)
else:
n = Node(data)
troot = self.root
while troot:
if data < troot.data:
if troot.left:
troot = troot.left
else:
troot.left = n
break
else:
if troot.right:
troot = troot.right
else:
troot.right = n
break
def search_node(self,data):
if self.root is None:
return "Not found"
else:
troot = self.root
while troot:
if data < troot.data:
if troot.left:
troot = troot.left
if troot.data == data:
return "Found"
else:
return "Not found"
elif data > troot.data:
if troot.right:
troot = troot.right
if troot.data == data:
return "Found"
else:
return "Not found"
else:
return "Found"
tree = BST()
tree.insert_node(10)
tree.insert_node(5)
tree.insert_node(20)
tree.insert_node(7)
print(tree.root.data)
print(tree.root.left.data)
print(tree.root.right.data)
print(tree.root.left.right.data)
print(tree.search_node(10))
print(tree.search_node(5))
print(tree.search_node(20))
print(tree.search_node(7))
print(tree.search_node(12))
print(tree.search_node(15))
Output:
10
5
20
7
Found
Found
Found
Found
Not found
Not found
In this specific case I had success using a dictionary as a datatype to store the graph. The key is the node_key and the value is a list with the attributes of the node. In this way it is rather fast to find the needed nodes and all its attributes.
I'm just not sure if there is a way to make it reasonably faster.

Categories