I need help implementing a method for my "MyHashTable" class:
def search(self, search_key):
The method is supposed to use linear probing to handle collision resolution. If the search_key is in the hash table then the method returns the slot number of the slot containing that search_key. If the search_key is not in the hash table, the method returns -1
My class looks like this:
class MyHashTable:
def __init__(self, capacity):
self.capacity = capacity
self.slots = [None] * self.capacity
def __str__(self):
return str(self.slots )
def __len__(self):
count = 0
for i in self.slots:
if i != None:
count += 1
return count
def hash_function(self, key):
i = key % self.capacity
return i
def insert(self, key):
slot = self.hash_function(key)
orig = slot
while True:
if self.slots[slot] is None:
self.slots[slot] = key
return slot
if self.slots[slot] == key:
return -2
slot = (slot + 1) % self.capacity
if slot == orig:
return -1
def search(self, search_key):
Any help or tutorial links would be awesome.
Thanks
You are only using a single list to store all the values, if you wanted a hash table you might use a list of lists where each list was a bucket but if you just want to check if the element is in your hash table with your own code:
def search(self, search_key):
hsh = self.hash_function(search_key)
if self.slots[hsh] is None:
return -1
while hsh < self.capacity:
if self.slots[hsh] == search_key:
return hsh
hsh += 1
return -1
You also have to handle the case where you have multiple collisions so we need at worst to check every element in the hash table to find the correct value:
def search(self, search_key):
hsh = self.hash_function(search_key)
if self.slots[hsh] is None:
return -1
for i in range(self.capacity):
mod = (hsh + i) % self.capacity
if self.slots[mod] == search_key:
return mod
return -1
The first while loop will probe one value over at a time but if we have wrapped around the list from multiple collisions it would miss elements at the start so using range and mod = (hsh + i) % self.capacity makes sure we check all entries like the example below.
m = MyHashTable(5)
m.insert(13) # 13 % 5 = 3
m.insert(73) # 83 % 5 = 3
m.insert(93) # 93 & 5 = 3
print(m.search(13)) # 3
print(m.search(73)) # 4
print(m.search(93)) # 0
print(m.search(2)) # -1
You can make your len method O(1) by keeping track of when you add a unique value to your hash table, there is also a nice wiki page on Open_addressing parts of which you can adopt into your code and it will help you create a proper mapping of keys to values and resized your hash table when needed. If you want to store more than just numbers you need to use a different hash function, I just use hash but you can use whatever you like. Also using in when your hash table is full and the key does not exist will cause an infinite loop so you will need to handle that case:
class MyHashTable:
def __init__(self, capacity):
self.capacity = capacity
self.slots = [None] * self.capacity
self.count = 0
def __str__(self):
return str(self.slots)
def __contains__(self, item):
return self.search(item) != -1
def __len__(self):
return self.count
def hash_function(self, key):
return hash(key) % self.capacity
def find_slot(self, key):
slot = self.hash_function(key)
while self.slots[slot] is not None and self.slots[slot] != key:
slot = (slot + 1) % self.capacity
return slot
def insert(self, key):
slot = self.find_slot(key)
if self.slots[slot] != key:
self.slots[slot] = key
self.count += 1
def search(self, key):
i = self.find_slot(key)
if self.slots[i] is not None:
return i
return -1
Add a __contains__ will also allow you to use in to test for membership:
m = MyHashTable(5)
m.insert("foo")
m.insert(73)
m.insert(93)
m.insert(1)
print(m.search(73))
print(m.search(93))
print(m.search(1))
print(m.search("foo"))
m.insert(73)
print(m.slots)
print(len(m))
print("foo" in m)
print(5 in m)
Output:
3
4
1
0
['foo', 1, None, 73, 93]
4
True
False
Related
I'm given a .txt file with a total of 1 million songs and their respective authors. My given task is to write a HashMap in python which can store this information using either the Author or the Song as a key. I've written a HashMap that works but is running incredibly slow, taking up to 2 minutes to finish. (Expected time is apparently a few seconds at most, according to my tutor)
For collision handling I decided to use linked lists as from what I've gathered it's an effective way to handle collisions without drastically reducing performance.
from HashNode import HashNode
class HashTabell:
def __init__(self, size):
self.size = size
self.dict = {}
self.krock = 0
def store(self, nyckel, data):
hashval = self.__hash(nyckel)
## Shit is empty
if self.dict.get(hashval) != None:
get_val = self.dict[hashval]
## Is list, append normally
if isinstance(get_val, list):
list2 = self.dict[hashval]
found = False
for (k, val) in enumerate(list2):
if val.get_nyckel == nyckel:
list[k] = HashNode(nyckel, data) ## Update old value
found = True
break
if found:
self.dict[hashval] = list2
else:
self.dict[hashval] = get_val + [HashNode(nyckel, data)]
self.krock += 1
else:
## Create list
if get_val.get_nyckel() == nyckel:
self.dict[hashval] = HashNode(nyckel, data) ## Update old value
else:
self.dict[hashval] = [get_val, HashNode(nyckel, data)] ## Append to existing node
self.krock += 1
else:
self.dict[hashval] = HashNode(nyckel, data)
def __getitem__(self, nyckel):
return search(nyckel)
def __contains__(self, nyckel):
return (search(nyckel) != None)
def search(self, nyckel):
hashval = self.__hash(nyckel)
## Get val
get_val = self.dict.get(hashval)
if get_val == None:
raise KeyError("Key not found")
## Check if has multiple entries or not
if isinstance(get_val, list):
## Multiple
for value in get_val:
if(get_val.get_nyckel() == nyckel):
return get_val
raise KeyError("Key not found")
else:
## Single
if get_val.get_nyckel() == nyckel:
return get_val
else:
raise KeyError("Key not found")
## Hash function
def __hash(self, input):
inp = str(input) ## Get chars
value = 0
for k in input:
value += ord(k)
return (value % self.size)
def get_dict(self):
return self.dict
def get_krock(self):
return self.krock
Where the HashNode class is simply:
class HashNode:
def __init__(self, nyckel, data):
self.nyckel = nyckel
self.data = data
def get_nyckel(self):
return self.nyckel
def get_data(self):
return self.data
I've been staring myself blind with this issue for the past 2 weeks and I'm not getting any help from my lecturer/assistants, would greatly appreciate any advice on how to improve the speed.
I have a little problem with two different classes and two methods from the same class. I have a class B which is using both methods from class a which seems to work fine.
The problem however is that the first method from class a (insert) changes a list which the second method (lookup) from this class should use. It is using the global list which is still initiated with only zeroes. So I have no idea how to tell the method to use the HashMap from the insert method :/ I Hope somebody can help, thank you!
""" PUBLIC MEMBERS
Insert the given key (given as a string) with the given value (given as
an integer). If the hash table already contains an entry for the given key,
update the value of this entry with the given value.
"""
class Map:
global m
m = 10000
global HashMap
HashMap = []
for i in range(m):
HashMap.append(0)
#classmethod
def insert(self, key, value):
"""
>>> Map.insert("hi", 9)
[4,53]
"""
self.key = key
self.value = value
asci = 0
for i in key:
asci += ord(i)
hashindex = (asci%m)*2
print(hashindex)
print(HashMap[hashindex])
if HashMap[hashindex] == key:
HashMap[hashindex + 1] = value
else:
while HashMap[hashindex] != 0:
hashindex = ((asci+1)%m)*2
HashMap[hashindex] = key
HashMap[hashindex+1] = value
""" Check if there exists an entry with the given key in the hash table.
If such an entry exists, return its associated integer value.
Otherwise return -1.
"""
#classmethod
def lookup(self, key):
self.key = key
ascilookup = 0
for i in key:
ascilookup += ord(i)
indexlookup = (ascilookup%m)*2
for j in HashMap:
if HashMap[j]==key:
return HashMap[j + 1]
elif HashMap[j]==0:
return "-1"
else:
j =((j+1)%m)*2
if __name__ == "__main__":
import doctest
doctest.testmod()
This is a far simpler implementation of a map in python:
class Map:
HashMap = {}
def __init__(self,leng):
for i in range(leng):
self.HashMap[str(i)]=0
def insert(self, key, value):
self.HashMap[key]=value
def lookup(self, key):
for each in self.HashMap.iterkeys():
if each == key:
return self.HashMap[each]
return None
EDIT without using a dictionary, using two lists is easier:
class Map:
keys = []
values = []
def __init__(self,leng):
for i in range(leng):
self.keys.append(str(i))
self.values.append(0)
#classmethod
def insert(self, key, value):
self.keys.append(key)
self.values.append(value)
#classmethod
def lookup(self, key):
for x in range(0, len(self.keys)):
if self.keys[x] == key:
return self.values[x]
return None
class _ListNode:
def __init__(self, value, next_):
self._data = value
self._next = next_
return
class List:
def __init__(self):
self._front = None
self._count = 0
return
def _linear_search(self,key):
previous = None
current = self._front
index = 0
while current is not None and key > current._data:
previous = current
current = current._next
index += 1
if current._data != key:
previous = None
current = None
index = -1
return previous, current, index
def __contains__(self, key):
_, _, i = self._linear_search(key)
return i != -1
def append(self, value):
if self._front is None:
self._front = _ListNode(value,None)
else:
self._front._next = _ListNode(value,None)
self._count += 1
l = List()
lst = [1,2,3,4]
i = 0
n = len(lst)
while i < n:
l.append(lst[i])
i += 1
print("{}".format(l.__contains(3))
To explain more, I implement the linear search method and the contains method. The contains method check if the number is in the list or not (returns true or false). Now when I need to check that #3 in the list using contains method, the answer is false!! i cant know what's the problem
Your append method does not walk down the list. It simply always appends to self._front._next if self.front is already present. Meaning the contents at the end of the append loop are the first thing you appended, the last thing you appended and nothing in between.
To correct it walk the list looking for a _next equal to None and append there.
def append(self, value):
if self._front is None:
self._front = _ListNode(value, None)
else:
n = self._front
while n._next is not None:
n = n._next
n._next = _ListNode(value, None)
self._count += 1
You could also define a _str__ method to print the content of the List
e.g.
def __str__(self):
res = []
n = self._front
while n is not None:
res.append(str(n._data))
n = n._next
return ', '.join(res)
This is not a particularly efficient implementation as it builds an intermediate builtin list object.
You also don't need those bare return statements in your methods. You can remove those.
I have an entire Deque Array class that looks like this:
from collections import deque
import ctypes
class dequeArray:
DEFAULT_CAPACITY = 10 #moderate capacity for all new queues
def __init__(self):
self.capacity = 5
capacity = self.capacity
self._data = self._make_array(self.capacity)
self._size = 0
self._front = 0
def __len__(self):
return self._size
def __getitem__(self, k): #Return element at index k
if not 0 <= k < self._size:
raise IndexError('invalid index')
return self._data[k]
def isEmpty(self):
if self._data == 0:
return False
else:
return True
def append(self, item): #add an element to the back of the queue
if self._size == self.capacity:
self._data.pop(0)
else:
avail = (self._front + self._size) % len(self._data)
self._data[avail] = item
self._size += 1
#def _resize(self, c):
#B = self._make_array(c)
#for k in range(self._size):
#B[k] = self._A[k]
#self._data = B
#self.capacity = capacity
def _make_array(self, c):
capacity = self.capacity
return (capacity * ctypes.py_object)()
def removeFirst(self):
if self._size == self.capacity:
self._data.pop(0)
else:
answer = self._data[self._front]
self._data[self._front] = None
self._front = (self._front + 1) % len(self._data)
self._size -= 1
print(answer)
def removeLast(self):
return self._data.popleft()
def __str__(self):
return str(self._data)
and when I try to print the deque in the main it prints out something like this,
<bound method dequeArray.__str__ of <__main__.dequeArray object at 0x1053aec88>>
when it should be printing the entire array. I think i need to use the str function and i tried adding
def __str__(self):
return str(self._data)
and that failed to give me the output. I also tried just
def __str__(self):
return str(d)
d being the deque array but I still am not having any success. How do I do i get it to print correctly?
you should call the str function of each element of the array that is not NULL, can be done with the following str function:
def __str__(self):
contents = ", ".join(map(str, self._data[:self._size]))
return "dequeArray[{}]".format(contents)
What I get when I try to q = dequeArray(); print(q) is <__main__.py_object_Array_5 object at 0x006188A0> which makes sense. If you want it list-like, use something like this (print uses __str__ method implicitly):
def __str__(self):
values = []
for i in range(5):
try:
values.append(self._data[i])
except ValueError: # since accessing ctypes array by index
# prior to assignment to this index raises
# the exception
values.append('NULL (never used)')
return repr(values)
Also, several things about the code:
from collections import deque
This import is never user and should be removed.
DEFAULT_CAPACITY = 10
is never used. Consider using it in the __init__:
def __init__(self, capacity=None):
self.capacity = capacity or self.DEFAULT_CAPACITY
This variable inside __init__ is never user and should be removed:
capacity = self.capacity
def _make_array(self, c):
capacity = self.capacity
return (capacity * ctypes.py_object)()
Though this is a valid code, you're doing it wrong unless you're absolutely required to do it in your assignment. Ctypes shouldn't be used like this, Python is a language with automated memory management. Just return [] would be fine. And yes, variable c is never used and should be removed from the signature.
if self._data == 0
In isEmpty always evaluates to False because you're comparing ctypes object with zero, and ctypes object is definitely not a zero.
class MyHashTable:
def __init__(self, capacity):
self.capacity = capacity
self.slots = [None] * self.capacity
def __str__(self):
return str(self.slots )
def __len__(self):
count = 0
for i in self.slots:
if i != None:
count += 1
return count
def hash_function(self, key):
slot = key % len(self.slots)
if key in self.slots:
return slot
elif (not key in self.slots) and len(self.slots) == self.capacity:
return slot
else:
for i in self.slots:
count = 0
if i == None:
return count
count += 1
def insert(self, key):
print(len(self.slots)) #Why does this show an output of 2?
if key in self.slots:
return -2
elif (not key in self.slots) and (len(self.slots) != self.capacity): #Now this cant execute
num = hash_function(key)
self.slots[num] = key
return num
elif (not key in self.slots) and len(self.slots) == self.capacity:
return -1
Im wondering why the commented part above in the insert(self, key) the print statement gives (2) instead of (0). The elif statement underneath wont execute since its giving a result of (2) instead of (0)
A function call of
x = MyHashTable(2)
print(len(x))
Should give: 0
You're initializing self.slots = [None] * self.capacity, so with capacity = 2, self.slots is [None, None], which is of length 2.
Your __len__ method doesn't run because len(self.slot) calls self.slot.__len__, not self.__len__. If you'd like to use your override method, you should be calling len(self) instead.
You have to call your __len__ function (by calling self.__len__() ) if you want the length of the elements which are not None. For lists None are valid entries.
By the way. It is always best to compare with None by a is None or a is not None instead of == or !=.