Python Hashing with lists - python

I need some help making a insert function that adds values into a hash table where each table position is a list. If there is collision the value just gets added to the list at the right position.
class MyChainHashTable:
def __init__(self, capacity):
self.capacity = capacity
self.slots = []
for i in range(self.capacity):
self.slots.append([])
def __str__(self):
info = ""
for items in self.slots:
info += str(items)
return info
def __len__(self):
count = 0
for i in self.slots:
count += len(i)
return count
def hash_function(self, key):
i = key % self.capacity
return i
def insert(self, key):
#need help
#this should insert each value into seperate lists, and if there is collision
#it should add the value to make a list with +1 positions.
#eg. [26][][54, 93][][17, 43][31][][][][][][][77, 90]

def insert(self, key):
self.slots[self.hash_function(key)].append(key)

You can just use a dictionary for this:
def insertWithChain(dict, key, value):
if key in d:
d[key].append(value) # add value to an existing list
else:
d[key] = [value] # new value in a new list by itself

Related

I'm having difficulty speeding up my HashMap

I'm given a .txt file with a total of 1 million songs and their respective authors. My given task is to write a HashMap in python which can store this information using either the Author or the Song as a key. I've written a HashMap that works but is running incredibly slow, taking up to 2 minutes to finish. (Expected time is apparently a few seconds at most, according to my tutor)
For collision handling I decided to use linked lists as from what I've gathered it's an effective way to handle collisions without drastically reducing performance.
from HashNode import HashNode
class HashTabell:
def __init__(self, size):
self.size = size
self.dict = {}
self.krock = 0
def store(self, nyckel, data):
hashval = self.__hash(nyckel)
## Shit is empty
if self.dict.get(hashval) != None:
get_val = self.dict[hashval]
## Is list, append normally
if isinstance(get_val, list):
list2 = self.dict[hashval]
found = False
for (k, val) in enumerate(list2):
if val.get_nyckel == nyckel:
list[k] = HashNode(nyckel, data) ## Update old value
found = True
break
if found:
self.dict[hashval] = list2
else:
self.dict[hashval] = get_val + [HashNode(nyckel, data)]
self.krock += 1
else:
## Create list
if get_val.get_nyckel() == nyckel:
self.dict[hashval] = HashNode(nyckel, data) ## Update old value
else:
self.dict[hashval] = [get_val, HashNode(nyckel, data)] ## Append to existing node
self.krock += 1
else:
self.dict[hashval] = HashNode(nyckel, data)
def __getitem__(self, nyckel):
return search(nyckel)
def __contains__(self, nyckel):
return (search(nyckel) != None)
def search(self, nyckel):
hashval = self.__hash(nyckel)
## Get val
get_val = self.dict.get(hashval)
if get_val == None:
raise KeyError("Key not found")
## Check if has multiple entries or not
if isinstance(get_val, list):
## Multiple
for value in get_val:
if(get_val.get_nyckel() == nyckel):
return get_val
raise KeyError("Key not found")
else:
## Single
if get_val.get_nyckel() == nyckel:
return get_val
else:
raise KeyError("Key not found")
## Hash function
def __hash(self, input):
inp = str(input) ## Get chars
value = 0
for k in input:
value += ord(k)
return (value % self.size)
def get_dict(self):
return self.dict
def get_krock(self):
return self.krock
Where the HashNode class is simply:
class HashNode:
def __init__(self, nyckel, data):
self.nyckel = nyckel
self.data = data
def get_nyckel(self):
return self.nyckel
def get_data(self):
return self.data
I've been staring myself blind with this issue for the past 2 weeks and I'm not getting any help from my lecturer/assistants, would greatly appreciate any advice on how to improve the speed.

Hash table and the last value of a key

I have a class of hash table. method 'add' adds key and a value. And when I add another value for the same key I would like to replace an old value on a new one. But I don't know what I have to change:)
class HashNode:
def __init__(self, key, value):
self.next = None
self.key = key
self.value = value
class HashTable:
def __init__(self):
self.table = [None] * 1000
def hash(self, key):
hashed = 0
for i in range(len(key)):
hashed = (256 * hashed + ord(key[i])) % 1000
return hashed
def add(self, key, value):
bucket = self.hash(key)
if self.table[bucket]:
temp = self.table[bucket]
while temp.next:
temp = temp.next
temp.next = HashNode(key, value)
else:
self.table[bucket] = HashNode(key, value)
def find(self, key):
bucket = self.hash(key)
if not self.table[bucket]:
return 'none'
else:
temp = self.table[bucket]
while temp:
if temp.key == key:
return temp.value
temp = temp.next
return 'none'
table = HashTable()
table.add('a', 1)
table.add('a', 2)
I am a getting key value '1' but I want '2'
table.find('a')
To elaborate on #mkrieger1's comment: your question is the exact reason why the buckets are not simple cells and why you store the keys in the buckets. If you had no collision, that is key1 != key2 implies hash(key1) != hash(key2)1, you wouln't need to store keys:
def add(self, key, value):
bucket = self.hash(key)
self.table[bucket] = value
def find(self, key, value):
bucket = self.hash(key)
return self.table[bucket]
But you might have collisions. That's why you are using a linked list to store several (key, value) pairs for the keys having the same hash. You correctly handled the collisions in the find method:
temp = self.table[bucket]
while temp:
if temp.key == key: # key found!
return temp.value # return the value
temp = temp.next
return 'none' # why not None?
You should do the same in the add method:
temp = self.table[bucket]
while temp.next:
if temp.key == key: # key found!
temp.value = value # update the value
return # and return
temp = temp.next
temp.next = HashNode(key, value) # key not found: create the entry
Both methods are now symmetrical.
1In maths terms, hash is injective. That is theoritecally possible assuming some conditions that are rarely met.
Remark: you could take advantage of a method that finds HashNodes:
def _find(self, bucket, key):
temp = self.table[bucket]
while temp:
if temp.key == key:
return temp
temp = temp.next
return None
And insert the new keys at the beginning:
def add(self, key, value):
bucket = self.hash(key)
node = self._find(bucket, key)
if node is None:
self.table[bucket] = HashNode(key, value, self.table[bucket]) # last parameter is next
else:
node.value = value
def find(self, key):
bucket = self.hash(key)
node = self._find(bucket, key)
if node is None:
return None
else:
return node.value
The symmetry is even more visible.

Python dictionary not adding subsequent keys after the first

Fairly new to Python and I can not figure this out. I go to add a key to a dictionary and it adds it fine. I can even update that same key with a new value, however when I go to add a second key to the dictionary, it does not add the second key value pair.
class CountedSet:
def __init__(self):
self.data = {}
def __iadd__(self,other):
if isinstance(other,int):
self.data[other] = self.data.get(other, 0) + 1
return self
elif isinstance(other,CountedSet):
#TODO::iterate through second countedSet and update self
return self
def __add__(self,obj):
for key, value in obj.data.items():
if len(self.data) == 0:
self.data[key] = value
elif self.data[key]:
self.data[key] = self.data[key] + value
else:
self.data[key] = value
return self
def __getitem__(self,item):
if item in self.data:
return self.data.get(item)
else:
return None
def __str__(self):
for key, value in self.data.items():
return("{%s,%s}" % (key,value))
a = CountedSet()
a += 17
a += 4
print(a)
This simply outputs {17,1} when I would expect to see {17,1} {4,1}
Your __str__ implementation returns on the first iteration of the for-loop:
def __str__(self):
for key, value in self.data.items():
return("{%s,%s}" % (key,value)) # here
Maybe you want something like:
def __str__(self):
return " ".join([{"{%s,%s}" % (k,v) for k, v in self.data.items()])
Or, without the comprehension:
def __str__(self):
items = []
for key, value in self.data.items():
items.append("{%s,%s}" % (key,value))
return ' '.join(items)

Python - Accesing a list from another class method

I have a little problem with two different classes and two methods from the same class. I have a class B which is using both methods from class a which seems to work fine.
The problem however is that the first method from class a (insert) changes a list which the second method (lookup) from this class should use. It is using the global list which is still initiated with only zeroes. So I have no idea how to tell the method to use the HashMap from the insert method :/ I Hope somebody can help, thank you!
""" PUBLIC MEMBERS
Insert the given key (given as a string) with the given value (given as
an integer). If the hash table already contains an entry for the given key,
update the value of this entry with the given value.
"""
class Map:
global m
m = 10000
global HashMap
HashMap = []
for i in range(m):
HashMap.append(0)
#classmethod
def insert(self, key, value):
"""
>>> Map.insert("hi", 9)
[4,53]
"""
self.key = key
self.value = value
asci = 0
for i in key:
asci += ord(i)
hashindex = (asci%m)*2
print(hashindex)
print(HashMap[hashindex])
if HashMap[hashindex] == key:
HashMap[hashindex + 1] = value
else:
while HashMap[hashindex] != 0:
hashindex = ((asci+1)%m)*2
HashMap[hashindex] = key
HashMap[hashindex+1] = value
""" Check if there exists an entry with the given key in the hash table.
If such an entry exists, return its associated integer value.
Otherwise return -1.
"""
#classmethod
def lookup(self, key):
self.key = key
ascilookup = 0
for i in key:
ascilookup += ord(i)
indexlookup = (ascilookup%m)*2
for j in HashMap:
if HashMap[j]==key:
return HashMap[j + 1]
elif HashMap[j]==0:
return "-1"
else:
j =((j+1)%m)*2
if __name__ == "__main__":
import doctest
doctest.testmod()
This is a far simpler implementation of a map in python:
class Map:
HashMap = {}
def __init__(self,leng):
for i in range(leng):
self.HashMap[str(i)]=0
def insert(self, key, value):
self.HashMap[key]=value
def lookup(self, key):
for each in self.HashMap.iterkeys():
if each == key:
return self.HashMap[each]
return None
EDIT without using a dictionary, using two lists is easier:
class Map:
keys = []
values = []
def __init__(self,leng):
for i in range(leng):
self.keys.append(str(i))
self.values.append(0)
#classmethod
def insert(self, key, value):
self.keys.append(key)
self.values.append(value)
#classmethod
def lookup(self, key):
for x in range(0, len(self.keys)):
if self.keys[x] == key:
return self.values[x]
return None

Python MyHashTable class: search method with linear probing

I need help implementing a method for my "MyHashTable" class:
def search(self, search_key):
The method is supposed to use linear probing to handle collision resolution. If the search_key is in the hash table then the method returns the slot number of the slot containing that search_key. If the search_key is not in the hash table, the method returns -1
My class looks like this:
class MyHashTable:
def __init__(self, capacity):
self.capacity = capacity
self.slots = [None] * self.capacity
def __str__(self):
return str(self.slots )
def __len__(self):
count = 0
for i in self.slots:
if i != None:
count += 1
return count
def hash_function(self, key):
i = key % self.capacity
return i
def insert(self, key):
slot = self.hash_function(key)
orig = slot
while True:
if self.slots[slot] is None:
self.slots[slot] = key
return slot
if self.slots[slot] == key:
return -2
slot = (slot + 1) % self.capacity
if slot == orig:
return -1
def search(self, search_key):
Any help or tutorial links would be awesome.
Thanks
You are only using a single list to store all the values, if you wanted a hash table you might use a list of lists where each list was a bucket but if you just want to check if the element is in your hash table with your own code:
def search(self, search_key):
hsh = self.hash_function(search_key)
if self.slots[hsh] is None:
return -1
while hsh < self.capacity:
if self.slots[hsh] == search_key:
return hsh
hsh += 1
return -1
You also have to handle the case where you have multiple collisions so we need at worst to check every element in the hash table to find the correct value:
def search(self, search_key):
hsh = self.hash_function(search_key)
if self.slots[hsh] is None:
return -1
for i in range(self.capacity):
mod = (hsh + i) % self.capacity
if self.slots[mod] == search_key:
return mod
return -1
The first while loop will probe one value over at a time but if we have wrapped around the list from multiple collisions it would miss elements at the start so using range and mod = (hsh + i) % self.capacity makes sure we check all entries like the example below.
m = MyHashTable(5)
m.insert(13) # 13 % 5 = 3
m.insert(73) # 83 % 5 = 3
m.insert(93) # 93 & 5 = 3
print(m.search(13)) # 3
print(m.search(73)) # 4
print(m.search(93)) # 0
print(m.search(2)) # -1
You can make your len method O(1) by keeping track of when you add a unique value to your hash table, there is also a nice wiki page on Open_addressing parts of which you can adopt into your code and it will help you create a proper mapping of keys to values and resized your hash table when needed. If you want to store more than just numbers you need to use a different hash function, I just use hash but you can use whatever you like. Also using in when your hash table is full and the key does not exist will cause an infinite loop so you will need to handle that case:
class MyHashTable:
def __init__(self, capacity):
self.capacity = capacity
self.slots = [None] * self.capacity
self.count = 0
def __str__(self):
return str(self.slots)
def __contains__(self, item):
return self.search(item) != -1
def __len__(self):
return self.count
def hash_function(self, key):
return hash(key) % self.capacity
def find_slot(self, key):
slot = self.hash_function(key)
while self.slots[slot] is not None and self.slots[slot] != key:
slot = (slot + 1) % self.capacity
return slot
def insert(self, key):
slot = self.find_slot(key)
if self.slots[slot] != key:
self.slots[slot] = key
self.count += 1
def search(self, key):
i = self.find_slot(key)
if self.slots[i] is not None:
return i
return -1
Add a __contains__ will also allow you to use in to test for membership:
m = MyHashTable(5)
m.insert("foo")
m.insert(73)
m.insert(93)
m.insert(1)
print(m.search(73))
print(m.search(93))
print(m.search(1))
print(m.search("foo"))
m.insert(73)
print(m.slots)
print(len(m))
print("foo" in m)
print(5 in m)
Output:
3
4
1
0
['foo', 1, None, 73, 93]
4
True
False

Categories