Encoutering "Terminated due to timeout" Error for Hackerrank Hash Table: Ransom Note for 6 out of 21 test cases
Implemented open address hashing. The size of input strings is up to 30,000 strings: Have experimented with changing the hash table size from 60,000 to 300,000 to no success.
CAPACITY = 300000
hashTable = [None] * CAPACITY
def checkMagazine(magazine, note):
# Store Magazine into hashtable
for element in magazine:
# print("STORED " + element)
position = calculateHash(element)
# print(position)
if hashTable[position] == None:
hashTable[position] = element
# print("Stored into " + str(position))
else:
i = 1
# print("collided into " + str((position) % CAPACITY))
while hashTable[(position + i) % CAPACITY] != None:
# print("collided into " + str((position + i) % CAPACITY))
i += 1
hashTable[(position + i) % CAPACITY] = element
# Check if all items in note is in hashtable
included = True
for item in note:
position = calculateHash(item)
if hashTable[position] != item:
i = 1
while hashTable[(position + i ) % CAPACITY] != item:
if hashTable[(position + i ) % CAPACITY] == None:
included = False
print("No")
return
else:
i += 1
hashTable[(position + i ) % CAPACITY] = "DONED"
else:
hashTable[position] = "DONED"
# print("Found " + item)
print("Yes")
def calculateHash(string):
return hash(string) % CAPACITY
Given that hash table is the optimal way to solve this problem (time complexity O(n)), is the reason why the timeout is happening is because of open address hashing? Or is there another reason?
I think the issue relates to your implementation. Have a look at what happens to your code if you pass a large "magazine" input like ["a", "a", "a", .... "a"].
Should have tried something like this:
magazine = 'two times three is not four'
note = 'two times two is four'
if set(magazine.split()) & set(note.split()) == set(note.split()) :
print 'yes'
else :
print 'no'
you may save some time by precalculating set(note.split()), but I doubt it's very large.
if you care about the number of words, you may use Counter:
from collections import Counter
and then check for every word in the note the counter is smaller than the counter for the same word in magazine.
Related
I'm trying to do the polish notation challenge on kattis.com. Thing is, I feel I have done everything they asked for and I've tried fixing everything I could think of. I even looked up some other's solutions and while theirs are more clean I want to continue on mine as I am learning.
Why is it that for example this person's code works but not mine?
Here is my current code:
import sys
case = 1
valid_ints = set([str(i) for i in range(-10,11)])
def simplify(index, myLine, processed):
while index+1 > 0:
if (myLine[index] == "+" or myLine[index] == "-" or myLine[index] == "*") and index < len(myLine)-2:
if myLine[index+1] in valid_ints and myLine[index+2] in valid_ints:
try:
processed = myLine[index+3:] + processed
a = str(myLine[index+1] + myLine[index] + myLine[index+2])
processed.insert(0, str(eval(a)))
del myLine[index:]
except:
processed = [myLine[index], myLine[index+1], myLine[index+2]] + processed
del myLine[index:]
elif len(myLine) < 3:
processed = myLine + processed
del myLine[index]
index -= 1
processed = myLine + processed
return processed
for line in sys.stdin:
myLine = line.split()
processed = []
index = len(myLine)-1
savedprocessed = []
processed = simplify(index, myLine, processed)
while True:
if savedprocessed == processed:
break
else:
savedprocessed = []
savedprocessed += processed
processed = simplify(len(processed)-1, processed, [])
result = " ".join(savedprocessed)
print("Case " + str(case) + ": " + result)
case += 1
if case > 5:
break
You're bringing some other language style to Python, that's unnecessary because Python is more flexible.
I've simplified as much as I can here.
Split the input string on white spaces and iterate over the tokens.
For every operator in the expression, push a list onto the stack and append the operator and its operands to the list.
Now pop each list off the stack and process the list
def simplify(exp):
stack1 = []
ops = set('+*-')
for token in exp.split():
if token in ops:
stack1.append([])
stack1[-1].append(token)
stack2 = []
while stack1:
top = stack1.pop()
while len(top) < 3 and stack2:
top.append(stack2.pop())
if any(x.isalpha() for x in top):
simplified = ' '.join(top)
else:
top[0], top[1] = top[1], top[0]
simplified = str(eval(''.join(top)))
stack2.append(simplified)
return simplified
exp = '* - 6 + x -6 - - 9 6 * 0 c'
print(exp)
simplify(exp)
Output;
* - 6 + x -6 - - 9 6 * 0 c
* - 6 + x -6 - - 3 * 0 c
I am working on this problem where I need to find all of the combinations of the purchase items in a .csv file that equal a total. The purchase items can be positive and negative. When I run my code with practice data I can get the correct answer but when I use the real data which has hundreds of purchases it runs forever. I am wondering how to speed up the processing time of the following code.
import decimal
import pandas
import datetime
df = pandas.read_csv('negative_test.csv')
print(df)
# Index are the keys of the dictonary
index_list = list(df.index)
# Values are the values of the dictonary
values = df["Purchase"].to_list()
target = df["Target"].to_list()[0]
print(target)
def sanitize_values(list1):
sanitized_list = []
for item in list1:
sanitized_list.append(float(item.replace(",","")))
return sanitized_list
def merge(list1, list2):
merged_list = [(list1[i], list2[i]) for i in range(0, len(list1))]
return merged_list
merged_tuples = merge(index_list,sanitize_values(values))
def sum_tuples(tuples):
s = 0
for i in range(len(tuples)):
s += tuples[i][1]
#s += round(tuples[i][1],2)
return s
def subset_sum(numbers, target, partial=[], s=0, progress=0):
if(len(partial) > 0):
s += partial[len(partial)-1][1]
#s = sum_tuples(partial)
# check if the partial sum is equals to target
if round(s,2) == round(target,2):
print ((partial, target))
if len(numbers) == 0:
return
for i in range(len(numbers)):
#hacked in progress bar
if(len(partial) == 0):
progress += 1
printDateTime()
print("progress: " + str(progress))
#print("Starting from: " + str(i) + " Numbers: " + str(numbers) + "\n len: " + str(len(partial)) + "partial: " + str(partial))
n = numbers[i][1]
#n = round(numbers[i][1],2)
remaining = numbers[i+1:]
# print(remaining)
subset_sum(remaining, target, partial + [numbers[i]], s, progress)
def printDateTime():
now = datetime.datetime.now()
print ("Current date and time : ")
print (now.strftime("%Y-%m-%d %H:%M:%S"))
if __name__ == "__main__":
printDateTime()
subset_sum(merged_tuples,target)
printDateTime()
I have the following code which helps to bruteforce hashes
The first if statement will run, the values are hash=wordlist.txt, args=abtfg, values=[0, "0,1", 0, wordlist.txt, true]
def bruteforce(hash, args, values):
if "." in hash:
files = open(values[args.find("f")]) # Open wordlist.txt
for xhsd in files.readlines():
hash = xhsd
alphabet = "abcdefghijklmnopqrstuvwxyz"
alphabet += alphabet.upper() + "0123456789!$%^&*(){}~#][;:'#/?.>,<"
if "b" in args: # It is
m = args.find("b")
m = values[m]
else:
m = "0,16"
# m is 0,10
start_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())
l = 0
print("Cracking...")
attempts = 0
while l == 0:
password = ""
for x in range(random.randrange(int(m.split(",")[0])+1,int(m.split(",")[1])+1)): # range(random.randrange(0,10))
password += alphabet[random.randrange(0,len(alphabet)-1)]
num = hash_types[int(values[args.find("t")])] # num="md5"
htype = "hash2 = hashlib."+num+"(password).hexdigest()"
exec(htype) # hash2 = md5(password)
print hash2 + ":" + hash # Compares the hashes
if hash == hash2:
print password
l = 1
else:
print "Trying..."
The first item it tries, it cracks it almost instantly, printing:
0cc175b9c0f1b6a831c399e269772661:0cc175b9c0f1b6a831c399e269772661
(this is hash2 and hash). So we now know these two variables are equal. However, the if statement directly below it, doesn't run. This is the weirdest thing I've seen in Python, could anyone explain why this is? I've printed both variables and they're clearly the same...
Removing whitespace could help:
if hash.strip() == hash2.strip():
I'm having some formatting issues with my call to print function. For lack of knowledge of better ways to format, i've ended up with an issue. here is what it should look like
However the actual result of my print returns this.
def tupleMaker(inputString):
s1 = inputString.split()
# Adding the surname at the end of the string
s2 = [s1[len(s1) - 1]]
# Number of other names(no surname)
global noOfNames
noOfNames = len(s1) - 4
# Adding all the other names
for i in range(noOfNames):
s2.append((s1[i + 3]))
# Adding the Reg number
s2.append(s1[0])
# Adding the Degree scheme
s2.append(s1[2])
# Adding the year
s2.append("Year " + s1[1])
# Making it a tuple
t = ()
for i in range(len(s2)):
t = t + (s2[i],)
return t
def formatting(t):
s1 = ""
for i in range(len(t)):
s1 += t[i]
if (i == 0):
s1 += ", "
elif (i == len(t) - 4):
s1 += " "
else:
s1 += " "
#print(t[0] + ", ", end="")
#for i in range(noOfNames):
#print (t[i+1], end= " ")
#print(format(t[1+noOfNames], "<32s"))
#print(format(thenames, "<32d") + format(regNo, "<7d") + format(degScheme, ">6s") + format(year, ">1s")
print("")
print(s1)
I would recommend looking at using pythons built in string.format() function a small tutorial is located here: https://pyformat.info/
So i wrote this code and it passes the first test case, and fails all the rest. However, I can't seem to find an input that breaks it. Maybe it's because I've been staring at the code too long, but i would appreciate any help.
The algorithm uses two priority queues for the smallest and largest halves of the current list. Here's the code:
#!/bin/python
import heapq
def fix(minset, maxset):
if len(maxset) > len(minset):
item = heapq.heappop(maxset)
heapq.heappush(minset, -item)
elif len(minset) > (len(maxset) + 1):
item = heapq.heappop(minset)
heapq.heappush(maxset, -item)
N = int(raw_input())
s = []
x = []
for i in range(0, N):
tmp = raw_input()
a, b = [xx for xx in tmp.split(' ')]
s.append(a)
x.append(int(b))
minset = []
maxset = []
for i in range(0, N):
wrong = False
if s[i] == "a":
if len(minset) == 0:
heapq.heappush(minset,-x[i])
else:
if x[i] > minset[0]:
heapq.heappush(maxset, x[i])
else:
heapq.heappush(minset, -x[i])
fix(minset, maxset)
elif s[i] == "r":
if -x[i] in minset:
minset.remove(-x[i])
heapq.heapify(minset)
elif x[i] in maxset:
maxset.remove(x[i])
heapq.heapify(maxset)
else:
wrong = True
fix(minset, maxset)
if len(minset) == 0 and len(maxset) == 0:
wrong = True
if wrong == False:
#Calculate median
if len(minset) > len(maxset):
item = - minset[0]
print int(item)
else:
item = ((-float(minset[0])) + float(maxset[0])) / 2
if item.is_integer():
print int(item)
continue
out = str(item)
out.rstrip('0')
print out
else:
print "Wrong!"
Your original was not so legible, so first I made it object-oriented:
MedianHeapq supports methods rebalance(), add(), remove(), size(), median(). We seriously want to hide the members minset,maxset from the client code, for all sorts of sensible reasons: prevent client from swapping them, modifying them etc. If client needs to see them you just write an accessor.
We also added a __str__() method which we will use to debug visually and make your life easier.
Also added legibility changes to avoid the indexing with [i] everywhere, rename s,x arrays to op,val, add prompts on the raw_input(), reject invalid ops at the input stage.
Your actual computation of the median confuses me (when do you want float and when integer? the rstrip('0') is a bit wack), so I rewrote it, change that if you want something else.
A discussion of the algorithm is here.
Now it is legible and self-contained. Also makes it testable.
You might be making sign errors in your code, I don't know, I'll look at that later.
Next we will want to automate it by writing some PyUnit testcases. doctest is also a possibility. TBC.
Ok I think I see a bug in the sloppiness about locating the median. Remember the minset and maxset can have a size mismatch of +/-1. So take more care about precisely where the median is located.
#!/bin/python
import heapq
class MedianHeapq(object):
def __init__(self):
self.minset = []
self.maxset = []
def rebalance(self):
size_imbalance = len(self.maxset) - len(self.minset)
if len(self.maxset) > len(self.minset):
#if size_imbalance > 0:
item = heapq.heappop(self.maxset)
heapq.heappush(self.minset, -item)
#elif size_imbalance < -1:
elif len(self.minset) > (len(self.maxset) + 1):
item = heapq.heappop(self.minset)
heapq.heappush(self.maxset, -item)
def add(self, value, verbose=False):
if len(self.minset) == 0:
heapq.heappush(self.minset,-value)
else:
if value > self.minset[0]:
heapq.heappush(self.maxset, value)
else:
heapq.heappush(self.minset, -value)
self.rebalance()
if verbose: print self.__str__()
return False
def remove(self,value,verbose=False):
wrong = False
if -value in self.minset:
minset.remove(-value)
heapq.heapify(self.minset)
elif value in maxset:
maxset.remove(value)
heapq.heapify(self.maxset)
else:
wrong = True
self.rebalance()
if verbose: print self.__str__()
return wrong
def size(self):
return len(self.minset)+len(self.maxset)
def median(self):
if len(self.minset) > len(self.maxset):
item = - self.minset[0]
return int(item)
else:
item = (-self.minset[0] + self.maxset[0]) / 2.0
# Can't understand the intent of your code here: int, string or float?
if item.is_integer():
return int(item)
# continue # intent???
else:
return item
# The intent of this vv seems to be round floats and return '%.1f' % item ??
#out = str(item)
#out.rstrip('0') # why can't you just int()? or // operator?
#return out
def __str__(self):
return 'Median: %s Minset:%s Maxset:%s' % (self.median(), self.minset,self.maxset)
# Read size and elements from stdin
N = int(raw_input('Size of heap? '))
op = []
val = []
while(len(val)<N):
tmp = raw_input('a/r value : ')
op_, val_ = tmp.split(' ')
if op_ not in ['a','r']: # reject invalid ops
print 'First argument (operation) must be a:Add or r:Remove! '
continue
op.append(op_)
val.append(int(val_))
mhq = MedianHeapq()
for op_,val_ in zip(op,val): # use zip to avoid indexing with [i] everywhere
wrong = False
if op_ == 'a':
wrong = mhq.add(val_)
elif op_ == 'r':
wrong = mhq.remove(val_)
assert (mhq.size()>0), 'Heap has zero size!'
assert (not wrong), 'Heap structure is wrong!'
if not wrong:
print mhq.__str__()