I would like to ask how we can count the number of words that occur alphabetically before a given string in a trie.
Here is my current implementation.
class TrieNode:
    """A single node of a trie over the lowercase letters 'a'..'z'."""

    def __init__(self):
        # True when some inserted word terminates exactly at this node.
        self.isEndOfWord = False
        # Counter maintained by the owning trie's insert routine.
        self.word_count = 0
        # One slot per letter; None means there is no child for that letter.
        self.children = [None for _ in range(26)]
class Trie:
    """Trie over lowercase words ('a'..'z') supporting rank queries.

    Each node's ``word_count`` holds the number of inserted words that end
    exactly at that node (duplicates are counted).
    """

    def __init__(self):
        self.root = self.getNode()

    def getNode(self):
        """Return a new, empty trie node."""
        return TrieNode()

    def _charToIndex(self, ch):
        """Map a lowercase letter 'a'..'z' to its child slot 0..25."""
        return ord(ch) - ord('a')

    def insert(self, key):
        """Insert ``key``, creating any missing nodes along its path.

        The final node is flagged as a word end and its ``word_count`` is
        incremented, so inserting the same key twice counts it twice.
        """
        node = self.root
        for ch in key:
            index = self._charToIndex(ch)
            if not node.children[index]:
                node.children[index] = self.getNode()
            node = node.children[index]
        node.isEndOfWord = True
        node.word_count += 1

    def search(self, key):
        """Return True if ``key`` was inserted as a whole word, else False."""
        node = self.root
        for ch in key:
            node = node.children[self._charToIndex(ch)]
            if node is None:
                return False
        return node.isEndOfWord

    def count_before(self, string):
        """Return how many stored words sort strictly before ``string``.

        While walking the path spelled by ``string``, two groups of words
        are counted at every node that still has a character to consume:

        * words ending at that node, which are proper prefixes of
          ``string`` and therefore sort before it;
        * all words below children that lie to the left of the next
          character.

        The walk stops early when the path leaves the trie, so ``string``
        does not have to be a stored word.
        """
        count = 0
        node = self.root
        for ch in string:
            count += node.word_count
            index = self._charToIndex(ch)
            for sibling in node.children[:index]:
                if sibling is not None:
                    count += self._count_words(sibling)
            node = node.children[index]
            if node is None:
                break
        return count

    def _count_words(self, node):
        """Return the number of words stored in the subtree rooted at ``node``."""
        total = 0
        pending = [node]
        while pending:
            current = pending.pop()
            total += current.word_count
            pending.extend(
                child for child in current.children if child is not None
            )
        return total
def total_before(text):
    """Build a trie from the words in ``text`` and query it once per word.

    Returns a list with, for each word in input order, the value that
    ``Trie.count_before`` reports for that word.
    """
    trie = Trie()
    for word in text:
        trie.insert(word)
    return [trie.count_before(word) for word in text]
# Example call; the returned list is discarded here.
total_before(["bac", "aaa", "baa", "aac"]) # desired result: [3, 0, 2, 1]
I would like to know how I can count the number of words that occur before the given string in the trie that I have created. Can someone give me an idea of how to do it?
As word_count is currently initialised, it does not serve much purpose. It only is non-zero at nodes with isEndOfWord set to True. It would be more useful if it counted the number of words that depend on the current node, i.e. words that either end in that node (which your code counts now), or continue further down the trie (which are currently not counted).
To make that happen, also increment word_count while you descend the trie:
def insert(self, key):
    """Insert ``key`` and count it on every node along its path.

    ``word_count`` is incremented on each node the walk leaves and once
    more on the final node, so a node's ``word_count`` ends up as the
    number of inserted words whose path reaches that node.
    """
    node = self.root
    for ch in key:
        # Count this word on the current node before stepping down.
        node.word_count += 1
        slot = self._charToIndex(ch)
        child = node.children[slot]
        if not child:
            child = self.getNode()
            node.children[slot] = child
        node = child
    node.isEndOfWord = True
    node.word_count += 1
In count_before you would need to sum up all the word_count values of the child nodes that precede the child you will select, as those represent words that come before the current word:
def count_before(self, string):
    """Return how many stored words sort strictly before ``string``.

    Relies on ``word_count`` being the number of inserted words whose path
    reaches a node (i.e. the insert that increments it on every node of the
    path, not only on the final one).

    At each node on the path that still has a character to consume, the
    words that sort before ``string`` are all words reaching the node minus
    those that continue through the next character's child or a child to
    its right. This also counts words ending at the node itself: they are
    proper prefixes of ``string`` (e.g. "ba" before "bac"), which summing
    only the left-hand children would miss.
    """
    count = 0
    cur = self.root
    for ch in string:
        index = self._charToIndex(ch)
        at_or_after = sum(
            node.word_count for node in cur.children[index:] if node
        )
        count += cur.word_count - at_or_after
        cur = cur.children[index]
        if cur is None:
            # The path leaves the trie; nothing further can sort before.
            break
    return count
This line:
count += sum(node.word_count for node in cur.children[:index] if node)
Is a compact way of doing this:
# Long-hand equivalent of the one-line sum() expression above.
mysum = 0
for node in cur.children[:index]:
    if node:
        mysum += node.word_count
# Accumulate into the running total, not into the builtin `sum`.
count += mysum
I think you overcomplicated the problem.
def total_before(lst):
    """Return, for each word, how many words in ``lst`` sort strictly before it.

    The list is sorted once; each word's rank is then the index of its
    first occurrence in the sorted copy, found by binary search. Equal
    words therefore share the same rank.
    """
    from bisect import bisect_left

    ordered = sorted(lst)  # sort once instead of once per element
    return [bisect_left(ordered, el) for el in lst]
# Demo: print the result for the sample word list.
print(total_before(["bac", "aaa", "baa", "aac"]))
Output:
[3, 0, 2, 1]
I have the code that can build a trie data structure when it is given one string. When I am trying to pass a list of strings, it combines the words into one
class TrieNode:
    """Trie node keyed by letter that can enumerate the words below it."""

    def __init__(self):
        # Maps a letter to the child node reached by that letter.
        self.children = {}
        # True when a word ends exactly at this node.
        self.end = False

    def all_words(self, prefix):
        """Yield every word stored at or below this node.

        ``prefix`` is the string spelled by the path to this node; each
        yielded word is ``prefix`` extended by the letters below.
        """
        if self.end:
            yield prefix
        for letter in self.children:
            yield from self.children[letter].all_words(prefix + letter)
class Trie:
    """Prefix tree built from ``TrieNode`` objects."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, words):
        """Insert every word from ``words`` into the trie.

        Each word starts its walk at the root; without that reset a word
        would be appended below the end of the previous one, producing
        concatenations such as 'foofoob'.

        A single ``str`` is treated as one word rather than as an iterable
        of one-letter words.
        """
        if isinstance(words, str):
            words = [words]
        for word in words:
            curr = self.root
            for letter in word:
                node = curr.children.get(letter)
                if node is None:
                    node = TrieNode()
                    curr.children[letter] = node
                curr = node
            curr.end = True

    def all_words_beginning_with_prefix(self, prefix):
        """Yield every stored word that starts with ``prefix``.

        Yields nothing when no stored word has that prefix.
        """
        cur = self.root
        for c in prefix:
            cur = cur.children.get(c)
            if cur is None:
                return  # No words with given prefix
        yield from cur.all_words(prefix)
This is the code I use to insert everything into the tree:
# Sample input: every word starts with 'foo'.
lst = ['foo', 'foob', 'foobar', 'foof']
trie = Trie()
# The whole list is handed to insert() in a single call.
trie.insert(lst)
The output I get is
['foo', 'foofoob', 'foofoobfoobar', 'foofoobfoobarfoof']
The output I would like to get is
['foo', 'foob', 'foobar', 'foof']
This is the line I used to get the output (for reproducibility, in case you will need to run the code) - it returns all the words that start with a particular prefix:
# The method is a generator, so wrap it in list() to print the words.
print(list(trie.all_words_beginning_with_prefix('foo')))
How do I fix it?
You aren't resetting curr back to the root after each insert, so you're inserting the next word where the last one left off. You'd want something like:
def insert(self, words):
    """Insert each word of ``words``, restarting from the root every time."""
    for word in words:
        # Every word begins its walk at the root.
        current = self.root
        for letter in word:
            child = current.children.get(letter)
            if not child:
                child = TrieNode()
                current.children[letter] = child
            current = child
        current.end = True
I'd break this up though. I think your insert function is doing too much, and shouldn't be dealing with multiple strings. I'd change it to something like:
def insert(self, word):
    """Insert a single ``word``, creating missing nodes along its path."""
    current = self.root
    for letter in word:
        child = current.children.get(letter)
        if not child:
            child = TrieNode()
            current.children[letter] = child
        current = child
    # The node reached after the last letter marks the end of the word.
    current.end = True
def insert_many(self, words):
    """Insert each entry of ``words`` through ``self.insert``, in order."""
    for entry in words:
        self.insert(entry)
Now that's a non-problem since each insert is an independent call, and you can't forget to reset curr.
I wrote my own Trie solution using defaultdict. The task is to find all words with a given prefix.
The result format must be like {of: [of, often, offensive]}
Here is my Trie class:
from collections import defaultdict
def _trie():
    """Return an empty nested mapping whose missing keys become new sub-tries."""
    return defaultdict(_trie)


# Key stored under a node to mark that a word ends there.
TERMINAL = None


class Trie(object):
    """Trie stored as nested defaultdicts keyed by letter."""

    def __init__(self):
        self.trie = _trie()

    def addWord(self, word):
        """Add ``word``, creating one nested level per letter."""
        node = self.trie
        for letter in word:
            node = node[letter]
        # Merely indexing the defaultdict creates the TERMINAL entry.
        node[TERMINAL]

    def search(self, word, trie=None):
        """Return the sub-trie reached by following ``word``, or False.

        The walk starts at ``trie`` when given, otherwise at the root.
        Membership is tested before indexing so that a failed lookup does
        not create new entries.
        """
        node = self.trie if trie is None else trie
        for letter in word:
            if letter not in node:
                return False
            node = node[letter]
        return node
Here is the example:
# Bind the instance to a lowercase name so the Trie class is not shadowed
# and stays usable for creating further tries.
trie = Trie()
trie.addWord('of')
trie.addWord('often')
trie.addWord('offensive')
string = 'of'
# search() returns the sub-trie below 'of'; dict() makes a shallow plain-dict copy.
s = dict(trie.search(string))
They give the result:
Here I do a depth-first search:
from collections import defaultdict
class TrieNode:
    """Trie node that remembers the string spelled by the path to it."""

    def __init__(self):
        # Prefix spelled by the path to this node; filled in on insertion.
        self.words = ""
        # True when an inserted word ends exactly at this node.
        self.is_word = False
        # Children keyed by character; missing keys create a node on access.
        self.child = defaultdict(TrieNode)
class Trie:
    """Trie that returns all stored words sharing a prefix."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Insert ``word``, recording on each node the prefix it represents."""
        node = self.root
        for end, char in enumerate(word, start=1):
            node = node.child[char]
            node.words = word[:end]
        node.is_word = True

    def search(self, word):
        """Return the sorted list of stored words that start with ``word``.

        Returns an empty list when no path for ``word`` exists.
        """
        node = self.root
        for char in word:
            # .get() avoids creating nodes for characters that are absent.
            node = node.child.get(char)
            if not node:
                return []
        found = []
        pending = [node]
        # Iterative depth-first walk over everything below the prefix node.
        while pending:
            current = pending.pop()
            if current.is_word:
                found.append(current.words)
            pending.extend(current.child.values())
        found.sort()
        return found
# Bind the instance to a lowercase name so the Trie class is not shadowed.
trie = Trie()
trie.insert('of')
trie.insert('often')
trie.insert('offensive')
trie.insert('offensive2')
trie.search('o')
# ['of', 'offensive', 'offensive2', 'often']
Hi, this is a portion of my code for a prefix trie. I am trying to get it to return more than just the prefix; there is more explanation at the bottom:
class TrieNode:
    """Node of a character trie."""

    def __init__(self):
        # Maps a character to the child node reached by it.
        self.children = {}
        # True when an inserted string ends exactly at this node.
        self.isString = False
def insertString(word, root):
    """Insert ``word`` below ``root``, creating nodes for missing characters."""
    node = root
    for char in word:
        kids = node.children
        if char not in kids:
            kids[char] = TrieNode()
        node = kids[char]
    # The node reached after the last character marks a complete string.
    node.isString = True
def findStrings(prefix, node, results):
    """Append to ``results`` every string stored at or below ``node``.

    ``prefix`` is the text spelled by the path to ``node``. The list is
    filled in place and nothing is returned.
    """
    if node.isString:
        results.append(prefix)
    for char, child in node.children.items():
        findStrings(prefix + char, child, results)
def longestPrefix(word, root):
    """Return all stored strings that extend the longest trie path matching ``word``.

    The trie is followed character by character for as long as ``word``
    matches; every string stored at or below the node where the walk stops
    is returned.

    Note: when not even the first character matches (or ``word`` is empty)
    the walk stops at ``root`` with an empty prefix, so every stored string
    is returned.
    """
    currentNode = root
    matched = []
    for char in word:
        child = currentNode.children.get(char)
        if child is None:
            break
        currentNode = child
        matched.append(char)
    currentPrefix = ''.join(matched)
    strings = []
    findStrings(currentPrefix, currentNode, strings)
    return strings
# Discussion: Is it dangerous to assume that findStrings actually found a string?
# Hint: There is an edge case that will break this
# Sample vocabulary; it includes 'pedest', 'pedesta', 'pedestal' and 'pedestals'.
wordList = ['aydt', 'coombs', 'schuhmacher', 'claypoole', 'exhume', 'forehands', 'carin', 'plaits', 'alfonsin',
'hometowns', 'pedestals', 'emad', 'hourly', 'purchaser', 'spogli', 'combativeness', 'henningsen', 'luedke',
'duchin', 'koglin', 'teason', 'bumpings', 'substantially', 'lamendola', 'cecola', 'henze', 'tutti', 'dills',
'satirical', 'jetted', 'intertwine', 'predict', 'breezes', 'cyclist', 'ancillary', 'schaumburg', 'viewer',
"bay's", 'emissions', 'kincheloe', 'trees', 'vipperman', 'exhale', 'ornamental', 'repeated', 'pedestal',
'pedesta', 'pedest']
# Build the trie by inserting every word below a fresh root node.
root = TrieNode()
for word in wordList:
    insertString(word, root)
# An empty prefix at the root collects every stored string.
allWords = []
findStrings('', root, allWords)
print(allWords)
inputWord = 'co'
print(longestPrefix(inputWord, root))
inputWord = 'pedestals'
print(longestPrefix(inputWord, root))
I am trying to understand how to get print(longestPrefix('pedestals', root)) to return 'pedestals', 'pedestal', 'pedesta', 'pedest' and not just 'pedestals'. What am I missing in my code?
> I trying to understand how do i get print(longestPrefix('pedestals',
> root)) to return 'pedestals','pedestal','pedesta', 'pedest' and not
> just pedestals.
Since pedestals isn't a prefix, this doesn't make sense given the logic of the code -- I would have expected you to wonder why print(longestPrefix('pedest', root)) didn't return those four results. I've reworked your code below, turning all your functions into methods since each was taking the object you defined as an argument:
class TrieNode:
    """Trie node whose methods insert strings and list the stored ones."""

    def __init__(self):
        # Maps a character to the child node reached by it.
        self.children = {}
        # True when an inserted string ends exactly at this node.
        self.isString = False

    def insertString(self, word):
        """Insert ``word`` below this node, creating missing child nodes."""
        node = self
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.isString = True

    def findStrings(self, prefix):
        """Return every string stored at or below this node.

        ``prefix`` is the text spelled by the path to this node; results
        follow the insertion order of the child dictionaries.
        """
        results = [prefix] if self.isString else []
        for char, child in self.children.items():
            results += child.findStrings(prefix + char)
        return results

    def longestPrefix(self, word):
        """Return all stored strings extending the longest matching prefix of ``word``.

        The walk follows ``word`` for as long as matching children exist and
        then lists everything stored at or below the node where it stopped.
        """
        node = self
        matched = []
        for char in word:
            if char not in node.children:
                break
            node = node.children[char]
            matched.append(char)
        return node.findStrings(''.join(matched))
# Sample vocabulary; it includes 'pedest', 'pedesta', 'pedestal' and 'pedestals'.
wordList = [
'aydt', 'coombs', 'schuhmacher', 'claypoole', 'exhume', 'forehands', 'carin', 'plaits', 'alfonsin',
'hometowns', 'pedestals', 'emad', 'hourly', 'purchaser', 'spogli', 'combativeness', 'henningsen', 'luedke',
'duchin', 'koglin', 'teason', 'bumpings', 'substantially', 'lamendola', 'cecola', 'henze', 'tutti', 'dills',
'satirical', 'jetted', 'intertwine', 'predict', 'breezes', 'cyclist', 'ancillary', 'schaumburg', 'viewer',
"bay's", 'emissions', 'kincheloe', 'trees', 'vipperman', 'exhale', 'ornamental', 'repeated', 'pedestal',
'pedesta', 'pedest'
]
# Build the trie by inserting every word through the root node's method.
root = TrieNode()
for word in wordList:
    root.insertString(word)
# An empty prefix at the root lists every stored string.
allWords = root.findStrings('')
print(allWords)
inputWord = 'co'
print(root.longestPrefix(inputWord))
# 'pedest' is the shortest of the four related entries, so all four extend it.
inputWord = 'pedest'
print(root.longestPrefix(inputWord))
The last two print statements output:
['coombs', 'combativeness']
['pedest', 'pedesta', 'pedestal', 'pedestals']
# Sample vocabulary scanned by findsubstring below.
wordList = ['aydt', 'coombs', 'schuhmacher', 'claypoole', 'exhume', 'forehands', 'carin', 'plaits', 'alfonsin',
'hometowns', 'pedestals', 'emad', 'hourly', 'purchaser', 'spogli', 'combativeness', 'henningsen', 'luedke',
'duchin', 'koglin', 'teason', 'bumpings', 'substantially', 'lamendola', 'cecola', 'henze', 'tutti', 'dills',
'satirical', 'jetted', 'intertwine', 'predict', 'breezes', 'cyclist', 'ancillary', 'schaumburg', 'viewer',
"bay's", 'emissions', 'kincheloe', 'trees', 'vipperman', 'exhale', 'ornamental', 'repeated', 'pedestal',
'pedesta', 'pedest']
def findsubstring(fullstring, words=None):
    """Print every candidate word that occurs anywhere inside ``fullstring``.

    Args:
        fullstring: Text to search within.
        words: Iterable of candidate words. When omitted, the module-level
            ``wordList`` is used, which matches the original behaviour.

    The check is a plain substring test, so a word matches wherever it
    occurs in ``fullstring``, not only at the start.
    """
    if words is None:
        words = wordList
    for word in words:
        if word in fullstring:
            print(word)
# Print each wordList entry that is contained in "pedestals".
findsubstring("pedestals")
output:
pedestals
pedestal
pedesta
pedest