How to make depth search in Trie? - python

I wrote my own Trie implementation using defaultdict. The task is to find all words that start with a given prefix.
The result must be formatted like {'of': ['of', 'often', 'offensive']}.
Here my Trie class:
from collections import defaultdict
def _trie():
    """Return an empty trie node: a dict that creates child nodes on first access."""
    return defaultdict(_trie)


# Key stored inside a node to mark that a complete word ends there.
TERMINAL = None


class Trie(object):
    """Prefix tree built from nested ``defaultdict`` nodes."""

    def __init__(self):
        self.trie = _trie()

    def addWord(self, word):
        """Insert *word*, creating the nodes along its path as needed."""
        node = self.trie
        for letter in word:
            node = node[letter]
        # Plant the end-of-word marker explicitly instead of relying on the
        # side effect of a bare ``node[TERMINAL]`` lookup.
        node.setdefault(TERMINAL, _trie())

    def search(self, word, trie=None):
        """Follow *word* letter by letter and return the node it leads to.

        The walk starts at *trie* (the root when omitted). Returns ``False``
        as soon as a letter has no matching child. The returned node contains
        the ``TERMINAL`` key when *word* itself was added.
        """
        if trie is None:
            trie = self.trie
        for letter in word:
            # Membership test first: indexing a defaultdict would create the key.
            if letter in trie:
                trie = trie[letter]
            else:
                return False
        return trie
Here The example:
# Bind the instance to a lowercase name so the Trie class itself is not overwritten.
trie = Trie()
trie.addWord('of')
trie.addWord('often')
trie.addWord('offensive')
string = 'of'
# search() returns the nested node for the prefix; dict() makes a shallow plain-dict copy of it.
s = dict(trie.search(string))
They give the result:

Here I implement the depth-first search:
from collections import defaultdict
class TrieNode:
    """One trie node; children are created on first indexed access."""

    def __init__(self):
        self.child = defaultdict(TrieNode)
        # True when an inserted word ends at this node.
        self.is_word = False
        # The full word ending here; stays "" on nodes where no word ends.
        self.words = ""


class Trie:
    """Prefix tree that can list every stored word sharing a prefix."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Add *word* to the trie."""
        cur = self.root
        for char in word:
            cur = cur.child[char]
        cur.is_word = True
        # Only the final node needs the word; slicing a prefix for every
        # intermediate node made insertion quadratic in the word length.
        cur.words = word

    def search(self, word):
        """Return a sorted list of all stored words that start with *word*."""
        cur = self.root
        for char in word:
            # .get() does not trigger the defaultdict factory, so a failed
            # lookup leaves the trie unchanged.
            cur = cur.child.get(char)
            if cur is None:
                return []
        # Iterative depth-first walk of the subtree below the prefix node.
        stack = [cur]
        res = []
        while stack:
            node = stack.pop()
            if node.is_word:
                res.append(node.words)
            stack.extend(node.child.values())
        return sorted(res)
# Bind the instance to a lowercase name so the Trie class itself is not overwritten.
trie = Trie()
trie.insert('of')
trie.insert('often')
trie.insert('offensive')
trie.insert('offensive2')
trie.search('o')
# ['of', 'offensive', 'offensive2', 'often']

Related

Longest Common Substring from Trie

I'm trying to obtain the longest common substring of two strings and have coded a trie, but I'm not sure how to obtain the longest common substring from this trie. Could someone please help? Thanks!
class TrieNode:
    """Trie node whose children live in a fixed-size list indexed by letter."""

    def __init__(self):
        # Slot i holds the child for chr(ord('a') + i). 27 slots are allocated,
        # one more than the 26 lowercase letters.
        # NOTE(review): confirm whether the extra slot is intentional.
        self.children = [None] * 27
        self.isLeaf = False


class Trie:
    def __init__(self):
        """
        Initialize your data structure here.
        """
        self.root = TrieNode()

    def insert(self, word: str) -> None:
        """
        Inserts a word into the trie.
        """
        current = self.root
        for letter in word:
            index = ord(letter) - ord('a')
            if not current.children[index]:
                current.children[index] = TrieNode()
            current = current.children[index]
        current.isLeaf = True

    def _find(self, text: str):
        """Return the node reached by following *text*, or None if the path is missing."""
        current = self.root
        for letter in text:
            current = current.children[ord(letter) - ord('a')]
            if current is None:
                return None
        return current

    def search(self, word: str) -> bool:
        """
        Returns if the word is in the trie.
        """
        node = self._find(word)
        # Return a real bool, as annotated, rather than the node object.
        return node is not None and node.isLeaf

    def startsWith(self, prefix: str) -> bool:
        """
        Returns if there is any word in the trie that starts with the given prefix.
        """
        return self._find(prefix) is not None

Passing a list of strings to be put into trie

I have the code that can build a trie data structure when it is given one string. When I am trying to pass a list of strings, it combines the words into one
class TrieNode:
    """Trie node: an end-of-word flag plus a letter -> child mapping."""

    def __init__(self):
        self.end = False
        self.children = {}

    def all_words(self, prefix):
        """Yield every word stored at or below this node.

        *prefix* is the string of letters leading to this node; each yielded
        word is that prefix extended by the path to a node flagged ``end``.
        """
        if self.end:
            yield prefix
        for letter, child in self.children.items():
            yield from child.all_words(prefix + letter)
# Trie exactly as posted in the question; the outputs quoted further down come from this version.
class Trie:
# NOTE(review): __init__ is defined twice with identical bodies; the second definition replaces the first.
def __init__(self):
self.root = TrieNode()
def __init__(self):
self.root = TrieNode()
# Inserts the strings in `words`.
# NOTE(review): curr is assigned self.root only once, before the loop over words, and is never
# reset afterwards, so each later word continues from the node where the previous word ended.
def insert(self, words):
curr = self.root
#the line I added to read the words from a list is below
for word in words:
for letter in word:
node = curr.children.get(letter)
if not node:
node = TrieNode()
curr.children[letter] = node
curr = node
curr.end = True
# Generator: walks down the trie along `prefix`, then delegates to TrieNode.all_words
# on the node reached; yields nothing when some character of the prefix has no child.
def all_words_beginning_with_prefix(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return # No words with given prefix
yield from cur.all_words(prefix)
This is the code I use to insert everything into the tree:
# Build the trie by passing the whole list to a single insert() call.
lst = ['foo', 'foob', 'foobar', 'foof']
trie = Trie()
trie.insert(lst)
The output I get is
['foo', 'foofoob', 'foofoobfoobar', 'foofoobfoobarfoof']
The output I would like to get is
['foo', 'foob', 'foobar', 'foof']
This is the line I used to get the output (for reproducibility, in case you will need to run the code) - it returns all the words that start with a particular prefix:
# Materialise the generator so every word starting with 'foo' is printed as one list.
print(list(trie.all_words_beginning_with_prefix('foo')))
How do I fix it?
You aren't resetting curr back to the root after each insert, so you're inserting the next word where the last one left off. You'd want something like:
def insert(self, words):
    """Insert every string in *words* into the trie."""
    for word in words:
        curr = self.root  # Reset back to the root before every word
        for letter in word:
            node = curr.children.get(letter)
            if node is None:
                node = TrieNode()
                curr.children[letter] = node
            curr = node
        curr.end = True
I'd break this up though. I think your insert function is doing too much, and shouldn't be dealing with multiple strings. I'd change it to something like:
def insert(self, word):
    """Insert a single *word*, creating missing nodes along its path."""
    curr = self.root
    for letter in word:
        node = curr.children.get(letter)
        if node is None:
            node = TrieNode()
            curr.children[letter] = node
        curr = node
    # Flag the node reached by the last letter as the end of a word.
    curr.end = True
def insert_many(self, words):
    """Insert each string in *words* through a separate ``insert`` call."""
    for word in words:
        self.insert(word)  # Just loop over self.insert
Now that's a non-problem since each insert is an independent call, and you can't forget to reset curr.

Storing word count in the python trie

I took a list of words and put it into a trie. I would also like to store word count inside for further analysis. What would be the best way to do it? This is the class where I think the frequency would be collected and stored, but I am not sure how to go about it. You can see my attempt, last line in insert is where I try to store the count.
# Trie node from the question's attempt at storing a count per node.
class TrieNode:
# Stores k unchanged and starts the counter v at 0.
def __init__(self,k):
self.v = 0
self.k = k
self.children = {}
# Generator over the words at or below this node.
# NOTE(review): reads self.end, which __init__ above never assigns.
def all_words(self, prefix):
if self.end:
yield prefix
for letter, child in self.children.items():
yield from child.all_words(prefix + letter)
# Trie from the question's attempt; left as posted.
class Trie:
# NOTE(review): __init__ is defined twice with identical bodies; the second definition replaces the first.
# NOTE(review): TrieNode() is called without arguments here and in insert, while the
# TrieNode.__init__ shown with this snippet declares a required parameter k.
def __init__(self):
self.root = TrieNode()
def __init__(self):
self.root = TrieNode()
def insert(self, word):
curr = self.root
for letter in word:
node = curr.children.get(letter)
if not node:
node = TrieNode()
curr.children[letter] = node
# NOTE(review): curr is never reassigned in this method, so this always updates self.root.v.
curr.v += 1
# Inserts each word through a separate insert() call.
def insert_many(self, words):
for word in words:
self.insert(word)
# Generator: walks down the trie along `prefix`, then delegates to TrieNode.all_words
# on the node reached; yields nothing when some character of the prefix has no child.
def all_words_beginning_with_prefix(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return # No words with given prefix
yield from cur.all_words(prefix)
I want to store the count so that when I use
# Materialise the generator so the matches for the prefix are printed as one list.
print(list(trie.all_words_beginning_with_prefix('prefix')))
I would get a result like so:
[(word, count), (word, count)]
While inserting, on seeing any node, it means there's a new word going to be added in that path. Therefore increment your word_count of that node.
class TrieNode:
    """Trie node that tracks how many inserted words pass through it."""

    def __init__(self, char):
        self.char = char
        # Number of insert() calls whose word passes through or ends at this node.
        self.word_count = 0
        # True when an inserted word ends exactly at this node.
        self.is_word = False
        self.children = {}

    def all_words(self, prefix, path):
        """Yield ``prefix + path`` for every word ending at or below this node.

        *path* is the string of letters walked so far below the prefix node.
        Words are reported at every end-of-word node, not only at leaves, so a
        word that is a prefix of another word is not lost.
        """
        if self.is_word:
            yield prefix + path
        for letter, child in self.children.items():
            yield from child.all_words(prefix, path + letter)


class Trie:
    """Prefix tree with a per-node count of the words below each prefix."""

    def __init__(self):
        self.root = TrieNode('')

    def insert(self, word):
        """Insert *word* and bump the counter of every node on its path."""
        curr = self.root
        curr.word_count += 1  # the root sees every inserted word
        for letter in word:
            node = curr.children.get(letter)
            if node is None:
                node = TrieNode(letter)
                curr.children[letter] = node
            curr = node
            # Count on the node just reached, so a word equal to the queried
            # prefix is included in that prefix's count.
            curr.word_count += 1
        curr.is_word = True

    def insert_many(self, words):
        """Insert each string in *words*."""
        for word in words:
            self.insert(word)

    def all_words_beginning_with_prefix(self, prefix):
        """Yield every distinct stored word that starts with *prefix*."""
        cur = self.root
        for c in prefix:
            cur = cur.children.get(c)
            if cur is None:
                return  # No words with given prefix
        yield from cur.all_words(prefix, path="")

    def word_count(self, prefix):
        """Return how many inserted words start with *prefix*.

        Repeated insertions of the same word are each counted.
        """
        cur = self.root
        for c in prefix:
            cur = cur.children.get(c)
            if cur is None:
                return 0
        return cur.word_count
trie = Trie()
trie.insert_many(["hello", "hi", "random", "heap"])
prefix = "he"
# Lazy: walk the subtree and count the words found.
words = list(trie.all_words_beginning_with_prefix(prefix))
print("Lazy method:\n Prefix: %s, Words: %s, Count: %d" % (prefix, words, len(words)))
# Proactive: read the counter maintained during insertion.
print("Proactive method:\n Word count for '%s': %d" % (prefix, trie.word_count(prefix)))
Output:
Lazy method:
Prefix: he, Words: ['hello', 'heap'], Count: 2
Proactive method:
Word count for 'he': 2
I would add a field called is_word to the trie node, where is_word would be true only for the last letter in the word. Like you have word AND, is_word would be true for the trie node holding the letter D. And I would update frequency for only nodes that have is_word to be true, not for every letter in the word.
So when you iterate from a letter, check if it is a word, if it is, stop the iteration, return the count and the word. I’m assuming in your iteration you keep track of the letters, and keep adding them to the prefix.
Your trie is a multi-way trie.

Trie Implementation in Python -- Print Keys

I implemented a Trie data structure in Python; the problem is that it doesn't display the keys stored in the Trie.
class Node:
    """Trie node with 26 child slots and an end-of-word flag."""

    def __init__(self):
        # One slot per index 0..25; None means no child at that index.
        self.children = [None] * 26
        self.endOfTheWord = False
# Trie as posted in the question; left unchanged because the question is about its output.
class Trie:
def __init__(self):
self.root = self.getNode()
# Factory for a fresh, empty Node.
def getNode(self):
return Node()
# Maps a character to its offset from 'a'.
def charToIndex(self ,ch):
return ord(ch) - ord('a')
# Walks/creates one node per character and flags the last node as end of a word.
def insert(self ,word):
current = self.root
for i in range(len(word)):
index = self.charToIndex(word[i])
if current.children[index] is None:
current.children[index] = self.getNode()
current = current.children[index]
current.endOfTheWord = True
# Entry point for printing: starts the recursive helper at the root with an empty buffer.
# NOTE(review): the local name `str` shadows the builtin.
def printKeys(self):
str = []
self.printKeysUtil(self.root ,str)
# Recursive helper: `str` is the list of characters collected on the path so far.
def printKeysUtil(self ,root ,str):
if root.endOfTheWord == True:
print(''.join(str))
# NOTE(review): returning here means children of an end-of-word node are never visited.
return
for i in range(26):
if root.children[i] is not None:
# NOTE(review): chr(97) + chr(i) concatenates 'a' with the character whose code is i;
# it does not produce the letter at offset i (compare chr(97 + i)).
ch = chr(97) + chr(i)
str.append(ch)
self.printKeysUtil(root.children[i] ,str)
str.pop()
You could perform a pre-order traversal of the nodes, and wherever you find an end-of-word marker, you zoom up to the root, capturing the letters as you go, in order to get the full word... except that to accomplish this, you would need to store the parent node in each node.
def printKeysUtil(self ,root ,str):
    """Print every key stored at or below *root*, one per line.

    *str* is the list of characters on the path from the trie root to *root*.
    It is extended and restored in place during the walk. The parameter name
    shadows the builtin and is kept only for compatibility with callers.
    """
    if root.endOfTheWord:
        print(''.join(str))
        # No return here: longer keys may continue below an end-of-word node.
    for i, child in enumerate(root.children):
        if child is not None:
            str.append(chr(ord('a') + i))
            self.printKeysUtil(child, str)
            str.pop()

How to change this prefix trie to return all words that contain similar prefixes

Hi, this is a portion of my code for a prefix trie. I am trying to get it to return more than just the prefix; more explanation is at the bottom:
class TrieNode:
    """Trie node: an end-of-string flag plus a character -> child mapping."""

    def __init__(self):
        self.isString = False
        self.children = {}


def insertString(word, root):
    """Insert *word* into the trie rooted at *root*."""
    currentNode = root
    for char in word:
        if char not in currentNode.children:
            currentNode.children[char] = TrieNode()
        currentNode = currentNode.children[char]
    currentNode.isString = True


def findStrings(prefix, node, results):
    """Append to *results* every string stored at or below *node*.

    *prefix* is the text leading to *node*; the walk is depth-first, in child
    insertion order.
    """
    if node.isString:
        results.append(prefix)
    for char, child in node.children.items():
        findStrings(prefix + char, child, results)


def longestPrefix(word, root):
    """Return all stored strings that extend the longest matched prefix of *word*.

    The trie is followed along *word* for as long as characters match. The
    strings stored at or below the node reached are returned, so the list is
    empty when nothing is stored there.
    """
    currentNode = root
    matched = 0
    for char in word:
        child = currentNode.children.get(char)
        if child is None:
            break
        currentNode = child
        matched += 1
    strings = []
    findStrings(word[:matched], currentNode, strings)
    return strings
# Discussion: Is it dangerous to assume that findStrings actually found a string?
# Hint: There is an edge case that will break this
wordList = ['aydt', 'coombs', 'schuhmacher', 'claypoole', 'exhume', 'forehands', 'carin', 'plaits', 'alfonsin',
            'hometowns', 'pedestals', 'emad', 'hourly', 'purchaser', 'spogli', 'combativeness', 'henningsen', 'luedke',
            'duchin', 'koglin', 'teason', 'bumpings', 'substantially', 'lamendola', 'cecola', 'henze', 'tutti', 'dills',
            'satirical', 'jetted', 'intertwine', 'predict', 'breezes', 'cyclist', 'ancillary', 'schaumburg', 'viewer',
            "bay's", 'emissions', 'kincheloe', 'trees', 'vipperman', 'exhale', 'ornamental', 'repeated', 'pedestal',
            'pedesta', 'pedest']
# Build the trie from the word list.
root = TrieNode()
for word in wordList:
    insertString(word, root)
# Dump every stored string, starting from the empty prefix.
allWords = []
findStrings('', root, allWords)
print(allWords)
inputWord = 'co'
print(longestPrefix(inputWord, root))
inputWord = 'pedestals'
print(longestPrefix(inputWord, root))
I trying to understand how do i get print(longestPrefix('pedestals', root)) to return 'pedestals','pedestal','pedesta', 'pedest' and not just pedestals. What am i missing in my code?
I trying to understand how do i get print(longestPrefix('pedestals',
root)) to return 'pedestals','pedestal','pedesta', 'pedest' and not
just pedestals.
Since pedestals isn't a prefix, this doesn't make sense given the logic of the code -- I would have expected you to wonder why print(longestPrefix('pedest', root)) didn't return those four results. I've reworked your code below, turning all your functions into methods since each was taking the object you defined as an argument:
class TrieNode:
    """Trie node that also carries the insert and lookup operations as methods."""

    def __init__(self):
        self.isString = False
        self.children = {}

    def insertString(self, word):
        """Insert *word* into the trie rooted at this node."""
        # Walk with a separate local instead of rebinding ``self``.
        node = self
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.isString = True

    def findStrings(self, prefix):
        """Return every string stored at or below this node.

        *prefix* is the text leading to this node; results are in depth-first
        order, following child insertion order.
        """
        results = [prefix] if self.isString else []
        for char, child in self.children.items():
            results.extend(child.findStrings(prefix + char))
        return results

    def longestPrefix(self, word):
        """Return all stored strings that extend the longest matched prefix of *word*."""
        node = self
        matched = 0
        for char in word:
            child = node.children.get(char)
            if child is None:
                break
            node = child
            matched += 1
        return node.findStrings(word[:matched])
wordList = [
    'aydt', 'coombs', 'schuhmacher', 'claypoole', 'exhume', 'forehands', 'carin', 'plaits', 'alfonsin',
    'hometowns', 'pedestals', 'emad', 'hourly', 'purchaser', 'spogli', 'combativeness', 'henningsen', 'luedke',
    'duchin', 'koglin', 'teason', 'bumpings', 'substantially', 'lamendola', 'cecola', 'henze', 'tutti', 'dills',
    'satirical', 'jetted', 'intertwine', 'predict', 'breezes', 'cyclist', 'ancillary', 'schaumburg', 'viewer',
    "bay's", 'emissions', 'kincheloe', 'trees', 'vipperman', 'exhale', 'ornamental', 'repeated', 'pedestal',
    'pedesta', 'pedest'
]
# Build the trie from the word list.
root = TrieNode()
for word in wordList:
    root.insertString(word)
allWords = root.findStrings('')
print(allWords)
inputWord = 'co'
print(root.longestPrefix(inputWord))
inputWord = 'pedest'
print(root.longestPrefix(inputWord))
The last two print statements output:
['coombs', 'combativeness']
['pedest', 'pedesta', 'pedestal', 'pedestals']
wordList = ['aydt', 'coombs', 'schuhmacher', 'claypoole', 'exhume', 'forehands', 'carin', 'plaits', 'alfonsin',
            'hometowns', 'pedestals', 'emad', 'hourly', 'purchaser', 'spogli', 'combativeness', 'henningsen', 'luedke',
            'duchin', 'koglin', 'teason', 'bumpings', 'substantially', 'lamendola', 'cecola', 'henze', 'tutti', 'dills',
            'satirical', 'jetted', 'intertwine', 'predict', 'breezes', 'cyclist', 'ancillary', 'schaumburg', 'viewer',
            "bay's", 'emissions', 'kincheloe', 'trees', 'vipperman', 'exhale', 'ornamental', 'repeated', 'pedestal',
            'pedesta', 'pedest']


def findsubstring(fullstring, words=None):
    """Print each candidate word that occurs anywhere inside *fullstring*.

    *words* is the iterable of candidates; when omitted, the module-level
    ``wordList`` is used. Matching is plain substring containment, so a word
    is reported wherever it occurs in *fullstring*, not only at the start.
    """
    if words is None:
        words = wordList
    for word in words:
        if word in fullstring:
            print(word)


findsubstring("pedestals")
output:
pedestals
pedestal
pedesta
pedest

Categories