Ignoring Changed Index Check (Python) - python

I have made a script:
our_word = "Success"
def duplicate_encode(word):
char_list = []
final_str = ""
changed_index = []
base_wrd = word.lower()
for k in base_wrd:
char_list.append(k)
for i in range(0, len(char_list)):
count = 0
for j in range(i + 1, len(char_list)):
if j not in changed_index:
if char_list[j] == char_list[i]:
char_list[j] = ")"
changed_index.append(j)
count += 1
else:
continue
if count > 0:
char_list[i] = ")"
else:
char_list[i] = "("
print(changed_index)
print(char_list)
final_str = "".join(char_list)
return final_str
print(duplicate_encode(our_word))
essentialy the purpose of this script is to convert a string to a new string where each character in the new string is "(", if that character appears only once in the original string, or ")", if that character appears more than once in the original string. I have made a rather layered up script (I am relatively new to the python language so didn't want to use any helpful in-built functions) that attempts to do this. My issue is that where I check if the current index has been previously edited (in order to prevent it from changing), it seems to ignore it. So instead of the intended )())()) I get )()((((. I'd really appreciate an insightful answer to why I am getting this issue and ways to work around this, since I'm trying to gather an intuitive knowledge surrounding python. Thanks!

word = "Success"
print(''.join([')' if word.lower().count(c) > 1 else '(' for c in word.lower()]))

The issue here has nothing to do with your understanding of Python. It's purely algorithmic. If you retain this 'layered' algorithm, it is essential that you add one more check in the "i" loop.
our_word = "Success"
def duplicate_encode(word):
char_list = list(word.lower())
changed_index = []
for i in range(len(word)):
count = 0
for j in range(i + 1, len(word)):
if j not in changed_index:
if char_list[j] == char_list[i]:
char_list[j] = ")"
changed_index.append(j)
count += 1
if i not in changed_index: # the new inportant check to avoid reversal of already assigned ')' to '('
char_list[i] = ")" if count > 0 else "("
return "".join(char_list)
print(duplicate_encode(our_word))

Your algorithm can be greatly simplified if you avoid using char_list as both the input and output. Instead, you can create an output list of the same length filled with ( by default, and then only change an element when a duplicate is found. The loops will simply walk along the entire input list once for each character looking for any matches (other than self-matches). If one is found, the output list can be updated and the inner loop will break and move on to the next character.
The final code should look like this:
def duplicate_encode(word):
char_list = list(word.lower())
output = list('(' * len(word))
for i in range(len(char_list)):
for j in range(len(char_list)):
if i != j and char_list[i] == char_list[j]:
output[i] = ')'
break
return ''.join(output)
for our_word in (
'Success',
'ChJsTk(u cIUzI htBp#qX)OTIHpVtHHhQ',
):
result = duplicate_encode(our_word)
print(our_word)
print(result)
Output:
Success
)())())
ChJsTk(u cIUzI htBp#qX)OTIHpVtHHhQ
))(()(()))))())))()()((())))()))))

Related

Recursive Decompression of Strings

I'm trying to decompress strings using recursion. For example, the input:
3[b3[a]]
should output:
baaabaaabaaa
but I get:
baaaabaaaabaaaabbaaaabaaaabaaaaa
I have the following code but it is clearly off. The first find_end function works as intended. I am absolutely new to using recursion and any help understanding / tracking where the extra letters come from or any general tips to help me understand this really cool methodology would be greatly appreciated.
def find_end(original, start, level):
if original[start] != "[":
message = "ERROR in find_error, must start with [:", original[start:]
raise ValueError(message)
indent = level * " "
index = start + 1
count = 1
while count != 0 and index < len(original):
if original[index] == "[":
count += 1
elif original[index] == "]":
count -= 1
index += 1
if count != 0:
message = "ERROR in find_error, mismatched brackets:", original[start:]
raise ValueError(message)
return index - 1
def decompress(original, level):
# set the result to an empty string
result = ""
# for any character in the string we have not looked at yet
for i in range(len(original)):
# if the character at the current index is a digit
if original[i].isnumeric():
# the character of the current index is the number of repetitions needed
repititions = int(original[i])
# start = the next index containing the '[' character
x = 0
while x < (len(original)):
if original[x].isnumeric():
start = x + 1
x = len(original)
else:
x += 1
# last = the index of the matching ']'
last = find_end(original, start, level)
# calculate a substring using `original[start + 1:last]
sub_original = original[start + 1 : last]
# RECURSIVELY call decompress with the substring
# sub = decompress(original, level + 1)
# concatenate the result of the recursive call times the number of repetitions needed to the result
result += decompress(sub_original, level + 1) * repititions
# set the current index to the index of the matching ']'
i = last
# else
else:
# concatenate the letter at the current index to the result
if original[i] != "[" and original[i] != "]":
result += original[i]
# return the result
return result
def main():
passed = True
ORIGINAL = 0
EXPECTED = 1
# The test cases
provided = [
("3[b]", "bbb"),
("3[b3[a]]", "baaabaaabaaa"),
("3[b2[ca]]", "bcacabcacabcaca"),
("5[a3[b]1[ab]]", "abbbababbbababbbababbbababbbab"),
]
# Run the provided tests cases
for t in provided:
actual = decompress(t[ORIGINAL], 0)
if actual != t[EXPECTED]:
print("Error decompressing:", t[ORIGINAL])
print(" Expected:", t[EXPECTED])
print(" Actual: ", actual)
print()
passed = False
# print that all the tests passed
if passed:
print("All tests passed")
if __name__ == '__main__':
main()
From what I gathered from your code, it probably gives the wrong result because of the approach you've taken to find the last matching closing brace at a given level (I'm not 100% sure, the code was a lot). However, I can suggest a cleaner approach using stacks (almost similar to DFS, without the complications):
def decomp(s):
stack = []
for i in s:
if i.isalnum():
stack.append(i)
elif i == "]":
temp = stack.pop()
count = stack.pop()
if count.isnumeric():
stack.append(int(count)*temp)
else:
stack.append(count+temp)
for i in range(len(stack)-2, -1, -1):
if stack[i].isnumeric():
stack[i] = int(stack[i])*stack[i+1]
else:
stack[i] += stack[i+1]
return stack[0]
print(decomp("3[b]")) # bbb
print(decomp("3[b3[a]]")) # baaabaaabaaa
print(decomp("3[b2[ca]]")) # bcacabcacabcaca
print(decomp("5[a3[b]1[ab]]")) # abbbababbbababbbababbbababbbab
This works on a simple observation: rather tha evaluating a substring after on reading a [, evaluate the substring after encountering a ]. That would allow you to build the result AFTER the pieces have been evaluated individually as well. (This is similar to the prefix/postfix evaluation using programming).
(You can add error checking to this as well, if you wish. It would be easier to check if the string is semantically correct in one pass and evaluate it in another pass, rather than doing both in one go)
Here is the solution with the similar idea from above:
we go through string putting everything on stack until we find ']', then we go back until '[' taking everything off, find the number, multiply and put it back on stack
It's much less consuming as we don't add strings, but work with lists
Note: multiply number can't be more than 9 as we parse it as one element string
def decompress(string):
stack = []
letters = []
for i in string:
if i != ']':
stack.append(i)
elif i == ']':
letter = stack.pop()
while letter != '[':
letters.append(letter)
letter = stack.pop()
word = ''.join(letters[::-1])
letters = []
stack.append(''.join([word for j in range(int(stack.pop()))]))
return ''.join(stack)

Replacing keywords in strings with sequence of symbols

For an exercise, I have to create a simple profanity filter in order to learn about classes.
The filter gets initialized with a list of offensive keywords and a replacement template. Every occurrence of any of these words should be replaced with a string that is generated from the template. If the word size is shorter than the template, a substring should be used that starts from the beginning, for longer sizes, the template should be repeated as often as necessary.
The following are my results so far, with an example.
class ProfanityFilter:
def __init__(self, keywords, template):
self.__keywords = sorted(keywords, key=len, reverse=True)
self.__template = template
def filter(self, msg):
def __replace_letters__(old_word, replace_str):
replaced_word = ""
old_index = 0
replace_index = 0
while old_index <= len(old_word):
if replace_index == len(replace_str):
replace_index = 0
else:
replaced_word += replace_str[replace_index]
replace_index += 1
old_index += 1
return replaced_word
for keyword in self.__keywords:
idx = 0
while idx < len(msg):
index_l = msg.lower().find(keyword.lower(), idx)
if index_l == -1:
break
msg = msg[:index_l] + __replace_letters__(keyword, self.__template) + msg[index_l + len(keyword):]
idx = index_l + len(keyword)
return msg
f = ProfanityFilter(["duck", "shot", "batch", "mastard"], "?#$")
offensive_msg = "this mastard shot my duck"
clean_msg = f.filter(offensive_msg)
print(clean_msg) # should be: "this ?#$?#$? ?#$? my ?#$?"
The example should print:
this ?#$?#$? ?#$? my ?#$?
But it prints:
this ?#$?#$ ?#$? my ?#$?
For some reason it replaces the word "mastard" with 6 symbols instead of 7 (one for each letter). It works for the other keywords, why not for this one?
Also if you see anything else that seems off, feel free to tell me. Do keep in mind tho that I am a beginner and my "toolbox" is quite small atm.
Your problem is in the index logic. You have two errors
Each time you reach the end of the replacement string, you skip a letter in the profanity:
while old_index <= len(old_word):
if replace_index == len(replace_str):
replace_index = 0
# You don't replace a letter; you just reset the new index, but ...
else:
replaced_word += replace_str[replace_index]
replace_index += 1
old_index += 1 # ... but you still advance the old index.
The reason you didn't notice this is that you have a second bug: you run your old_index from 0 through len(old_word), which is one more character than you started with. For the canonical four-letter word (or words of 5 or 6 characters), the two errors cancel each other. You didn't see this because you didn't test enough. For instance, using:
f = ProfanityFilter(["StackOverflow", "PC"], "?#$")
offensive_msg = "StackOverflow on PC rulez!"
clean_msg = f.filter(offensive_msg)
Output:
?#$?#$?#$?# on ?#$ rulez!
The input words are 13 and 2 letters; the replacements are 11 and 3.
Fix those two errors: make old_index stay in bounds, and increment it only when you make a replacement.
while old_index < len(old_word):
if replace_index == len(replace_str):
replace_index = 0
else:
replaced_word += replace_str[replace_index]
replace_index += 1
old_index += 1
Future improvements:
Refactor this into a for loop.
Don't reset your replace_index; in fact, get rid of it. Simply use old_index % len(replace_str).
I'd do this with a regular expression instead, since re.sub() has a handy API for dynamic replacements:
import re
class ProfanityFilter:
def __init__(self, keywords, template):
# Build a regular expression that will match all of the profane words
self.keyword_re = re.compile("|".join(re.escape(keyword) for keyword in keywords), re.I)
self.template = template
def _generate_replacement(self, word):
l = len(word)
# Figure out how many times to repeat the template
repeats = (l // len(self.template)) + 1
# Since we may end up with a string longer than the original,
# slice to the correct length.
return (self.template * repeats)[:l]
def filter(self, msg):
# Replace all occurrences of the regular expression with
# a dynamically computed replacement value.
return self.keyword_re.sub(
lambda m: self._generate_replacement(m.group(0)),
msg,
)
f = ProfanityFilter(["duck", "shot", "batch", "mastard"], "?#$")
offensive_msg = "this mastard shot my duck"
print(f.filter(offensive_msg))
Couldn't make a one-liner, but here's a terrible implementation anway. Don't do what VoNWooDSoN does:
def replace(msg, keywords=["duck", "shot", "batch", "mastard"], template="?#$"):
for keyword in keywords * len(msg)):
msg = (template*len(keyword))[:len(keyword)].join([msg[:msg.find(keyword)], msg[msg.find(keyword)+len(keyword):]]) if msg.find(keyword) > 0 else msg
return msg
offensive_msg = "this mastard shot my duck"
clean_msg = replace(offensive_msg)
print(clean_msg) # should be: "this ?#$?#$? ?#$? my ?#$?"
print(clean_msg=="this ?#$?#$? ?#$? my ?#$?")
edit
So, I guess that 3.8 has assignment expressions... So, but this'd be the one liner then (probably).
print ((lambda msg: [msg := (("?#$"*len(keyword))[:len(keyword)].join([msg[:msg.find(keyword)], msg[msg.find(keyword)+len(keyword):]]) if msg.find(keyword) > 0 else msg) for keyword in ["duck", "shot", "batch", "mastard"]])("this mastard shot my duck")[-1])

Trying to write the java code from the urlify problem in cracking the coding interview in python

I took the Java code from cracking the coding interview for the urlify problem (1.3):
URLify: Write a method to replace all spaces in a string with '%20'. You may assume that the string has sufficient space at the end to hold the additional characters, and that you are given the "true" length of the string. (Note: if implementing in Java, please use a character array so that you can perform this operation in place.)
EXAMPLE
Input: "Mr John Smith , 13
Output: "Mr%2eJohn%2eSmith"
I'm having some issues with the converted code. Here is my python code:
def urlify(str, trueLength):
spaceCount = 0
index = 0
i = 0
for i in range(0, trueLength):
if str[i] == ' ':
spaceCount += 1
print(spaceCount, "spaceCount")
index = trueLength + spaceCount * 2
if (trueLength < len(str)):
str[trueLength] = '\0'
for i in range(trueLength, 0):
if str[i] == ' ':
str[index - 1] = '0'
str[index - 2] = '2'
str[index - 3] = '%'
index = index - 3
else:
str[index - 1] = str[i]
index = index - 1
print(urlify("Mr John Smith ", 13))
I think one of the issues is
str[trueLength] = '\0'
I'm not sure what else could be an issue. I'm also a little confused about the two lines
if (trueLength < len(str)):
str[trueLength] = '\0'
so if someone could explain those lines, that would be awesome. I just wanted to fully understand Gayle's solution.
Code I found:
def urlify(string, length):
'''function replaces single spaces with %20 and removes trailing spaces'''
new_index = len(string)
for i in reversed(range(length)):
if string[i] == ' ':
# Replace spaces
string[new_index - 3:new_index] = '%20'
new_index -= 3
else:
# Move characters
string[new_index - 1] = string[i]
new_index -= 1
return string
Shorten code(more Pythonic way):
def urlify(string, real_length):
return string[:real_length].replace(' ', '%20')
Explanation:
string[:real_length]
# This limits strings to real length, in your case to 13. This will remove unnecessary end of the string.
.replace(' ', '%20')
# This replaces every space with '%20'.
About your code:
In Python, 'str' is reserved word. Do not use it.
In Python, you can't change strings. You must create new one. String item assignment is not supported. Your code is completely based on item assignment. You should instead create new string and add characters to it.
This code is really messed up. It is very hard to understand, you should find easier solution.
Your code and logic optimized:
def urlify(string, trueLength):
new_string = ''
for i in range(0, trueLength):
if string[i] == ' ':
new_string=new_string+'%20'
else:
new_string=new_string+string[i]
return new_string
My guess, justing trying to understand the code you want to understand:
Note that:
you are given the "true" length of the string.
So the trueLength can be shorter than the len(str), and you should handle this case in your code.

Pattern search by NOT using Regex algorithm and code in python

Today I had an interview at AMD and was asked a question which I didn't know how to solve it without Regex. Here is the question:
Find all the pattern for the word "Hello" in a text. Consider that there is only ONE char can be in between letters of hello e.g. search for all instances of "h.ello", "hell o", "he,llo", or "hel!lo".
Since you also tagged this question algorithm, I'm just going to show the general approach that I would take when looking at this question, without including any language tricks from python.
1) I would want to split the string into a list of words
2) Loop through each string in the resulting list, checking if the string matches 'hello' without the character at the current index (or if it simply matches 'hello')
3) If a match is found, return it.
Here is a simple approach in python:
s = "h.ello hello h!ello hell.o none of these"
all = s.split()
def drop_one(s, match):
if s == match:
return True # WARNING: Early Return
for i in range(len(s) - 1):
if s[:i] + s[i+1:] == match:
return True
matches = [x for x in all if drop_one(x, "hello")]
print(matches)
The output of this snippet:
['h.ello', 'hello', 'h!ello', 'hell.o']
This should work. I've tried to make it generic. You might have to make some minor adjustments. Let me know if you don't understand any part.
def checkValidity(tlist):
tmpVar = ''
for i in range(len(tlist)):
if tlist[i] in set("hello"):
tmpVar += tlist[i]
return(tmpVar == 'hello')
mStr = "he.llo hehellbo hellox hell.o hello helloxy abhell.oyz"
mWord = "hello"
mlen = len(mStr)
wordLen = len(mWord)+1
i=0
print ("given str = ", mStr)
while i<mlen:
tmpList = []
if mStr[i] == 'h':
for j in range(wordLen):
tmpList.append(mStr[i+j])
validFlag = checkValidity(tmpList)
if validFlag:
print("Match starting at index: ",i, ':', mStr[i:i+wordLen])
i += wordLen
else:
i += 1
else:
i += 1

How do you reverse the words in a string using python (manually)? [duplicate]

This question already has answers here:
Closed 11 years ago.
Possible Duplicate:
Reverse the ordering of words in a string
I know there are methods that python already provides for this, but I'm trying to understand the basics of how those methods work when you only have the list data structure to work with. If I have a string hello world and I want to make a new string world hello, how would I think about this?
And then, if I can do it with a new list, how would I avoid making a new list and do it in place?
Split the string, make a reverse iterator then join the parts back.
' '.join(reversed(my_string.split()))
If you are concerned with multiple spaces, change split() to split(' ')
As requested, I'm posting an implementation of split (by GvR himself from the oldest downloadable version of CPython's source code: Link)
def split(s,whitespace=' \n\t'):
res = []
i, n = 0, len(s)
while i < n:
while i < n and s[i] in whitespace:
i = i+1
if i == n:
break
j = i
while j < n and s[j] not in whitespace:
j = j+1
res.append(s[i:j])
i = j
return res
I think now there are more pythonic ways of doing that (maybe groupby) and the original source had a bug (if i = n:, corrrected to ==)
Original Answer
from array import array
def reverse_array(letters, first=0, last=None):
"reverses the letters in an array in-place"
if last is None:
last = len(letters)
last -= 1
while first < last:
letters[first], letters[last] = letters[last], letters[first]
first += 1
last -= 1
def reverse_words(string):
"reverses the words in a string using an array"
words = array('c', string)
reverse_array(words, first=0, last=len(words))
first = last = 0
while first < len(words) and last < len(words):
if words[last] != ' ':
last += 1
continue
reverse_array(words, first, last)
last += 1
first = last
if first < last:
reverse_array(words, first, last=len(words))
return words.tostring()
Answer using list to match updated question
def reverse_list(letters, first=0, last=None):
"reverses the elements of a list in-place"
if last is None:
last = len(letters)
last -= 1
while first < last:
letters[first], letters[last] = letters[last], letters[first]
first += 1
last -= 1
def reverse_words(string):
"""reverses the words in a string using a list, with each character
as a list element"""
characters = list(string)
reverse_list(characters)
first = last = 0
while first < len(characters) and last < len(characters):
if characters[last] != ' ':
last += 1
continue
reverse_list(characters, first, last)
last += 1
first = last
if first < last:
reverse_list(characters, first, last=len(characters))
return ''.join(characters)
Besides renaming, the only change of interest is the last line.
You have a string:
str = "A long string to test this algorithm"
Split the string (at word boundary -- no arguments to split):
splitted = str.split()
Reverse the array obtained -- either using ranges or a function
reversed = splitted[::-1]
Concatenate all words with spaces in between -- also known as joining.
result = " ".join(reversed)
Now, you don't need so many temps, combining them into one line gives:
result = " ".join(str.split()[::-1])
str = "hello world"
" ".join(str.split()[::-1])

Categories