Using a dictionary to correct spelling - python

I need to write a function that takes a string argument and returns a string with the corrected spelling according to this dictionary:
For example:
"I ate teh whole thing lol"
comes out to be:
'I ate the whole thing haha'
I've done this so far but am lost on what to do:
def respell(string):
respellings = {
"teh":"the",
"relevent":"relevant",
"lite": "light",
"lol":"haha" }
respellingslist = reslepllings.split()

Try this :)
def respelling(string):
respellings = {
"teh": "the",
"relevent": "relevant",
"lite": "light",
"lol": "haha" }
res = string.split()
for i in xrange(len(res)):
if res[i] in respellings:
res[i] = respellings[res[i]]
return ' '.join(res)
[EDIT] one-liner:
return ' '.join(map(lambda s: respellings.get(s, s), string.split()))

Assuming that the respellings contains a lot of possible mistyped words as keys:
def respell(s):
respellings = {
"teh":"the",
"relevent":"relevant",
"lite": "light",
"lol":"haha" }
if s in respellings:
return respellings[s]
If it does not contain the required key it will do nothing.

The following function takes a string and corrects all the misspellings specified in the respellings dictionary passed as an argument:
def respell(s, respellings):
for wrong in respellings:
try:
index = s.index(wrong)
s = s[:index] + respellings[wrong] + s[len(wrong)+index:]
except:
print(wrong + " not in string")
return s
>>> print(respell("I aet teh wohle tingh",
{"aet":"ate", "teh":"the", "wohle":"whole", "tingh":"thing"}))
"I ate the whole thing"

If you want to only use a dictionary to correct spelling and all wrong spellings are present as key then your code should be as follows
def respell(word):
respellings = {
"teh":"the",
"relevent":"relevant",
"lite": "light",
"lol":"haha" }
try:
return respellings[word]
except KeyError:
return word
string = "I ate teh whole thing lol"
correct_string = " ".join(respell(word) for word in string.split())
and in-case if you want to do a proper spell check which would be slightly expensive is as follows.
from difflib import SequenceMatcher
def similar(a, b):
return SequenceMatcher(None, a, b).ratio()
def respell(word):
known_words = set(["the","man","relevant","light","haha"])
if word in known_words:
return word,1
max_similarity = 0
correct_word = None
for known_word in known_words:
similarity_value = similar(known_word, word)
if max_similarity<similarity_value:
similarity_value = max_similarity
correct_word = known_word
return correct_word,max_similarity
The above function returns two value a word and a similarity value which is between 0 and 1. zero (0) if no words are even near similar and 1 if given word is known or correct word.

Related

Leetcode Valid Palindrome pass first test case but fail the second one when submit

I'm doing the valid palindrome problem at leetcode, below is the problem discription:
Given a string, determine if it is a palindrome, considering only alphanumeric characters and ignoring cases.
Note: For the purpose of this problem, we define empty string as valid palindrome.
Example 1:
Input:
A man, a plan, a canal: Panama
Output:
true
Example 2:
Input:
race a car
Output:
false
Here is my code, my code was able to pass the first example A man, a plan, a canal: Panama, but failed the second one race a car. I have no idea why.
class Solution:
def isPalindrome(self, s: str) -> bool:
mystring = s.lower()
mystring2 = ""
for i in mystring:
if i.isalnum():
mystring2 += i
return mystring2
for i in range(0, int(len(mystring2)/2)):
if mystring2[i] != mystring2[len(mystring2)-i-1]:
return False
return True
You don't need to return mystring2 after your first loop ends. You can directly start with the second loop. Your logic seems to be correct. Just doing this modification would work.
Correct Code -
class Solution:
def isPalindrome(self, s: str) -> bool:
mystring = s.lower()
mystring2 = ""
for i in mystring:
if i.isalnum():
mystring2 += i
for i in range(0, int(len(mystring2)/2)):
if mystring2[i] != mystring2[len(mystring2)-i-1]:
return False
return True
In your case, it is failing because you are just returning from the function halfway through your code. The last loop part of code was never getting executed in your case.
Also, in python, the last part of the loop can be written in a much simpler way -
Slightly Simplified code -
class Solution:
def isPalindrome(self, s: str) -> bool:
mystring = s.lower()
mystring2 = ""
for i in mystring:
if i.isalnum():
mystring2 += i
return mystring2 == mystring2[::-1]
Similarly, you could also reduce the first part of your code as -
More Simplified Code -
class Solution:
def isPalindrome(self, s: str) -> bool:
mystring2 = ''.join([character.lower() for character in s if chracter.isalnum()])
return mystring2 == mystring2[::-1]
Easier way to do this is to reverse the string and compare in more pythonic way
a = "A man, a plan, a canal: Panama"
newA= [i.lower() for i in a if i.isalnum()]
print(newA == newA[::-1])
At last, you have to return true or false only, not the string.
class Solution:
def isPalindrome(self, s: str) -> bool:
mystring = s.lower()
mystring2 = ""
for i in mystring:
if i.isalnum():
mystring2 += i
return (mystring2 == mystring2[::-1])
Even though is not recommended, we can also solve this problem with a regular expression.
This'll pass:
class Solution:
def isPalindrome(self, s):
s = ''.join(re.findall(r'(?is)[a-z0-9]+', s)).lower()
return s == s[::-1]
Java
class Solution {
public boolean isPalindrome(String s) {
String original = s.replaceAll("(?i)[^a-z0-9]", "").toLowerCase();
String reversed = new StringBuffer(original).reverse().toString();
return original.equals(reversed);
}
}
JavaScript
var isPalindrome = function(s) {
var original = s.replace(/[^a-z0-9]/isg, '');
var reversed = original.split('').reverse().join('');
return original.toLowerCase() == reversed.toLowerCase();
};
References
For additional details, you can see the Discussion Board. There are plenty of accepted solutions with a variety of languages and explanations, efficient algorithms, as well as asymptotic time/space complexity analysis1, 2 in there.

Python 3.7 : Variable switching between int and str depending on usage

I am working on a python Caesar cypher (for fun, I know it's not a good way to encrypt messages) and I ran into a problem. When I run the first bit of code, I get an error saying that the first arg in replace() must be a string, not an integer, when it is in face already a string ("TypeError: replace() argument 1 must be str, not int").
However, whenever I try to use it as an indice for a string, it tells me it is not an int ("TypeError: string indices must be integers").
Here is the code, thanks in advance. (There are a few more parts to the code but I don't think they're relevant to the question.)
def find_str(s, char):
index = 0
if char in s:
c = char[0]
for ch in s:
if ch == c:
if s[index:index+len(char)] == char:
return index
index += 1
return -1
class Alpha:
def __init__(self, message, key):
self.fKey = key
self.msg = str(message)
self.alpha = []
self.spcLoc = []
self.spcNum = 0
self.encryptedMessage = str(self.msg)
def encMsg(self):
for letter in self.spcNum):
str.replace(letter, find_str(self.alpha,letter) + self.fKey, self.spcNum)
def main():
msg = 'This is sparta'
key = 1
a = Alpha(msg, key)
a.encMsg()
for letter in self.spcNum:
This is a for-each loop which loops over every value in self.spcNum.
For example
for letter in ['a','b','c']:
print(letter)
will print out the letters a, b and c.
You can not iterate over self.spcNum. Because it is an integer (with the value 0) not a list.
There are other problems in the code too,
str.replace(letter, find_str(self.alpha,letter) + self.fKey, self.spcNum)
You're using this method incorrectly.
Correct usage:
stringYouWantEdited = "hi, my name is DGGB, hi"
substringYouWantReplaced = "hi"
newSubstring = "hello"
numberOfTimesThisShouldHappen = 1
newString = stringYouWantEdited.replace(substringYouWantReplaced , newSubstring , numberOfTimesThisShouldHappen )
print(newString)

Formatting and Understanding the Process for an "Unjumbler"

Basically, the code is supposed to print out what it believes to be the unjumbled letters based on the amount of one letter in an index. When I run it, it keeps saying stringlist is not defined. Any idea why? Could use some help with formatting too.
def getMessages():
stringlist=[]
stringinput=""
while stringinput!="DONE":
stringinput=input("Type each string. When you are finished, type DONE. ")
if stringinput=="DONE":
return stringlist
else:
stringlist.append(stringinput)
def countFrequencies(stringlist, indexval):
letterdict={"a":0, "b":0, "c":0, "d":0, "e":0, "f":0, "g":0, "h":0, "i":0, "j":0, "k":0, "l":0,
"m":0, "n":0, "o":0, "p":0, "q":0, "r":0, "s":0, "t":0, "u":0, "v":0, "w":0, "x":0,
"y":0, "z":0}
for i in stringlist:
counter=i[indexval]
letterdict[counter]+=1
return letterdict
def mostCommonLetter(letterdict):
ungarble=""
highest=-1
for i in letterdict.keys():
if letterdict[i]>highest:
ungarble=i
highest=letterdict[i]
return ungarble
getMessages()
countFrequencies(stringlist, indexval)
print("Recovered message: ", mostCommonLetter(letterdict))
Your indentation is incorrect.
You could use Counter to aggregate the frequencies of letters in each line.
from collections import Counter
def getMessages():
stringlist=[]
stringinput=""
while stringinput!="DONE":
stringinput=input("Type each string. When you are finished, type DONE. ")
if stringinput=="DONE":
return stringlist
else:
stringlist.append(stringinput)
def countFrequencies(stringlist):
frequencies = Counter()
for line in stringlist:
frequencies.update(line)
return frequencies
def mostCommonLetter(frequencies):
return max(frequencies)
stringlist = getMessages()
frequencies = countFrequencies(stringlist)
print("Recovered message: ", mostCommonLetter(frequencies))

How to parse optional and named arguments into list and dict?

I would like a compact way to parse one-line strings that start with mandatory list-elements (unspecified number) and ends with dictionary-like definitions using =.
The element-separator should be , and spaces should become part of the element -- which rules out shlex, I think.
Spaces should/may be stripped at the start and end (quotes, too)
If an element would contain a , the user is required to quote with "
either "key=value,with,comma"
or key="value,with,comma" -- whatever is easier to implement
It's ok to have undefined behavior with wrong quoting or with elements containing a quote-char.
Behaviour with double keys is also undefined.
Slight variations of this are ok if it simplifies the implementation a lot.
Lets call the function opts and have it return a list and a dict,
Here are some input examples and desired results:
opts('dog,cat') # -> ["dog", "cat"], {}
opts('big fish,cat') # -> ["big fish", "cat"], {}
opts('"a dog, a cat",a fish') # -> ["a dog, a cat", "a fish"], {}
opts('key=value') # -> [] {'key':'value'}
opts('key=the value,x=y') # -> [] {'key':'the value', 'x':'y'}
opts('dog, big fish, eats="any, but peas", flies = no! '
# -> ['dog','big fish'], {'eats':'any, but peas', 'flies':'no!' }
I disregarded shlex, argparse, optparse and configparser, I can't see how I should do it with those. I am not sure if Regular Expressions crack this nut, though. json is a bit too strict with the syntax, I think. As is eval, if a bit more to my liking (because it parses python ;-))
My manual solution in macro is not very flexible and I would like to have its parameter handling be replaced by the more general opts(s) function described above:
def macro(s):
kw = { 'see':u"\\see", 'type':u"Chapter", 'title': u'??' }
params = s.split(",")
kw['label'] = params[0]
if len(params) > 1: # very inflexible
kw['title'] = params[1]
for param in params[2:]: # wrong if p[1] is already key=value
key, value = param.split("=",1) # doesn't handle anything, too simple
kw[key] = value
# ...rest of code...
The goal is to have the reusable function opts to be used here:
def macro_see(s):
ls, kw = opts(s)
# ...rest of code...
In this solution, opts is essentially the same as yuvi's (with the added strip). The splitter is a customization of shlex, using posix mode to handle quotes.
def mylex(x):
lex = shlex.shlex(x, posix=True)
lex.whitespace = ','
lex.whitespace_split = True
return list(lex)
def opts(x):
ll = []
dd = {}
items = mylex(x)
for item in items:
if '=' in item:
k, v = item.split('=',1)
dd[k.strip(' "')] = v.strip(' "')
else:
ll.append(item.strip(' "'))
return (ll,dd)
It passes:
trials = [
['dog,cat',(["dog", "cat"], {})],
['big fish,cat',(["big fish", "cat"], {})],
['"a dog, a cat",a fish',(["a dog, a cat", "a fish"], {})],
['key=value',([], {'key':'value'})],
['key=the value,x=y',([], {'key':'the value', 'x':'y'})],
['dog, big fish, eats="any, but peas", flies = no!',(['dog','big fish'], {'eats':'any, but peas', 'flies':'no!' })],
]
for (x,y) in trials:
print('%r'%x)
args = opts(x)
print(args)
if args != y:
print('error, %r'%y)
print('')
What you probably want is to create your own split function, with a flag that toggles when " are introduced. Something like this:
def my_split(string, deli):
res = []
flag = True
start = 0
for i, c in enumerate(string):
if c == '"':
if flag:
flag = False
else:
flag = True
if c == deli and flag:
res.append(string[start:i])
start = i+1
res.append(string[start:])
return res
From there, it's really easy to proceed:
def opts(s):
items = map(lambda x: x.strip(), my_split(s, ','))
# collect
ls = []
kw = {}
for item in items:
if '=' in item:
k, v = item.split('=', 1)
kw[k.strip()] = v.strip()
else:
ls.append(item)
return ls, kw
It's not perfect, there are still a few thing you might need to work on, but that's definetly a start.
Here's an approach where I massage the input so it matches the syntax requirements for python function arguments, then harness the python interpreter via eval to parse them.
import re
s = 'hog, "cog" , dog, bog, "big fish", eats="any, but peas", flies = "no!" '
# I think this will add quotes around any unquoted positional arguments
s = re.sub('(^|,)\ *([^\"\',\ ]+)\ *(?=,|$)', r'\1"\2"', s)
def f(*args, **kwargs):
return (args, kwargs)
print eval("f("+s+")", {'f':f})
output:
(('hog', 'cog', 'dog', 'bog', 'big fish'), {'flies': 'no!', 'eats': 'any, but peas'})

Implement python replace() function without using regexp

I'm trying to rewrite the equivalent of the python replace() function without using regexp. Using this code, i've managed to get it to work with single chars, but not with more than one character:
def Replacer(self, find_char, replace_char):
s = []
for char in self.base_string:
if char == find_char:
char = replace_char
#print char
s.append(char)
s = ''.join(s)
my_string.Replacer('a','E')
Anybody have any pointers how to make this work with more than one character? example:
my_string.Replacer('kl', 'lll')
How clever are you trying to be?
def Replacer(self, find, replace):
return(replace.join(self.split(find)))
>>> Replacer('adding to dingoes gives diamonds','di','omg')
'adomgng to omgngoes gives omgamonds'
Here is a method that should be pretty efficient:
def replacer(self, old, new):
return ''.join(self._replacer(old, new))
def _replacer(self, old, new):
oldlen = len(old)
i = 0
idx = self.base_string.find(old)
while idx != -1:
yield self.base_string[i:idx]
yield new
i = idx + oldlen
idx = self.base_string.find(old, i)
yield self.base_string[i:]
Let's try with some slices (but you really should consider using the builtin method of python) :
class ReplacableString:
def __init__(self, base_string):
self.base_string =base_string
def replacer(self, to_replace, replacer):
for i in xrange(len(self.base_string)):
if to_replace == self.base_string[i:i+len(to_replace)]:
self.base_string = self.base_string[:i] + replacer + self.base_string[i+len(to_replace):]
def __str__(self):
return str(self.base_string)
test_str = ReplacableString("This is eth string")
test_str.replacer("eth", "the")
print test_str
>>> This is the string

Categories