My problem is that when i run my code with the input 'word1 word2 word3 word2' (for example) it works as intended, meaning it removes a word if it finds a duplicate of that word in the string.
I have no clue why i get assertion error why i try to run:
set1 = StringSet('word1 word2 word3 word2')
assert str(set1) == 'word1 word2 word3'
output:
assert str(set1) == 'word1 word2 word3'
AssertionError
if i remove the assert and print set1 the code works.
'word1 word2 word3'
Thanks in regards.
My code:
class StringSet:
def __init__(self, a_string=""):
self.a_string = a_string
self.__words = []
for word in a_string.split(" "):
if word not in self.__words:
self.__words.append(word)
def __str__(self):
s = ""
for word in self.__words:
s+= f"{word}"
s+=" "
return "{}".format(s)
def size(self):
counter = 0
for word in self.__words:
counter += 1
return f"{counter}"
def __add__(self, other):
s = ""
new_string = self.a_string + " " + other.a_string
new_set = StringSet(new_string)
return new_set
def make_list(self):
pass
def query(self, a_string=""):
new_string = StringSet(a_string)
return new_string
def at(self):
pass
Related
def spin_words(sentence):
adjusted_string = sentence.split()
for i in adjusted_string:
if len(i) > 5:
print(i[::-1], end = ' ')
else:
print(i, end = ' ')
The problem is asking to take a string and return the same string but, with all the five letter words or more in reversed
def spin_words(sentence):
splitted_string = sentence.split()
reversed_fives = [s[::-1] if len(s) >= 5 else s for s in splitted_string]
return " ".join(reversed_fives)
I have the following python script which does regex matching using 'AND', 'OR' features as well:
class PyBoolReException(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return str(self.value)
class PyBoolRe:
def __init__(self, boolstr):
# Require whitespace before words?
self.__needspace = True
# whitespace re
self._wspre = re.compile('^\s*$')
# create regexp string
self.__rexplist = []
oparct = boolstr.count('(')
clparct = boolstr.count(')')
if oparct != clparct:
raise PyBoolReException, 'Mismatched parantheses!'
self.__parse(boolstr)
# if NOT is one of the members, reverse
# the list
# print self.__rexplist
if '!' in self.__rexplist:
self.__rexplist.reverse()
s = self.__makerexp(self.__rexplist)
# print s
self.__rexp = re.compile(s)
def match(self, data):
""" Match the boolean expression, behaviour
is same as the 'match' method of re """
return self.__rexp.match(data)
def search(self, data):
""" Search the boolean expression, behaviour
is same as the 'search' method of re """
return self.__rexp.search(data)
def __parse(self, s):
""" Parse the boolean regular expression string
and create the regexp list """
# The string is a nested parantheses with
# any character in between the parens.
scopy = s[:]
oparmatch, clparmatch = False, False
# Look for a NOT expression
index = scopy.rfind('(')
l = []
if index != -1:
oparmatch = True
index2 = scopy.find(')', index)
if index2 != -1:
clparmatch = True
newstr = scopy[index+1:index2]
# if the string is only of whitespace chars, skip it
if not self._wspre.match(newstr):
self.__rexplist.append(newstr)
replacestr = '(' + newstr + ')'
scopy = scopy.replace(replacestr, '')
self.__parse(scopy)
if not clparmatch and not oparmatch:
if scopy: self.__rexplist.append(scopy)
def is_inbetween(self, l, elem):
""" Find out if an element is in between
in a list """
index = l.index(elem)
if index == 0:
return False
if index>2:
if index in range(1, len(l) -1):
return True
else:
return False
else:
return True
def __makenotexpr(self, s):
""" Make a NOT expression """
if s.find('!') == 0:
return ''.join(('(?!', s[1:], ')'))
else:
return s
def __makerexp(self, rexplist):
""" Make the regular expression string for
the boolean match from the nested list """
is_list = True
if type(rexplist) is str:
is_list = False
elem = rexplist
elif type(rexplist) is list:
elem = rexplist[0]
if type(elem) is list:
elem = elem[0]
eor = False
if not is_list or len(rexplist) == 1:
eor = True
word_str = '.*'
s=''
# Implementing NOT
if elem == '!':
return ''.join(('(?!', self.__makerexp(rexplist[1:]), ')'))
# Implementing OR
elif elem.find(' | ') != -1:
listofors = elem.split(' | ')
for o in listofors:
index = listofors.index(o)
in_bet = self.is_inbetween(listofors, o)
if o:
o = self.__makenotexpr(o)
if in_bet:
s = ''.join((s, '|', word_str, o, '.*'))
else:
s = ''.join((s, word_str, o, '.*'))
# Implementing AND
elif elem.find(' & ') != -1:
listofands = elem.split(' & ')
for a in listofands:
index = listofands.index(a)
in_bet = self.is_inbetween(listofands, a)
if a:
a = self.__makenotexpr(a)
s = ''.join((s, word_str, a, '.*'))
else:
if elem:
elem = self.__makenotexpr(elem)
s = ''.join((elem, '.*'))
if eor:
return s
else:
return ''.join((s, self.__makerexp(rexplist[1:])))
When the search phrase is as follows:
p = PyBoolRe('Python | Perl')
s1 = 'Guido invented Python'
s2 = 'Guido Perl'
if p.match(s1):
print 'Match found for first string'
else:
print 'No match found for first string'
if p.match(s2):
print 'Match found for second string'
else:
print 'No match found for second string'
Then both s1 & s2 match
But when the search phrase is:
p = PyBoolRe('Guido & (Python | Perl)')
s1 = 'Guido invented Python'
s2 = 'Guido Perl is great'
Then it should match if s1 or s2 has "Guido Python" or "Guido Perl". s2 has that but it does not match it. On the other hand, it matches s1, which it should not. Why is that?
Please help!! How can I get it to work??
Your generated expression is
.*Python.*|.*Perl.*.*Guido.*
while it should look like
(?=.*Guido.*)(?:.*Python.*|.*Perl.*)
So the parser needs some revision.
1) x|y should be enclosed into (?:...) (at least when used inside another block). Otherwise, | unluckily takes the global priority in the regexp.
2) x & y should be converted into (?=x)y (trailing context may be used to express the and between regular expressions)
I am writing a function where a user puts in text and a word and if the word is in the list, it returns the location of the word in the list.
list = ["hello", "goodbye", "name"]
def fact(txt, my_list):
text = txt.split()
for i in range(0, len(my_list)):
for j in range(0, len(text)):
if(my_list[i] == text[i]):
return my_list[i]
value = fact("hello, my name is", "name")
print(value)
However, this only seems to return none every time. Is there any particular reason it is not working?
Example:
def f(text, search):
if search in text.split():
print('Word "{}" has been found # index: {}'.format(search, text.split().index(search)))
Output:
data = 'hello world, my name is -e'
f(data, '-e')
Word "-e" has been found # index: 5
this works fine
def getword(word, text):
text = text.replace(',', '') # remove ',' by nothing
tmp = text.split(' ')
if word in tmp:
print("word: [%s] find at index %s in this text:[ %s]" % (word, tmp.index(word), text))
return tmp.index(word)
else:
print("Did not find [%s] in [%s]" % (word, text))
return -1
word = "what"
text = "Hello, I am groot, what is your name"
index = getword(word, text)
So me and my groupmates are trying to make a Markov Model that finds the probability of letter transitions in a text file. In the text file we have a group of words "Steam, Teams, Meets, Teems, Eat, Ate, State, Tease, Test, Mast, Mates". In the code we have spaces added to the beginning of the first letter and after the last letter in each word. So the problem we are having is making a function that puts the letter transitions into separate dictionaries. For example all the e transitions(ex: "_e", "ea"...etc, the _ is a space) would go into a dictionary and then the t, s, a, and m.
This is the code we have so far:
import random
import re
inFile = open("markov.txt",'r')
file = inFile.read().lower()
inFile.close()
file=re.sub('[^[a-z\ \']+', " ", file)
fileTuple=tuple(file.split())
fileList=list(fileTuple)
fileString=file
def addSpaces(atuple):
theString=''
for i in atuple:
theString=theString+' '+i+' '
return(theString)
print('The words in the text file:',addSpaces(fileTuple))
fileDict = { }
for i in fileList:
fileDict['_'+i+'_']=''
print("This is a dictionary of the words in the text file with underscores as spaces:",fileDict)
def countTotalWords(atuple):
count=0
for i in atuple:
count=count+1
return(count)
print('Total amount of words:',countTotalWords(fileTuple))
def findFirstLetter(aDict):
for i in aDict:
aDict[i]=i[0:2]
return(aDict)
print('The first letters of each word in the file:',findFirstLetter(fileDict))
valueList=list(fileDict.values())
keyList=list(fileDict.keys())
def countFirstLetters(alist):
d={}
count = 0
for character in alist:
if character in d:
d[character] += 1
else:
d[character] = 1
return d
print('Total amount of occurences of each first letter:',countFirstLetters(valueList))
def countFirstLettersProbability(alist):
d={}
count = 0
for character in alist:
if character in d:
d[character] += (1/countTotalWords(fileTuple))
else:
d[character] = (1/countTotalWords(fileTuple))
return d
print('Probility that each letter is the first in the word:',countFirstLettersProbability(valueList))
def countAllLetters(alist):
d={}
for word in alist:
for char in word:
if char in d:
d[char] += 1
else:
d[char] = 1
return d
print('Total amount of occurences of each letter:',countFirstLetters(fileString))
Here is a solid start; I've rewritten your code as a Markov class.
from random import choice
import re
from collections import defaultdict
from itertools import chain, tee, izip
def strip_non_alpha(text, reg=re.compile('[^a-z\']+', re.IGNORECASE)):
return reg.sub(' ', text.strip())
def nwise(iterable, n):
"s -> (s0,s1, ... sn-1), (s1,s2, ... sn), (s2, s3, ... sn+1), ..."
args = tee(iterable, n)
for i,t in enumerate(args):
for j in range(i):
next(t, None)
return izip(*args)
class Markov():
CHAINLEN = 3
PRE = ' '*(CHAINLEN - 1)
#classmethod
def from_file(cls, fname):
with open(fname) as inf:
return Markov(inf)
def __init__(self, text):
"""
Create a new Markov chain model
text
Either a string or a sequence of strings
"""
self.lookup = defaultdict(list)
self.words = 0
self.strings = 0
if hasattr(text, '__iter__'):
for s in text:
self.add_text(s)
else:
self.add_text(text)
def add_text(self, text):
"""
Add a string to the lookup table
text
string to add
"""
text = strip_non_alpha(text).lower()
self.words += len(text.split())
self.strings += 1
for chars in nwise(chain(Markov.PRE, text, Markov.PRE), Markov.CHAINLEN):
stem = ''.join(chars[:-1])
self.lookup[stem].append(chars[-1])
def gen_text(self, upto=200):
"""
Generate a string
upto
maximum length of string to be generated
"""
s = Markov.PRE
res = []
for i in range(upto + Markov.CHAINLEN):
ch = choice(self.lookup[s])
res.append(ch)
s = s[1:] + ch
if s == Markov.PRE: # terminal string
break
return ''.join(res[:-(Markov.CHAINLEN - 1)])
def __str__(self):
return '\n'.join("'{}': {}".format(k, self.lookup[k]) for k in sorted(self.lookup))
def main():
# mc = Markov.from_file('markov.txt')
mc = Markov('Steam,Teams,Meets,Teems,Eat,Ate,State,Tease,Test,Mast,Mates'.split(','))
print mc.strings, mc.words
print mc
for i in range(10):
print(mc.gen_text())
if __name__=="__main__":
main()
I have this code:
def reverse (word):
newword = ''
letterflag = -1
for numletter in word:
newword += word[letterflag]
letterflag-=1
s=newword
s.upper()
return newword
def isPalindrome(word, ignorecase=False):
"""
>>> type(isPalindrome("bob"))
<type 'bool'>
>>> isPalindrome("abc")
False
>>> isPalindrome("bob")
True
>>> isPalindrome("a man a plan a canal, panama")
True
>>> isPalindrome("A man a plan a canal, Panama")
False
>>> isPalindrome("A man a plan a canal, Panama", ignorecase=True)
True
"""
word = str (word)
newword = reverse(word)
if word == newword:
return True
else:
return False
When I type "Bob", I want it to return true because of the capital B.
Just make your input always lower case that way you can avoid that problem altogether.
word = str(word)
word = word.lower()
word = word.replace(',', '') # removes any commas from the string
newword = word[::-1] # reverse string
if word == newword:
return True
else:
return False
The best way to learn what is being done in this answer is to try individual parts of it in the Python console.
To fix your reverse() do this:
def reverse (word):
newword = ''
letterflag = -1
for numletter in word:
newword += word[letterflag]
letterflag-=1
return newword
Notice I also took out the .upper() parts since they are not effective and reverse is not the correct place to have it since you can not compare a reversed upper case word with the original word. Also s.upper() does not work like you think it does. It returns an upper case copy of s without modifying s. You would only to do return newword.upper() to make it work.
Additionally, the letterflag is not needed, you could simply do:
def reverse (word):
newword = ''
for letter in word:
newword = letter + newword #adds each new letter to beginning
return newword
However the simplest way to do a reverse functions is:
def reverse (word):
return word[::-1]
Your isPalendrome needs to be this to basically work:
def isPalindrome(word, ignorecase=False):
word = word.replace(',', '').replace(' ', '') #removes ,'s and spaces
if ignorecase:
return word.lower() == reverse(word).lower()
else:
return word == reverse(word)
Here is a more advanced solution that will ignore anything that is not a letter with an option to ignore case. This version does need reverse.
def isPalindrome(word, ignorecase=False):
abcs = 'abcdefghijklmnopqrstuvwxyz'
word = [c for c in word.lower()*ignorecase or word if c in abcs]
return word == word[::-1] #no need for reverse
If you want to have the option to be case sensitive, or the option to be case insensitive, add an IF statement in the isPalindrome() function:
if ignorecase == True:
word = word.lower()
It should look like this when you are done:
import string
def reverse (word):
newword = ''
letterflag = -1
for numletter in word:
newword += word[letterflag]
letterflag-=1
s=newword
s.upper()
return newword
def isPalindrome(word, ignorecase=False):
"""
>>> type(isPalindrome("bob"))
<type 'bool'>
>>> isPalindrome("abc")
False
>>> isPalindrome("bob")
True
>>> isPalindrome("a man a plan a canal, panama")
True
>>> isPalindrome("A man a plan a canal, Panama")
False
>>> isPalindrome("A man a plan a canal, Panama", ignorecase=True)
True
"""
if ignorecase == True:
word = word.lower()
word = word.replace(',', '')
word = word.replace(' ', '')
newword = reverse(word)
if word == newword:
return True
else:
return False
That code gives me the following feedback:
isPalindrome('Bob', ignorecase=True)
Out[34]: True
isPalindrome('Bob')
Out[35]: False