how to find max. relative frequency in string, python? - python

How to find max. relative frequency in string? The function which takes a string argument and returns the highest relative frequency of any letter in the string. For example, the string 'sufficit' has 8 letters, and the most frequent letters are f and i, which both occur twice. Thus, the highest relative frequency is 2/8. The string 'Non-even' has 7 letters (- is not a letter), and the most frequent letter is n which occurs three times (one N and two n). Thus, the highest relative frequency is 3/7.
def max_rel_freq(string):
dic = {}
total = float(len(string))
for ch in string:
if ch in dic:
dic[ch] = dic[ch] + 1
else:
dic[ch] = 1
frequencies = []
for s in string:
frequencies.append(float(dic[s]/total))
return max(frequencies)
This works for only 'sufficit' but does not give required output for 'Non-even'.

this should work
def max_rel_freq(string):
max_freq = 1
string = string.lower()
string = list(map(str,string.strip('')))
for i in string:
if string.count(i)> max_freq:
max_freq = string.count(i)
return max_freq

Related

Why does my while loop calculate incorrect value of the string?

I am trying to find greatest length of a word from the string return it by using values of each letter from alphabets by assigning each letter it's value as per it's rank . So for example For a string s = 'abcd a', I intend to return 10 [a=1 + b=2 + c =3 + d=4] .But, I am getting output as 7 When I debugged the code, I noticed that in while loop my code skips i=2 and directly jumps on i=3. Where am I going wrong? Below is my code.
class Solution(object):
def highest_scoring_word(self,s):
# Dictionary of English letters
dt = {'a':1,'b':2,'c':3,'d':4,'e':5,'f':6,
'g':7,'h':8,'i':9,'j':10,'k':11,'l':12,
'm':13,'n':14,'o':15,'p':16,'q':17,
'r':18,'s':19,'t':20,'u':21,'v':22,
'w':23,'x':24,'y':25,'z':26}
value_sum =0
max_value =value_sum
for i in range(0,len(s)):
if s.upper():
s= s.lower()
words = s.split()
# convert the string in char array
to_char_array = list(words[i])
j=0
while j<len(to_char_array):
if to_char_array[j] in dt.keys() :
value_sum = max(dt.get(to_char_array[j]),value_sum + dt.get(to_char_array[j]))
max_value = max(value_sum,max_value)
else:
pass
j +=j+1
return max_value
if __name__ == '__main__':
p = 'abcd a'
print(Solution().highest_scoring_word(p))
`
I have created a dictionary where I have stored all letters in english alphabet and their values and later I have split the string into words using split() and then after converting each individual word into character array I have traversed it to find their occurrence in the dictionary and add to the final value. I am expecting to get a correct value of a string and finally the greatest value.
As you are using a class and methods, make use of them:
from string import ascii_lowercase as dt
class Solution(object):
def __init__(self, data):
self.scores = {}
self.words = data.lower().strip().split()
def get_scoring(self):
# for each word caculate the scoring
for word in self.words:
score = 0
# for each character in the word, find its index in 'a..z' and add it to score
# same as in your dt implementation (just using index not absolute values)
for c in word:
score += dt.find(c) + 1
self.scores[word] = score
print(self.scores)
# filer the dictionary by its greates value in order to get the word with max score:
return max(self.scores.keys(), key=lambda k: self.scores[k])
if __name__ == '__main__':
p = 'abcd fg11'
maxWord = Solution(p).get_scoring()
print(maxWord)
Out:
{'abcd': 10, 'fg11': 13}
fg11
Try using this:
class Solution(object):
def highest_scoring_word(self,s):
# Dictionary of English letters
dt = {'a':1,'b':2,'c':3,'d':4,'e':5,'f':6,
'g':7,'h':8,'i':9,'j':10,'k':11,'l':12,
'm':13,'n':14,'o':15,'p':16,'q':17,
'r':18,'s':19,'t':20,'u':21,'v':22,
'w':23,'x':24,'y':25,'z':26}
value_sum1 =0
max_value1 =value_sum1
value_sum2 =0
max_value2 =value_sum2
for i in range(0,len(s)):
if s.upper():
s= s.lower()
words = s.split()
if len(words)>1:
# convert the string in char array
to_char_array = list(words[0])
j=0
while j<len(to_char_array):
if to_char_array[j] in dt.keys() :
value_sum1 = max(dt.get(to_char_array[j]),value_sum1 + dt.get(to_char_array[j]))
max_value1 = max(value_sum1,max_value1)
else:
pass
j=j+1
to_char_array = list(words[1])
j=0
while j<len(to_char_array):
if to_char_array[j] in dt.keys() :
value_sum2 = max(dt.get(to_char_array[j]),value_sum2 + dt.get(to_char_array[j]))
max_value2 = max(value_sum2,max_value2)
else:
pass
j=j+1
if max_value2>max_value1:
return max_value2
elif max_value1>max_value2:
return max_value1
else:
return 'Both words have equal score'
else:
# convert the string in char array
to_char_array = list(words[i])
j=0
while j<len(to_char_array):
if to_char_array[j] in dt.keys() :
value_sum1 = max(dt.get(to_char_array[j]),value_sum1 + dt.get(to_char_array[j]))
max_value1 = max(value_sum1,max_value1)
else:
pass
j=j+1
return max_value1
if __name__ == '__main__':
p = 'abcd fg'
print(Solution().highest_scoring_word(p))
It is maybe of interest that the code can be greatly simplified by using features available in Python:
the_sum = sum(ord(c)-96 for c in s.lower() if c.isalpha())
to break this down. for c in s.lower() gets the lower-case characters one by one; the function ord() gives the numerical value with a of 97 so we subtract to get 1. Then we check if the character is a letter and if so accept it. Then sum() adds up all the numbers. You could break up this one line an check how the separate parts work.

How to encode (replace) parts of word from end to beginning for some N value (like abcabcab to cbacbaba for n=3)?

I would like to create a program for encoding and decoding words.
Specifically, the program should take part of the word (count characters depending on the value of n) and turns them backwards.
This cycle will be running until it encodes the whole word.
At first I created the number of groups of parts of the word which is the number of elements n + some possible remainder
*(For example for Language with n = 3 has 3 parts - two parts of 3 chars and one remainder with 2 chars).This unit is called a general.
Then, depending on the general, I do a cycle that n * takes the given character and always adds it to the group (group has n chars).
At the end of the group cycle, I add (in reverse order) to new_word and reset the group value.
The goal should be to example decode word Language with (n value = 2) to aLgnaueg.
Or Language with (n value = 3) to naL aug eg and so on.
Next example is word abcabcab (n=3) to cba cba ba ?
Output of my code don´t do it right. Output for n=3 is "naLaugeg"
Could I ask how to improve it? Is there some more simple python function how to rewrite it?
My code is there:
n = 3
word = "Language"
new_word = ""
group = ""
divisions = (len(word)//n)
residue = (len(word)%n)
general = divisions + residue
for i in range(general):
j=2
for l in range(n):
group += word[i+j]
print(word[i+j], l)
j=j-1
for j in range((len(group)-1),-1,-1):
new_word += group[j]
print(word[j])
group = ""
print(group)
print(new_word)
import textwrap
n = 3
word = "Language"
chunks = textwrap.wrap(word, n)
reversed_chunks = [chunk[::-1] for chunk in chunks]
>>> print(' '.join(reversed_chunks))
naL aug eg

Count of sub-strings that contain character X at least once. E.g Input: str = “abcd”, X = ‘b’ Output: 6

This question was asked in an exam but my code (given below) passed just 2 cases out of 7 cases.
Input Format : single line input seperated by comma
Input: str = “abcd,b”
Output: 6
“ab”, “abc”, “abcd”, “b”, “bc” and “bcd” are the required sub-strings.
def slicing(s, k, n):
loop_value = n - k + 1
res = []
for i in range(loop_value):
res.append(s[i: i + k])
return res
x, y = input().split(',')
n = len(x)
res1 = []
for i in range(1, n + 1):
res1 += slicing(x, i, n)
count = 0
for ele in res1:
if y in ele:
count += 1
print(count)
When the target string (ts) is found in the string S, you can compute the number of substrings containing that instance by multiplying the number of characters before the target by the number of characters after the target (plus one on each side).
This will cover all substrings that contain this instance of the target string leaving only the "after" part to analyse further, which you can do recursively.
def countsubs(S,ts):
if ts not in S: return 0 # shorter or no match
before,after = S.split(ts,1) # split on target
result = (len(before)+1)*(len(after)+1) # count for this instance
return result + countsubs(ts[1:]+after,ts) # recurse with right side
print(countsubs("abcd","b")) # 6
This will work for single character and multi-character targets and will run much faster than checking all combinations of substrings one by one.
Here is a simple solution without recursion:
def my_function(s):
l, target = s.split(',')
result = []
for i in range(len(l)):
for j in range(i+1, len(l)+1):
ss = l[i] + l[i+1:j]
if target in ss:
result.append(ss)
return f'count = {len(result)}, substrings = {result}'
print(my_function("abcd,b"))
#count = 6, substrings = ['ab', 'abc', 'abcd', 'b', 'bc', 'bcd']
Here you go, this should help
from itertools import combinations
output = []
initial = input('Enter string and needed letter seperated by commas: ') #Asking for input
list1 = initial.split(',') #splitting the input into two parts i.e the actual text and the letter we want common in output
text = list1[0]
final = [''.join(l) for i in range(len(text)) for l in combinations(text, i+1)] #this is the core part of our code, from this statement we get all the available combinations of the set of letters (all the way from 1 letter combinations to nth letter)
for i in final:
if 'b' in i:
output.append(i) #only outputting the results which have the required letter/phrase in it

Common words frequency and their frequency sum?

I have two dictionary. Each of dictionary include words. some words are common some are not. I want to show to output common word frequency1 frequency2 and frequency sum. How can I do that ? and I have to find the top 20.
For example my output must be like:
Common WORD frequ1. freq2 freqsum
1 print 10. 5. 15
2 number. 2. 1. 3.
3 program 19. 20. 39
Here is my code:
commonwordsbook1andbook2 = []
for element in finallist1:
if element in finallist2:
commonwordsbook1andbook2.append(element)
common1 = {}
for word in commonwordsbook1andbook2:
if word not in common1:
common1[word] = 1
else:
common1[word] += 1
common1 = sorted(common1.items(), key=lambda x: x[1], reverse=True) #distinct2
for k, v in wordcount2[:a]:
print(k, v)
Assuming that the dictionaries have individual frequencies of each word, we can do something simpler. Like...
print("Common Word | Freq-1 | Freq-2 | Freq-Sum")
for i in freq1:
if i in freq2:
print(i,freq1[i],freq2[i],freq1[i]+freq2[i])
Since you aren't allowed to use Counter, you can implement the same functionality using dictionaries. Let's define a function to return a dictionary that contains the counts of all words in the given list. Dictionaries have a get() function that gets the value of the given key, while also allowing you to specify a default if the key is not found.
def countwords(lst):
dct = {}
for word in lst:
dct[word] = dct.get(word, 0) + 1
return dct
count1 = countwords(finallist1)
count2 = countwords(finallist2)
words1 = set(count1.keys())
words2 = set(count2.keys())
count1.keys() will give us all the unique words in finallist1.
Then we convert both of these to sets and then find their intersection to get the common words.
common_words = words1.intersection(words2)
Now that you know the common words, printing them and their counts should be trivial:
for w in common_words:
print(f"{w}\t{count1[w]}\t{count2[w]}\t{count1[w] + count2[w]}")

How to find the most amount of shared characters in two strings? (Python)

yamxxopd
yndfyamxx
Output: 5
I am not quite sure how to find the number of the most amount of shared characters between two strings. For example (the strings above) the most amount of characters shared together is "yamxx" which is 5 characters long.
xx would not be a solution because that is not the most amount of shared characters. In this case the most is yamxx which is 5 characters long so the output would be 5.
I am quite new to python and stack overflow so any help would be much appreciated!
Note: They should be the same order in both strings
Here is simple, efficient solution using dynamic programming.
def longest_subtring(X, Y):
m,n = len(X), len(Y)
LCSuff = [[0 for k in range(n+1)] for l in range(m+1)]
result = 0
for i in range(m + 1):
for j in range(n + 1):
if (i == 0 or j == 0):
LCSuff[i][j] = 0
elif (X[i-1] == Y[j-1]):
LCSuff[i][j] = LCSuff[i-1][j-1] + 1
result = max(result, LCSuff[i][j])
else:
LCSuff[i][j] = 0
print (result )
longest_subtring("abcd", "arcd") # prints 2
longest_subtring("yammxdj", "nhjdyammx") # prints 5
This solution starts with sub-strings of longest possible lengths. If, for a certain length, there are no matching sub-strings of that length, it moves on to the next lower length. This way, it can stop at the first successful match.
s_1 = "yamxxopd"
s_2 = "yndfyamxx"
l_1, l_2 = len(s_1), len(s_2)
found = False
sub_length = l_1 # Let's start with the longest possible sub-string
while (not found) and sub_length: # Loop, over decreasing lengths of sub-string
for start in range(l_1 - sub_length + 1): # Loop, over all start-positions of sub-string
sub_str = s_1[start:(start+sub_length)] # Get the sub-string at that start-position
if sub_str in s_2: # If found a match for the sub-string, in s_2
found = True # Stop trying with smaller lengths of sub-string
break # Stop trying with this length of sub-string
else: # If no matches found for this length of sub-string
sub_length -= 1 # Let's try a smaller length for the sub-strings
print (f"Answer is {sub_length}" if found else "No common sub-string")
Output:
Answer is 5
s1 = "yamxxopd"
s2 = "yndfyamxx"
# initializing counter
counter = 0
# creating and initializing a string without repetition
s = ""
for x in s1:
if x not in s:
s = s + x
for x in s:
if x in s2:
counter = counter + 1
# display the number of the most amount of shared characters in two strings s1 and s2
print(counter) # display 5

Categories