count characters occurences in string

count characters occurences in string - python

I want to find out how often "reindeer" (with its letters in any order) occurs in a random string, and what string is left over after the "reindeer" letters are removed. I need to preserve the order of the leftover string.
So for example
"erindAeer" -> A (reindeer comes 1 time)
"ierndeBeCrerindAeer" -> ( 2 reindeers, left over is BCA)
I thought of sorting and removing "reindeer", but i need to preserve the order . What's a good way to do this?

We can replace those letters after knowing how many times they repeat, and Counter is convenient for counting elements.
from collections import Counter
def leftover(letter_set, string):
    """Report how many copies of ``letter_set`` fit in ``string`` and what remains.

    The letters may appear in any order.  For every complete copy, the
    earliest occurrences of the needed letters are removed from ``string``;
    the remaining characters keep their original order.

    Returns a sentence of the form ``"<n> <letter_set>, left over is <rest>"``.
    """
    needed = Counter(letter_set)
    available = Counter(string)
    # The scarcest letter limits how many complete copies can be formed.
    repeat = min(available[ch] // per_copy for ch, per_copy in needed.items())
    remainder = string
    for ch, per_copy in needed.items():
        # The third argument caps how many (leftmost) occurrences are removed.
        remainder = remainder.replace(ch, "", per_copy * repeat)
    return f"{repeat} {letter_set}, left over is {remainder}"
print(leftover("reindeer", "ierndeBeCrerindAeer"))
print(leftover("reindeer", "ierndeBeCrerindAeere"))
print(leftover("reindeer", "ierndeBeCrerindAee"))
Output:
2 reindeer, left over is BCA
2 reindeer, left over is BCAe
1 reindeer, left over is BCerindAee

Here is a rather simple approach using collections.Counter:
from collections import Counter
def purge(pattern, string):
    """Return ``(copies, rest)`` for the letters of ``pattern`` inside ``string``.

    ``copies`` is how many times all letters of ``pattern`` can be taken out of
    ``string`` (in any order).  ``rest`` holds the characters that are not
    used up, in their original order; when a letter has surplus occurrences,
    the earliest ones are the ones kept.
    """
    available = Counter(string)
    per_copy = Counter(pattern)
    copies = min(available[ch] // need for ch, need in per_copy.items())
    # How many occurrences of each character survive the removal.
    keep = {ch: total - per_copy[ch] * copies for ch, total in available.items()}
    kept = []
    for ch in string:
        if keep[ch]:
            keep[ch] -= 1
            kept.append(ch)
    return copies, "".join(kept)
>>> purge("reindeer", "ierndeBeCrerindAeer")
(2, 'BCA')

Here is the code in Python:
def find_reindeers(s):
    """Count the copies of "reindeer" hidden in ``s`` and return what is left.

    The letters of "reindeer" may appear in any order.  For each complete
    copy, the earliest occurrences of its letters are removed from ``s``.

    Returns:
        A ``(count, left_over)`` tuple, where ``left_over`` contains every
        character that is not part of a complete copy, in original order.
    """
    word = "reindeer"
    # Letters required for a single copy of the word.
    needed = {}
    for ch in word:
        needed[ch] = needed.get(ch, 0) + 1

    # Occurrences of those letters in the input.
    found = dict.fromkeys(needed, 0)
    for ch in s:
        if ch in found:
            found[ch] += 1

    total_occ = min(found[ch] // needed[ch] for ch in needed)

    # Track how many occurrences of each letter still have to be removed.
    # Comparing the total count against the quota (as a plain threshold)
    # would keep *every* occurrence of a letter as soon as one is surplus.
    to_remove = {ch: total_occ * per_copy for ch, per_copy in needed.items()}
    left_over = []
    for ch in s:
        if to_remove.get(ch, 0) > 0:
            to_remove[ch] -= 1
        else:
            left_over.append(ch)
    return total_occ, "".join(left_over)
print(find_reindeers("ierndeBeCrerindAeer"))
Output for ierndeBeCrerindAeer:
(2, "BCA")

You can do it by collecting the matching letters in a queue and pulling them out in the order of the word:
import queue
word = "reindeer"
given_string = "ierndeBeCrerindAeer"
counter = 0
# (position, character) pairs that do not end up in a complete copy of word.
unused = []
# Letters that could belong to the word wait in a queue, tagged with their
# position so that unused ones can be put back in their original order.
letters = queue.Queue()
for position, character in enumerate(given_string):
    if character in word:
        letters.put((position, character))
    else:
        unused.append((position, character))

# Assemble copies of word one letter at a time until a letter is missing.
building = bool(word)  # an empty word can never be assembled
while building:
    taken = []
    for wanted in word:
        found = False
        # Rotate through the queue at most once.  Without this bound a
        # missing letter would make the search spin forever.
        for _ in range(letters.qsize()):
            candidate = letters.get()
            if candidate[1] == wanted:
                taken.append(candidate)
                found = True
                break
            letters.put(candidate)
        if not found:
            building = False
            break
    if building:
        counter += 1
    else:
        # Letters of an incomplete copy belong to the left over string.
        unused.extend(taken)

# Whatever is still queued was never needed.
while not letters.empty():
    unused.append(letters.get())
new_string = "".join(character for _, character in sorted(unused))
print(f"The word {word} occurs {counter} times in the string {given_string}.")
print("The left over word is",new_string)
Output will be:
The word reindeer occurs 2 times in the string ierndeBeCrerindAeer.
The left over word is BCA
It's easy to use queue here so that we don't repeat the elements that are already present or found.
Hope this answers your question, Thank you!

Related

To Find Vowel-Substring From a String

I have a string of lowercase English letters and an integer of the substring length. I have to find the substring of that length that contains the most vowels.
Example:
s = 'azerdii'
k = 5
The possible 5 character substrings are:
'azerd' No. of vowels = 2
'zerdi' No. of vowels = 2
'erdii' No. of vowels = 3
So the answer should be 'erdii'
Here is my code:
# Question's attempt: collects slices s[i:i+k] into lst, tallies vowels for
# them into tempL, prints both lists and returns None.
def findSubstring(s, k):
i = 0
lst = []
count = 0
tempL = []
# Advance i one step at a time until the slice end (i+k) reaches len(s).
while(i != len(s)):
a = i+k
temp = s[i:a]
lst.append(temp)
if a != len(s):
i+=1
else:
break
# NOTE: count is initialised once above and is not reset anywhere below, so
# the values appended to tempL are running totals (the reported [2, 4, 7]).
for word in lst:
for alphabet in word:
if alphabet in 'aeiou':
count += 1
tempL.append(count)
print(lst)
print(tempL)
return
s = input()
k = int(input().strip())
print(findSubstring(s, k))
I'm getting
['azerd', 'zerdi', 'erdii']
[2, 4, 7]
But the count should be
[2, 2, 3]
Please forgive any stupid errors I may have. I will certainly appreciate any help.

You need to reset the count=0 at line number 17.
Try this code
def findSubstring(s, k):
    """Print all length-k windows of ``s`` and the vowel count of each window.

    Two lines are printed: the list of windows and the list of their vowel
    counts (lowercase a, e, i, o, u).  Nothing is returned.

    Only full-length windows are produced.  When ``k`` is not between 1 and
    ``len(s)`` there are no such windows and two empty lists are printed,
    rather than slices shorter than ``k``.
    """
    if 0 < k <= len(s):
        lst = [s[i:i + k] for i in range(len(s) - k + 1)]
    else:
        lst = []
    tempL = []
    for word in lst:
        # Reset for every window; otherwise the counts accumulate.
        count = 0
        for alphabet in word:
            if alphabet in 'aeiou':
                count += 1
        tempL.append(count)
    print(lst)
    print(tempL)
    return
s = 'azerdii'
k = 5
print(findSubstring(s, k))

You could use a sliding window approach, for an optimal time complexity single-pass solution:
def find_substring_length_k_most_vowels(s: str, k: int) -> str:
    '''Returns first substring of length k that has the max number of vowels.

    Vowels are the lowercase letters a, e, i, o, u.  If no window contains a
    vowel, the first window is returned (every window ties at zero).  An
    empty string is returned when no window of length k exists, i.e. when
    k <= 0 or k > len(s).
    '''
    if k <= 0 or k > len(s):
        return ''
    vowels = set('aeiou')
    # Count the first window directly, then slide it one character at a time.
    curr_vowel_count = sum(ch in vowels for ch in s[:k])
    max_vowel_count = curr_vowel_count
    max_window_start = 0
    for window_end in range(k, len(s)):
        # One character enters on the right, one leaves on the left.
        curr_vowel_count += (s[window_end] in vowels) - (s[window_end - k] in vowels)
        # Strict comparison keeps the earliest window on ties.
        if curr_vowel_count > max_vowel_count:
            max_vowel_count = curr_vowel_count
            max_window_start = window_end - k + 1
    return s[max_window_start:max_window_start + k]
def main() -> None:
s = 'azerdii'
k = 5
print(find_substring_length_k_most_vowels(s, k))
if __name__ == '__main__':
main()
Output:
erdii

Try the following:
def find_substr(s, k):
    """Return the first length-k substring of ``s`` with the most vowels.

    Vowels are the lowercase letters a, e, i, o, u.  ``max`` raises
    ``ValueError`` when ``s`` has no substring of length ``k``.
    """
    def vowel_total(text):
        # Each True counts as 1 when summed.
        return sum(c in 'aeiou' for c in text)

    substrings = [s[start:start + k] for start in range(len(s) - k + 1)]  # list of substrings
    # vowels = [sum(c in 'aeiou' for c in s) for s in substrings]
    # print(vowels) # [2, 2, 3]
    return max(substrings, key=vowel_total)
print(find_substr('azerdii', 5)) # 'erdii'
If you un-comment the lines that are commented out, you will see the number of vowels is correctly computed as [2, 2, 3].
Here, sum(c in 'aeiou' for c in s) gets the number of vowels in a string s, which is equivalent to
count = 0
for alphabet in word:
if alphabet in 'aeiou':
count += 1
which in turn is the same as your code, except the line count = 0. After processing each word, you need to reset count. So try adding count = 0 in your code.

Brute Force Solution
Intuition :
Initialize a set and push all the elements of the sub string in that set(knowing that the element is a vowel). When its sizes == 5(as there are 5 vowels in English alphabet) then increase the counter by 1
Approach :
Make a function isVowel that return (1/true) when the passed character is a vowel otherwise (0/false).
Initialize 2 for loop
a. Outer loop running from 0 to word.size()(till the end of the string).
b. Inner loop running from i to word.size()(till the end of the string).
Now initialize a SET inside the outer for loop (as we don’t want that the values of previous iteration to persist in it).
In the inner for loop initialize a condition that if the J th Character value is not a vowel then break the loop.
And if it is a vowel then insert it in the initialized SET
Now again initialize a condition that if the size of SET is == 5 then increase the counter by 1.
At last return the counter.
Code:
// Returns 1 when c is one of the lowercase vowels a, e, i, o, u and 0 otherwise.
int isVowel(char c)
{
    switch (c)
    {
    case 'a':
    case 'e':
    case 'i':
    case 'o':
    case 'u':
        return 1;
    default:
        return 0;
    }
}
// Counts the substrings of word that consist only of vowels and contain all
// five vowels (a, e, i, o, u) at least once.
int countVowelSubstrings(string word)
{
    int counter = 0; // number of qualifying substrings found so far
    // Use the string's own size type for the indices so the loop conditions
    // do not compare a signed int against an unsigned size.
    const string::size_type length = word.size();
    for (string::size_type i = 0; i < length; i++)
    {
        // A fresh set for every start index, so that vowels seen from a
        // previous start do not carry over.
        set<char> seen;
        for (string::size_type j = i; j < length; j++)
        {
            // A non-vowel ends every vowel-only substring starting at i.
            if (isVowel(word[j]) == 0)
            {
                break;
            }
            seen.insert(word[j]);
            // Once all five vowels are present, word[i..j] qualifies; longer
            // extensions are checked (and counted) on later iterations.
            if (seen.size() == 5)
            {
                counter++;
            }
        }
    }
    return counter;
}
// Code By Ambuj Kumar

An almost one-liner functional approach:
from functools import reduce
def getVowelsLength(word, k):
    """Return ``(substring, vowel_count)`` for the length-k window with most vowels.

    When several windows tie, the last of them is returned.  ``reduce``
    raises ``TypeError`` if ``word`` has no window of length ``k``.
    """
    windows = [word[start:start + k] for start in range(len(word) - (k - 1))]
    scored = [(window, sum(1 for ch in window if ch in 'aeiou')) for window in windows]

    def pick(best, candidate):
        # The current best is kept only when it is strictly better.
        return best if best[1] > candidate[1] else candidate

    return reduce(pick, scored)
which will give the correct answer: ('erdii', 3)

Creating a function that translates number to letter

I have written this function which is supposed to go through a user-provided string like 1-3-5, and output a corresponding series of letters, where A is assigned to 1, B is assigned to 2, C is assigned to 3, etc. So in the case of 1-3-5 the output would be ACE. For 2-3-4, it should print BCD. For ?-3-4 or --3-4 it should still print BCD. Here is the code I have written so far:
# Question's attempt at turning a dash-separated string of numbers into
# letters (1 -> A, 2 -> B, ...), scanning for '-' characters by index.
def number_to_letter(encoded):
result = ""
start = 0
for char in range(len(encoded)):
if encoded[char] == '-':
# NOTE: index("-") is called without a start position, so it returns the
# position of the first '-' in encoded every time.
i = encoded.index("-")
sub_str = encoded[start:i]
if not sub_str.isdigit():
result += ""
else:
letter = chr(64 + int(sub_str))
if 0 < int(sub_str) < 27:
result += letter
else:
result += ""
start += len(sub_str) + 1
return result
# NOTE(review): the function defined above is named number_to_letter;
# num_to_let is not defined anywhere in this snippet.
print(num_to_let('4-3-25'))
My output is D, when it should be DCY. I am trying to do this without using any lists or using the split function, just by finding the - character in the sub-string and converting the numbers before it into a letter. What can I do?

You can try doing something like this:
def number_to_letter(encoded):
    """Decode a dash-separated string of numbers into uppercase letters.

    1 maps to A, 2 to B, ... 26 to Z.  Characters other than digits and
    dashes are ignored, and numbers outside 1..26 produce no letter.
    """
    def decode(digits):
        # An empty run or an out-of-range number contributes nothing.
        if digits and 0 < int(digits) < 27:
            return chr(64 + int(digits))
        return ""

    result = ""
    buffer = ""
    for ch in encoded:
        if ch == '-':
            result += decode(buffer)
            buffer = ""
        elif ch.isdigit():
            buffer += ch
    # The last number has no trailing dash, so flush it here.
    return result + decode(buffer)
print(number_to_letter('1-3-5'))
output:
ACE
Explanation:
we loop for each character and add it to some buffer. when we encounter - (delimiter) we try to parse the buffer and reset it. And we do the same parsing at the end one more time and return the result.
The way the validation works is that, whenever we populate the buffer we check for number validity (using .isdigit()) and when we parse the buffer we check for the range constraints.

import string
alphabet = list(string.ascii_lowercase)
combination = "1-2-3"
def seperate(s, sep='-'):
    """Split ``s`` on every occurrence of ``sep`` without using ``str.split``.

    Args:
        s: The string to split.
        sep: The separator; may be longer than one character.

    Returns:
        The list of pieces between separators.  A string without any
        separator yields ``[s]``.

    Raises:
        ValueError: If ``sep`` is empty.
    """
    if not sep:
        raise ValueError("empty separator")
    parts = []
    start = 0
    while True:
        cut = s.find(sep, start)
        if cut == -1:
            parts.append(s[start:])
            return parts
        parts.append(s[start:cut])
        # Continue after the whole separator, not just its first character.
        start = cut + len(sep)
combination = seperate(combination)
print("".join([alphabet[int(i)-1] for i in combination]))

the approach of this code is to find the first '-' and then store where it is so next time we can look for the first '-' after the last one
when the comments in my code talk about a cycle means going through the loop (While looping:) once
def number_to_letter(encoded):
    """Decode a dash-separated string of numbers into uppercase letters.

    1 maps to A, 2 to B, ... 26 to Z.  Chunks that are not plain numbers
    (for example "?" or the empty chunk between two dashes) and numbers
    outside 1..26 are skipped.
    """
    letterString = ""
    startSubStr = 0
    # Each cycle handles the chunk between the previous '-' and the next one.
    while startSubStr <= len(encoded):
        # Find the first '-' after the one found in the last cycle.
        endSubStr = encoded.find('-', startSubStr)
        if endSubStr == -1:
            # No more dashes: the last chunk runs to the end of the string.
            endSubStr = len(encoded)
        chunk = encoded[startSubStr:endSubStr]
        # isdecimal() is true only for strings int() can parse.
        if chunk.isdecimal():
            number = int(chunk)
            if 0 < number < 27:
                letterString += chr(64 + number)
        # Step past the dash so the next search does not find it again.
        startSubStr = endSubStr + 1
    return letterString
print(number_to_letter("23-1-1-2")) #>>> WAAB
result:
WAAB

I see you don't want to use split, how about filter? ;)
import itertools
s = '1-2-3'
# groupby cuts s into alternating runs of numeric and non-numeric characters.
# Only the numeric runs are kept, so a separator run of any length or kind
# (e.g. '--' or '?-') is dropped instead of reaching int() later on.
values = [''.join(e) for e in filter(
    lambda run: run[0].isnumeric(),
    [list(g) for k, g in itertools.groupby(
        [*s], lambda ch: ch.isnumeric()
        )
    ]
    )
]
That will essentially do what .split('-') does on s. Also list(s) will behave the same as [*s] if you wanna use that instead.
Now you can just use ord and chr to construct the string you require-
start_pivot = ord('A') - 1
res = ''.join([chr(int(i) + start_pivot) for i in values])
Output
>>> s = '2-3-4'
>>> values = [''.join(e) for e in filter(
...: lambda l: l != ['-'],
...: [list(g) for k, g in itertools.groupby(
...: [*s], lambda s: s.isnumeric()
...: )
...: ]
...: )
...: ]
>>> start_pivot = ord('A') - 1
>>> res = ''.join([chr(int(i) + start_pivot) for i in values])
>>> res
'BCD'

No lists, no dicts. What about RegExp?
import re
def get_letter(n):
    """Return the uppercase letter for ``n`` (1 -> A ... 26 -> Z), else None."""
    value = int(n)
    if 1 <= value <= 26:
        return chr(value + 64)
    return None
def number_to_letter(s):
    """Replace every run of digits in ``s`` via get_letter and drop the dashes."""
    def to_letter(match):
        return get_letter(match.group())

    # NOTE(review): get_letter returns None for numbers outside 1..26;
    # confirm how re.sub treats a None replacement before relying on it.
    substituted = re.sub(r'\d+', to_letter, s)
    return substituted.replace('-', '')
print(number_to_letter('1-2-26')) # Output: ABZ

No lists, okay. But what about dicts?
def abc(nums):
    """Map each character of ``nums`` through a fixed digit-to-letter table.

    Dashes map to the empty string.  A character that is not in the table
    raises ``KeyError``.
    """
    d = {'-':'','1':'A','2':'B','3':'C','4':'D','5':'E','6':'F','7':'G','8':'H','9':'I','0':'J'}
    return ''.join(d[ch] for ch in nums)
print(abc('1-2-3-9-0')) # Output: ABCIJ
Here is a corrected version:
def abc(nums):
    """Map each character of ``nums`` through a fixed digit-to-letter table.

    Dashes map to the empty string, and characters that are not in the table
    are skipped.
    """
    d = {'-':'','1':'A','2':'B','3':'C','4':'D','5':'E','6':'F','7':'G','8':'H','9':'I','0':'J'}
    return ''.join(d.get(ch, '') for ch in nums)
print(abc('?-2-3-9-0')) # Output: BCIJ

count substrings of a string with limitation

I have a string and a dictionary. I need to count number of substrings of a given string that has letters(and number of letters) not more than in the dict. I counted only 15 substrings(2a +4b +1d + 2ba + 2ab +bd +db +abc +dba) but I cannot write the program. Need to upgrade it(I hope it requires only ELSE condition)
# Question's unfinished attempt: walks over string once and spends the
# per-letter allowance stored in dict while adding up answer.
string = 'babdbabcce'
# NOTE: this name shadows the built-in dict type for the rest of the script.
dict= {'a':1,'b':1,'d':1}
counter= 0
answer = 0
for i in range(len(string)):
for j in dict:
if string[i] == j:
# The allowance is only ever decremented here; nothing restores it.
if dict[j] > 0:
dict[j] = dict[j] - 1
counter+= 1
answer+= counter
# else:
print(answer)

It seems like you're looking for permutations of strings (including substrings within them) within another string,
so build the strings using the dictionary, then load the permutations, then
count the permutations in the other string. Note that this is probably not the most efficient solution, but it's effective.
Example code:
import itertools
import re
string_to_look_into = 'babdbabcce'
# Maximum number of times each letter may appear in a counted substring.
limits = {'a': 1, 'b': 1, 'd': 1}
# Every allowed letter repeated as often as its limit permits.
permutation_string = ''.join(c * n for c, n in limits.items())
permutations = itertools.permutations(permutation_string)
# Every prefix of every permutation is a candidate substring; the set
# removes the duplicates that different permutations produce.
matches_to_count = set()
for perm in permutations:
    for i in range(1, len(perm) + 1):
        matches_to_count.add(''.join(perm[:i]))
sum_dict = {}  # to verify matches
for item in matches_to_count:
    # A zero-width lookahead matches at every start position, so overlapping
    # occurrences are counted too.  re.escape keeps the candidate literal.
    pattern = '(?=' + re.escape(item) + ')'
    sum_dict[item] = len(re.findall(pattern, string_to_look_into))
total = sum(sum_dict.values())
print(total)

How to count the number of dashes between any two alphabetical characters?

If we have a string of alphabetical characters and some dashes, and we want to count the number of dashes between any two alphabetic characters in this string. what is the easiest way to do this?
Example:
Input: a--bc---d-k
output: 2031
This means that there are 2 dashes between a and b, 0 dash between b and c, 3 dashes between c and d and 1 dash between d and k
what is a good way to find this output list in python?

You can use a very simple solution like this:
import re
s = 'a--bc---d-k'
# Splitting on the letters leaves only the dash runs.  The first and last
# pieces lie outside the outermost letters, so they are dropped.
dashes = re.split('[a-z]', s)[1:-1]
# Turn each run into its length and concatenate the digits.
results = ''.join(map(str, map(len, dashes)))
Output:
'2031'

Solution with regex:
import re
x = 'a--bc---d-k'
# A (possibly empty) run of dashes that has a lowercase letter directly on
# both sides; the lookarounds keep the letters out of the match itself.
results = [
    gap.end() - gap.start() for gap in
    re.finditer('(?<=[a-z])-*(?=[a-z])', x)
]
print(results)
print(*results, sep='')
output:
[2, 0, 3, 1]
2031
Solution with brute force loop logic:
x = 'a--bc---d-k'
count = 0
results = []
for c in x:
    if c != '-':
        # A letter closes the current run of dashes.
        results.append(count)
        count = 0
        continue
    count += 1
results = results[1:] # cut off first length
print(results)
output:
[2, 0, 3, 1]

If your input may also begin with a dash, you could use this:
def count_dashes(string):
    """Return the number of dashes in front of each non-dash character.

    The first entry counts the dashes before the first non-dash character.
    Dashes after the last non-dash character are not reported.
    """
    all_counts = []
    previous = -1  # index of the most recent non-dash character
    for position, char in enumerate(string):
        if char != "-":
            # Everything between two non-dash characters is a dash.
            all_counts.append(position - previous - 1)
            previous = position
    return all_counts
But if your input always starts with a letter, you may not like the 0 that's always at the head of the list.
If you need the output as a string of ints, then you could add this:
def count_dashes(string):
    """Return the dash counts in front of each non-dash character as one string.

    The counts are concatenated without separators.  The first count covers
    the dashes before the first non-dash character; dashes after the last
    non-dash character are not reported.
    """
    pieces = []
    previous = -1  # index of the most recent non-dash character
    for position, char in enumerate(string):
        if char != "-":
            # Everything between two non-dash characters is a dash.
            pieces.append(str(position - previous - 1))
            previous = position
    return "".join(pieces)

Here's a simple loop approach:
myinput = 'a--bc---d-k'
output = []
# Dashes seen since the previous letter; None until the first letter shows
# up, so that leading dashes are ignored instead of indexing an empty list.
pending = None
for elem in myinput:
    if elem == '-':
        if pending is not None:
            pending += 1
    else:
        # A count is recorded only once the letter that closes the gap is
        # reached, so no extra entry is produced for the last letter.
        if pending is not None:
            output.append(pending)
        pending = 0
print(output)

Longest sequence of consecutive letters

Suppose I have a string of lower case letters, e.g.
'ablccmdnneofffpg'
And my aim is to find the longest sequence of consecutive letters inside this string, which in this case is:
'abcdefg'
The intuitive attempt is to find loop around each letter and obtain the longest sequence starting from that letter. One possible solution is
# Question's attempt: for each candidate start index, scan the rest of the
# string, accepting every character whose code point is exactly one more than
# the last accepted one, and remember the longest run and where it started.
# NOTE(review): `word` is not defined in this snippet; presumably it holds the
# input string.
longest_length = 0
start = None
current_start = 0
# Stop once the remaining characters cannot beat the best length so far.
while current_start < len(word) - longest_length:
current_length = 1
last_in_sequence = ord(word[current_start])
for i in range(current_start + 1, len(word)):
if ord(word[i]) - last_in_sequence == 1:
current_length += 1
last_in_sequence = ord(word[i])
if current_length > longest_length:
longest_length = current_length
start = current_start
# Move past positions whose right-hand neighbour is exactly one code point
# higher before choosing the next start index.
while (current_start < len(word) - 1 and
ord(word[current_start + 1]) - ord(word[current_start]) == 1):
current_start += 1
current_start += 1
Are there any other ways of solving the problem with fewer lines, or even using some pythonic methods?

You could keep track of all subsequences of consecutive characters as seen in the string using a dictionary, and then take the one with the largest length.
Each subsequence is keyed by the next candidate in the alphabet so that once the anticipated candidate is reached in the string, it is used to update the value of the corresponding subsequence in the dictionary and added as a new dictionary value keyed by the next alphabet:
def longest_sequence(s):
    """Return the longest subsequence of ``s`` made of consecutive letters.

    Each partial run is stored under the character that would extend it.
    When that character is read, the run grows and is stored again under the
    following character.  ``max`` raises ``ValueError`` for an empty ``s``.
    """
    runs = {}
    for ch in s:
        successor = chr(ord(ch) + 1)
        # Extend the run waiting for ch, or start a new run with ch alone.
        runs[successor] = runs.get(ch, '') + ch
    return max(runs.values(), key=len)
print(longest_sequence('ablccmdnneofffpg'))
# abcdefg
print(longest_sequence('ba'))
# b
print(longest_sequence('sblccmtdnneofffpgtuyvgmmwwwtxjyuuz'))
# stuvwxyz

A solution that trades memory for (some) time:
It keeps track of all sequences seen and then at the end prints the longest found (although there could be more than one).
from contextlib import suppress
class Sequence:
    """A run of letters together with the letter that would extend it."""

    def __init__(self, letters=''):
        self.letters = letters
        # Despite its name, `last` holds the letter expected next.
        self.last = self._next_letter(letters[-1:])

    def append(self, letter):
        """Add ``letter`` to the run and update the expected next letter."""
        self.letters = self.letters + letter
        self.last = self._next_letter(letter)

    def _next_letter(self, letter):
        """Return the character after ``letter``, or 'a' if ord() rejects it."""
        try:
            return chr(ord(letter) + 1)
        except TypeError:
            # ord() raises TypeError for anything but a single character,
            # e.g. the empty string of a new, empty run.
            return 'a'

    def __repr__(self):
        return 'Sequence({!r}, {!r})'.format(self.letters, self.last)
word = 'ablccmdnneofffpg'
sequences = []
for letter in word:
    # Extend the first run that is waiting for this letter ...
    for s in sequences:
        if s.last != letter:
            continue
        s.append(letter)
        break
    else:
        # ... or start a new run when none is.
        sequences.append(Sequence(letters=letter))
# Longest run first; the sort is stable, so equally long runs keep their order.
sequences = sorted(sequences, key=lambda s: len(s.letters), reverse=True)
print(sequences[0].letters)

You are basically asking for the longest increasing subsequence, which is a well-studied problem. Have a look at the pseudo code in Wikipedia.

Similar to MosesKoledoye's solution, but it only stores the lengths for the ordinals of the chars and only builds the solution string at the end. This should therefore be a little more space-efficient:
def longest_seq(s):
    """Return the longest subsequence of ``s`` made of consecutive characters.

    Only the length of the best run ending at each code point is stored; the
    string itself is rebuilt from the final code point and length at the end.
    ``max`` raises ``ValueError`` for an empty ``s``.
    """
    run_length = {}
    for ch in s:
        code = ord(ch)
        # Taking over the run that ended one code point earlier removes it.
        extended = run_length.pop(code - 1, 0) + 1
        if extended > run_length.get(code, 0):
            run_length[code] = extended
    end_code, length = max(run_length.items(), key=lambda item: item[1])
    return ''.join(chr(code) for code in range(end_code - length + 1, end_code + 1))

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

count characters occurences in string - python

Related

To Find Vowel-Substring From a String

Creating a function that translates number to letter

count substrings of a string with limitation

How to count the number of dashes between any two alphabetical characters?

Longest sequence of consecutive letters

Categories

Resources