Determining how many times a substring occurs in a string in Python - python

I am trying to figure out how many times a string occurs in a string. For example:
nStr = '000123000123'
Say the string I want to find is 123. Obviously it occurs twice in nStr but I am having trouble implementing this logic into Python. What I have got at the moment:
pattern = '123'
count = a = 0
while pattern in nStr[a:]:
a = nStr[a:].find(pattern)+1
count += 1
return count
The answer it should return is 2. I'm stuck in an infinite loop at the moment.
I was just made aware that count is a much better way to do it but out of curiosity, does anyone see a way to do it similar to what I have already got?

Use str.count:
>>> nStr = '000123000123'
>>> nStr.count('123')
2
A working version of your code:
nStr = '000123000123'
pattern = '123'
count = 0
start = 0
# Search repeatedly, each time resuming one character past the previous hit,
# until find() reports -1 (pattern no longer present).
while True:
    hit = nStr.find(pattern, start)
    if hit < 0:
        break
    count += 1
    start = hit + 1
print(count)

The problem with count() and other methods shown here is in the case of overlapping substrings.
For example: "aaaaaa".count("aaa") returns 2
If you want it to return 4 [(aaa)aaa, a(aaa)aa, aa(aaa)a, aaa(aaa)] you might try something like this:
def count_substrings(string, substring):
    """Count occurrences of ``substring`` in ``string``, overlaps included.

    Every start index that still leaves room for ``substring`` is tested, so
    ``count_substrings("aaaaaa", "aaa")`` is 4 whereas ``str.count`` gives 2.
    """
    substring_size = len(substring)
    last_start = len(string) - substring_size
    count = 0
    # range() works on Python 2 and 3; the original xrange() is Python 2 only.
    for i in range(last_start + 1):
        if string[i:i + substring_size] == substring:
            count += 1
    return count
count_substrings("aaaaaa", "aaa")
# 4
Not sure if there's a more efficient way of doing it, but I hope this clarifies how count() works.

import re
pattern = '123'
n =re.findall(pattern, string)
We can say that the substring 'pattern' appears len(n) times in 'string'.

In case you are searching how to solve this problem for overlapping cases.
s = 'azcbobobegghaklbob'
# Named `sub` rather than `str`: rebinding `str` would hide the builtin and
# break any later str(...) call in the same module.
sub = 'bob'
results = 0
sub_len = len(sub)
# Stop at the last index where a full-length window still fits.
for i in range(len(s) - sub_len + 1):
    if s[i:i + sub_len] == sub:
        results += 1
print (results)
Will result in 3 because: [azc(bob)obegghaklbob] [azcbo(bob)egghaklbob] [azcbobobegghakl(bob)]

I'm pretty new, but I think this is a good solution? maybe?
def count_substring(str, sub_str):
    """Count occurrences of ``sub_str`` in ``str``, overlaps included.

    The comparison window is as wide as ``sub_str`` (the original hard-coded a
    width of 2, so only two-character patterns were ever found).
    Returns 0 for an empty ``sub_str``.
    """
    width = len(sub_str)
    if width == 0:
        return 0
    count = 0
    for i in range(len(str) - width + 1):
        if str[i:i + width] == sub_str:
            count += 1
    return count

string.count(substring) is not useful in case of overlapping.
My approach:
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, overlaps included.

    A match can only be a slice exactly ``len(sub_string)`` long, so one pass
    over the start indices is enough; there is no need to try every
    (start, end) pair. Returns 0 for an empty ``sub_string``.
    """
    size = len(sub_string)
    if size == 0:
        return 0
    counter = 0
    for i in range(len(string) - size + 1):
        if string[i:i + size] == sub_string:
            counter += 1
    return counter

You are not changing a with each loop. You should put:
a += nStr[a:].find(pattern)+1
...instead of:
a = nStr[a:].find(pattern)+1

def count_substring(string, substring):
    """Count occurrences of ``substring`` in ``string``, overlaps included.

    Uses the ``substring`` parameter itself; the original body read a global
    named ``sub_string`` and only worked when the caller happened to define it.
    """
    c = 0
    l = len(substring)
    for i in range(len(string)):
        if string[i:i + l] == substring:
            c = c + 1
    return c
string=input().strip()
sub_string=input().strip()
count= count_substring(string,sub_string)
print(count)

As mentioned by @João Pesce and @gaurav, count() is not useful in the case of overlapping substrings. Try this instead:
def count_substring(string, sub_string):
    """Return how many start positions of ``string`` begin a copy of ``sub_string``."""
    width = len(sub_string)
    return sum(
        1
        for start in range(len(string))
        if string[start:start + width] == sub_string
    )

def countOccurance(str, pat):
    """Return the number of non-overlapping occurrences of ``pat`` in ``str``.

    The whole string is searched. The original split on whitespace and counted
    *words containing* ``pat``, so several hits inside one word counted once
    and a pattern containing a space was never found.
    Returns 0 for an empty ``pat``.
    """
    if not pat:
        return 0
    # `str` is the string argument here, so this is the str.count method.
    return str.count(pat)

I usually use enumerate for this kind of problem:
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, overlaps included.

    Each index is tested with ``startswith`` so the pattern may be any length.
    The original looked for the pattern anywhere inside a fixed 3-character
    window, which overcounted short patterns and missed longer ones.
    """
    count = 0
    for i, _ in enumerate(string):
        if string.startswith(sub_string, i):
            count = count + 1
    return count

def count(sub_string,string):
    """Count occurrences of ``sub_string`` in ``string`` (overlaps included) via repeated find()."""
    total = 0
    position = string.find(sub_string)
    # find() yields -1 once nothing is left; otherwise resume one past the hit.
    while position > -1:
        total += 1
        position = string.find(sub_string, position + 1)
    return total

def count_substring(string, sub_string):
    """Count the start indices of ``string`` at which ``sub_string`` appears."""
    len_sub = len(sub_string)
    starts = [
        i for i in range(len(string))
        if string[i:i + len_sub] == sub_string
    ]
    return len(starts)

Related

count letter in string without using count()

I am trying to implement a count function without using count(). It works when I search for a single letter in a word, but when I search for two letters in a word it returns 0.
def count(str, sub):
found = 0
for key in str:
if key == sub:
found += 1
return found
str = input("Enter a string: ") #or we can initialize a string
sub = input("Enter a substring: ") #or we can initialize a substring
count(str, sub)
print ("letter: ", sub)
print ("count: ", count(str, sub))
Following your method I suggest you do something like this:
def count(string, sub):
    """Count occurrences of ``sub`` in ``string`` (overlaps included), for any length of ``sub``."""
    size = len(sub)
    last_start = len(string) - size
    found = 0
    start = 0
    # Slide a window of len(sub) characters across every viable start.
    while start <= last_start:
        if string[start:start + size] == sub:
            found += 1
        start += 1
    return found
That way you can use it for any size of sub.
def count(string, sub):
    """Count the positions of ``string`` at which ``sub`` starts (overlaps included)."""
    width = len(sub)
    # A forward window [c, c + width) selects the same characters as the
    # negative end index counted back from the end of the string.
    return sum(
        1
        for c in range(len(string))
        if string[c:c + width] == sub
    )
string = input("Enter a string: ") #or we can initialize a string
sub = input("Enter a substring: ") #or we can initialize a substring
print ("letter: ", sub)
print ("count: ", count(string, sub))
Below should work for all length sub strings.
def count(str, sub):
    """Count occurrences of ``sub`` in ``str``, overlaps included.

    Only slices exactly ``len(sub)`` long can equal ``sub``, so a single pass
    over the start positions replaces enumerating every substring length.
    Results are unchanged, including 0 for an empty ``sub``.
    """
    size = len(sub)
    if size == 0:
        return 0
    found = 0
    for j in range(len(str) - size + 1):
        if str[j:j + size] == sub:
            found += 1
    return found

counting number of occurrence in string

I'm trying to count the number of times "bob" has occurred in a given string. this is what I tried:
s = input("give me a string:")
count = 0
for i in s:
if i=="b":
for j in s:
x=0
if j!="b":
x+=1
else:
break
if s[x+1]=="o" and s[x+2]=="b":
count+=1
print(count)
if I give the string "bob", it gives back 2, and if I give something like "jbhxbobalih", it gives back 0. I don't know why this happens. any idea?
The easiest manual count would probably use indices and slices. The main difference between this and the much simpler s.count("bob") is that it also counts overlapping occurrences:
# s = "aboboba" -> 2
count = 0
for i, c in enumerate(s):
if s[i:i+3] == "bob":
count += 1
You can try checking 3 consecutive characters, if they are 'bob', just add our counter up, and do nothing otherwise.
Your code should be like this:
s = input("give me a string:")
count = 0
# A 3-character match can start no later than index len(s) - 3, so range()
# must stop at len(s) - 2. Stopping at len(s) - 3 skipped the final window
# (the input "bob" produced 0).
for i in range(len(s) - 2):
    if s[i:i + 3] == 'bob':
        count += 1
print(count)
100 % working this will work for all string.
import re
def check(string, sub_str):
    """Print how many times ``sub_str`` occurs in ``string``, overlaps included.

    ``sub_str`` is escaped so regex metacharacters are matched literally, and
    it is wrapped in a zero-width lookahead so a match does not consume
    characters: "bobob" contains "bob" twice.
    """
    pattern = '(?=' + re.escape(sub_str) + ')'
    count = len(re.findall(pattern, string))
    print(count)
# driver code
string = "baadbobaaaabobsasddswqbobdwqdwqsbob"
sub_str = "bob"
check(string, sub_str)
This gives the correct output.

to print the number of times that the substring occurs in the given string from left to right in python

In this challenge, the user enters a string and a substring. User has to print the number of times that the substring occurs in the given string. String traversal will take place from left to right, not from right to left.
Example:
Input:
WoW!ItSCoOWoWW
oW
Output:
2
Code:
def count_substring(string, sub_string):
j=0
flag=0
counter=0
for i in range(len(string)):
if string[i] == sub_string[j]:
if(j==0):
flag=i
j+=1
else:
j=0
if(j==len(sub_string)-1):
counter+=1
i=flag+1
j=0
return counter
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)
Error:
Traceback (most recent call last): File
"C:/Users/shivangi/Documents/python codes/hackerrank solutions.py",
line 24, in
count = count_substring(string, sub_string) File "C:/Users/shivangi/Documents/python codes/hackerrank solutions.py",
line 7, in count_substring
if string[i] == sub_string[j]: IndexError: string index out of range
I do not understand why it is showing "string index out of range" and how can it be solved
You can simply do next:
input_str = 'WoW!ItSCoOWoWW'
pattern = 'oW'
print(input_str.count(pattern)) # will output 2
And you don't need so complicated solution
Also same with using regexp:
import re
input_str = 'WoW!ItSCoOWoWW'
pattern = 'oW'
print(len(re.findall(pattern, input_str))) # also output 2
and simple for loop implementation
pattern = 'oW'
input_str = 'WoW!ItSCoOWoWW'
counter = 0
# Test every start position directly. A character-by-character state machine
# that resets to 0 on a mismatch never re-examines the mismatching character,
# so it misses e.g. 'oW' inside 'ooW'.
for idx in range(len(input_str)):
    if input_str.startswith(pattern, idx):
        counter += 1
print(counter) # 2
You are getting an index out of bounds because of j. So let's say the string is WowWowWow and the sub_string is ow. So when you are in the 4. loop, your i = 3 so you already checked Wow and you found one instance of the sub_string. But in that case, your j=2 because it got increased by one when you found the first o and then again when you found the first lower case w. But Your sub_string has no value at index [2]. So your sub_string[j] is then out of bounds. I hope everything is clear.
A better solution would be to use
counter = string.count(sub_string)
print(counter)
or
print(string.count(sub_string))
#This will definitely work#
#Use find function to get lower index where sub_string found#
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, overlaps included.

    Each search resumes one character after the previous hit. The original
    returned an undefined name (``count``), advanced two characters past each
    hit, and bounded the loop by a length difference that is 0 when both
    strings have the same length.
    """
    counter = 0
    pos = string.find(sub_string)
    while pos != -1:
        counter += 1
        pos = string.find(sub_string, pos + 1)
    return counter
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)
You can also split the string with the substring. It will return a list with n + 1 elements. For example, if substring occurs 2 times, then the length of list will be 3. So we need to reduce 1 from that list.
s = 'WoW!ItSCoOWoWW'
count = len(s.split('oW')) - 1 # output 2
I checked it for this example and it works fine. Note that split(), like count(), does not count overlapping occurrences.
While searching for substrings please make sure that it counts overlaps.
Eg- string: rabdadada substring: dada Count should come two not one
def countSubstr(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, counting overlaps (e.g. 'dada' in 'rabdadada' is 2)."""
    count = 0
    index = string.find(sub_string)
    # Resume one character after each hit so overlapping matches are found.
    while index >= 0:
        count += 1
        index = string.find(sub_string, index + 1)
    return count
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = countSubstr(string, sub_string)
print(count)
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, overlaps included.

    ``str.startswith`` takes a start offset, so the match is tested in place
    rather than on a fresh ``string[i:]`` copy at every index.
    """
    c = 0
    for i in range(len(string)):
        if string.startswith(sub_string, i):
            c = c + 1
    return c
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)
Code 1
def count_substring(string, sub_string):
    """Count the indices of ``string`` at which ``sub_string`` starts (overlaps included).

    The start offset is passed to ``startswith`` so the tail of the string is
    not copied on each iteration.
    """
    return sum(
        1
        for i in range(len(string))
        if string.startswith(sub_string, i)
    )
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)
Code 2
string, substring = (input().strip(), input().strip())
print(sum([ 1 for i in range(len(string)-len(substring)+1) if
string[i:i+len(substring)] == substring]))
Using List Comprehension
In a list comprehension, we slide through the bigger string one position at a time with a sliding window as long as the smaller string. The number of slides is found by subtracting the length of the smaller string from the length of the bigger string (plus one). For each slide, we compare that part of the bigger string with the smaller string and put a 1 in the list if they match. The sum of all these 1's gives the total number of matches found.
def count_substring(string, sub_string):
    """Count case-sensitive occurrences of ``sub_string`` in ``string``, overlaps included."""
    width = len(sub_string)
    c = 0
    i = 0
    while i < len(string):
        window = string[i:i + width]
        if window == sub_string:
            c += 1
        i += 1
    return c
print(count_substring("In the convential world, it won't ever happen", 'lD,'))
What's with all the unnecessary complications? This code will work, and count any overlaps, as others have mentioned. There's a simpler answer as well, but some people prefer using find(). It increments the counter if the result of find() is greater than -1 because -1 is what find() returns if there is no match. If there is a match it tells you the position in the string, so as long as the result is anywhere between 0 and the end of the string it will increment the counter. You could also cut down the number of loop iterations if you subtract the length of the substring from the range of the for loop. It shouldn't matter in most cases, but if your substring is really long there's no reason to keep checking for it when you don't even have that many characters left in your main string.
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string`` using a find() restricted to one window per index."""
    width = len(sub_string)
    # find() confined to [x, x + width) can only succeed when the pattern
    # starts exactly at x; -1 means it does not.
    hits = [
        x for x in range(len(string))
        if string.find(sub_string, x, x + width) != -1
    ]
    return len(hits)
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)
You can solve it by more than one way
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string`` with a sliding window (overlaps included)."""
    width = len(sub_string)
    windows = (string[i:i + width] for i in range(len(string) - width + 1))
    return sum(1 for window in windows if window == sub_string)
import re
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, overlaps included.

    The zero-width lookahead ``(?=...)`` matches at every start position
    without consuming characters. ``sub_string`` is passed through
    ``re.escape`` so characters such as ``.`` or ``+`` are matched literally
    rather than interpreted as regex syntax.
    """
    match = re.findall('(?=' + re.escape(sub_string) + ')', string)
    return len(match)
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, overlaps included.

    Returns 0 for an empty ``sub_string``; the original indexed
    ``sub_string[0]`` and raised IndexError in that case.
    """
    if not sub_string:
        return 0
    sum_1 = 0
    for i in range(len(string)):
        # startswith with an offset tests for a match beginning exactly at i.
        if string.startswith(sub_string, i):
            sum_1 += 1
    return sum_1
In the last solution I make this conditional statement inside the loop to avoid repeating the sequence more than once
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string`` by comparing each full-width window."""
    width = len(sub_string)
    result = 0
    # Window [i, i + width) for every start that leaves room for the pattern.
    for i in range(len(string) - width + 1):
        if string[i:i + width] == sub_string:
            result += 1
    return result
def count_substring(string, sub_string):
    """Count the indices of ``string`` whose pattern-sized window contains ``sub_string``."""
    width = len(sub_string)
    ans = 0
    for i, _ in enumerate(string):
        window = string[i:width + i]
        if sub_string in window:
            ans += 1
    return ans
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)
def count_substring(string, sub_string):
    """Count occurrences of ``sub_string`` in ``string``, overlaps included.

    Every start position is tested independently. The original
    character-by-character matcher reset to 0 on a mismatch without
    re-examining that character and reset again after each hit, so it missed
    matches such as 'ab' in 'aab' and the second 'CDC' in 'ABCDCDC'.
    Returns 0 for an empty ``sub_string``.
    """
    if not sub_string:
        return 0
    counter = 0
    for i in range(len(string) - len(sub_string) + 1):
        if string.startswith(sub_string, i):
            counter += 1
    return counter
if __name__ == '__main__':
string = input().strip()
sub_string = input().strip()
count = count_substring(string, sub_string)
print(count)

How to count specific substrings using slice notation

I want to count the number of occurrences of the substring "bob" within the string s. I do this exercise for an edX Course.
s = 'azcbobobegghakl'
counter = 0
numofiterations = len(s)
position = 0
#loop that goes through the string char by char
for iteration in range(numofiterations):
if s[position] == "b": # search pos. for starting point
if s[position+1:position+2] == "ob": # check if complete
counter += 1
position +=1
print("Number of times bob occurs is: " + str(counter))
However, it seems that the s[position+1:position+2] statement is not working properly. How do I address the two chars after a "b"?
The second slice index isn't included. It means that s[position+1:position+2] is a single character at position position + 1, and this substring cannot be equal to ob. See a related answer. You need [:position + 3]:
s = 'azcbobobegghakl'
counter = 0
numofiterations = len(s)
position = 0
# Walk the string start by start; the last two positions cannot begin "bob".
while position < numofiterations - 2:
    if s[position:position + 3] == "bob":
        counter += 1
    position += 1
print("Number of times bob occurs is: " + str(counter))
# 2
You could use .find with an index:
s = 'azcbobobegghakl'
needle = 'bob'
cnt = 0
# Ask find() for the next hit, resuming one character after the previous one.
idx = s.find(needle)
while idx >= 0:
    cnt += 1
    idx = s.find(needle, idx + 1)
print("{} was found {} times.".format(needle, cnt))
# bob was found 2 times.
Eric's answer explains perfectly why your approach didn't work (slicing in Python is end-exclusive), but let me propose another option:
s = 'azcbobobegghakl'
# Every suffix of s; a suffix that starts with "bob" marks one occurrence.
substrings = [s[i:] for i in range(0, len(s))]
# A list is built because the original call could not run: filter() expects
# the predicate first and the iterable second, and on Python 3 it returns an
# iterator, which has no len().
filtered_s = [suffix for suffix in substrings if suffix.startswith("bob")]
result = len(filtered_s)
or simply
s = 'azcbobobegghakl'
# One entry per index whose suffix begins with "bob".
result = len([i for i in range(0, len(s)) if s[i:].startswith("bob")])

Return the number of times that the string "code" appears anywhere in the given string [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 1 year ago.
Improve this question
Return the number of times that the string "code" appears anywhere in
the given string, except we'll accept any letter for the 'd', so
"cope" and "cooe" count.
I have achieved this with the following code using regular expressions:
import re
def count_code(str):
exp = '^co[a-z|A-Z]e$'
count = 0
for i in range(len(str) - 1):
if re.match(exp, str[i:i + 4]):
count = count + 1
return count
print count_code('aaacodebbb') # prints 1
print count_code('codexxcode') # prints 2
print count_code('cozexxcope') # prints 2
Is there any other way of achieving this without using regular expressions?
One way is you can make every possible string with co*e where * is any alphabet
Like
x=["co"+i+"e" for i in string.lowercase]
Then iterate
for i in x:
if i in <your string>:
count+=<your string>.count(i)
You can try this:
def count_code(str):
    """Count occurrences of 'co?e' in ``str``, where '?' is any single character.

    Each index is checked directly. The original built its candidate words
    from the characters of the input and restarted ``find`` from the
    beginning every time, so e.g. 'dcodecode' was counted as 3.
    """
    count = 0
    # i + 3 must be a valid index, so the last start is len(str) - 4.
    for i in range(len(str) - 3):
        if str[i:i + 2] == 'co' and str[i + 3] == 'e':
            count += 1
    return count
print count_code('aaacodebbb') # prints 1
print count_code('codexxcode') # prints 2
print count_code('cozexxcope') # prints 2
this is a simple and clean solution for this problem:
def count_code(str):
    """Count the positions where 'co' is followed, one character later, by 'e'."""
    # Slices never raise near the end of the string; they just come back short.
    return sum(
        1
        for i in range(len(str))
        if str[i:i + 2] == "co" and str[i + 3:i + 4] == "e"
    )
To improve on other answers, notice that there is no need for slices. We can just compare the three positions of interest to the letters that should be there:
def count_code(str):
    """Count 'co?e' occurrences by comparing the three fixed letters directly."""
    count = 0
    for i in range(len(str) - 3):
        fixed_letters = (str[i], str[i + 1], str[i + 3])
        if fixed_letters == ('c', 'o', 'e'):
            count += 1
    return count
def count_code(str):
    """Count 'co?e' occurrences: join the first two letters with the fourth and compare to 'coe'."""
    total = 0
    i = 0
    last_start = len(str) - 3
    while i < last_start:
        skeleton = str[i:i + 2] + str[i + 3]
        if skeleton == 'coe':
            total += 1
        i += 1
    return total
You can try also :
using Python String Method 'count'
def count_code1(str):
    """Count occurrences of 'co?e' in ``str`` where '?' is a lowercase ASCII letter.

    Sums ``str.count`` over the 26 candidate words. The accumulator is
    ``counts`` throughout; the original added to an undefined ``count`` and
    raised on the first iteration.
    """
    counts = 0
    for i in range(97, 123):  # ord('a') .. ord('z')
        counts += str.count('co' + chr(i) + 'e')
    return counts
def count_code(str):
    """Count 'co?e' occurrences using a stride-3 slice to pick the first and fourth letters."""
    code = 0
    for c in range(len(str) - 1):
        # str[c:c+4:3] is the character at c plus, when present, the one at c + 3.
        outer = str[c:c + 4:3]
        if outer == 'ce' and str[c + 1] == 'o':
            code += 1
    return code
You could define your logic in a way you can reuse like so - in this case without a count or regex
def count_code(str):
    """Count 'co?e' occurrences: 'co' at an index and 'e' three characters later."""
    prefix = 'co'
    suffix = 'e'
    total_len = len(str)
    counter = 0
    for i in range(total_len):
        if str[i:i + 2] != prefix:
            continue
        # Fewer than four characters remain: no room for the closing letter.
        if total_len - i < 4:
            continue
        if str[i + 3] == suffix:
            counter += 1
    return counter
def count_code(s):
    """Count occurrences of 'co?e' in ``s``, where '?' is any single character.

    Indexes forward from the start. The original indexed from the end, and
    for i == 0 the expression ``s[-i]`` is ``s[0]``, so a string ending in
    'co' + one character and *starting* with 'e' (e.g. 'excox') was wrongly
    counted.
    """
    count = 0
    for i in range(len(s) - 3):
        if s[i:i + 2] == 'co' and s[i + 3] == 'e':
            count = count + 1
    return count
This should work too:
def count_code(str):
    """Count the indices where 'co' starts and 'e' sits three characters further on."""
    starts = [
        i for i in range(len(str) - 3)
        if str[i:i + 2] == 'co' and str[i + 3] == 'e'
    ]
    return len(starts)
Hope can help you!
def count_code(str):
    """Sum str.count over 'coae' .. 'coze' (lowercase middle letter only)."""
    letters = "abcdefghijklmnopqrstuvwxyz"
    return sum(str.count("co" + letter + "e") for letter in letters)
a = 0
for i in range(len(str)-3):
if str[i:i+2] == 'co' and str[i+3] == 'e':
a +=1
return a

Categories