Using Python, how to print output string as -> aaa3bb2c1ddddd5 when Input string is aaabbcddddd - python

Using Python, how to print output string as -> aaa3bb2c1ddddd5 when Input string is aaabbcddddd
I want to concatenate actual character value and number of times a character is repeated in a string
def mycode(myString):
lenstr = len(myString)
print('length of string is '+str(lenstr));
for ele in myString:
count=0
for character in myString:
if character == ele:
count = count+1
totalstr = ele+str(count)
return totalstr

If the string is always sorted and grouped together like that, then you can use a collections.Counter to do it.
from collections import Counter
inp = "aaabbcddddd"
counter = Counter(inp)
out = "".join(k * v + str(v) for k,v in counter.items())
Or in one line:
print(''.join(k * v + str(v) for k,v in Counter(inp).items()))
Output:
aaa3bb2c1ddddd5
Or you can do it manually:
inp = "aaabbcddddd"
last = inp[0]
out = inp[0]
count = 1
for i in inp[1:]:
if i == last:
count += 1
else:
out += str(count)
count = 1
last = i
out += i
out += str(count)
print(out)

Here is a one line solution using a regex replacement with callback:
inp = "aaabbcddddd"
output = re.sub(r'((\w)\2*)', lambda m: m.group(1) + str(len(m.group(1))), inp)
print(output) # aaa3bb2c1ddddd5

Another one-liner:
import itertools
test = 'aaabbcddddd'
out = ''.join(f"{(g := ''.join(ig))}{len(g)}" for _, ig in itertools.groupby(test))
assert out == 'aaa3bb2c1ddddd5'

def char_counter_string(string):
prev_char = None
char_counter = 0
output = ''
for char_index in range(len(string)+1):
if char_index == len(string):
output += str(char_counter)
break
if string[char_index] != prev_char and prev_char is not None:
output += str(char_counter)
char_counter = 0
output += string[char_index]
char_counter += 1
prev_char = string[char_index]
return output
if __name__ == '__main__':
print(char_counter_string('aaabbcddddd'))

you can do like..
Code:
Time Complexity: O(n)
input_string="aaabbcddddd"
res=""
count=1
for i in range(1, len(input_string)):
if input_string[i] == input_string[i-1]:
count += 1
else:
res+=input_string[i-1]*count + str(count)
count = 1
res+=input_string[-1]*count + str(count)
print(res) #aaa3bb2c1ddddd5

Here's another way, ...
Full disclosure: ... as long as the run of characters is 10 or less, it will work. I.e., if there are 11 of anything in row, this won't work (the count will be wrong).
It's just a function wrapping a reduce.
from functools import reduce
def char_rep_count(in_string):
return reduce(
lambda acc, inp:
(acc[:-1]+inp+str(int(acc[-1])+1))
if (inp==acc[-2])
else (acc+inp+"1"),
in_string[1:],
in_string[0]+"1"
)
And here's some sample output:
print(char_rep_count("aaabbcdddd"))
aaa3bb2c1dddd4

I think this fulfils the brief and is also very fast:
s = 'aaabbcddddd'
def mycode(myString):
if myString:
count = 1
rs = [prev := myString[0]]
for c in myString[1:]:
if c != prev:
rs.append(f'{count}')
count = 1
else:
count += 1
rs.append(prev := c)
rs.append(f'{count}')
return ''.join(rs)
return myString

Related

Character counter should not display amount if amount is 1

def encode(message):
encoded_message = ""
i = 0
while (i <= len(message)-1):
count = 1
ch = message[i]
j = i
while (j < len(message)-1):
if (message[j] == message[j+1]):
count = count+1
j = j+1
else:
break
encoded_message=encoded_message+str(count)+ch
i = j+1
return encoded_message
#Provide different values for message and test your program
encoded_message=encode("ABBBBCCCCCCCCAB")
print(encoded_message)
This code generates the following output: 1A4B8C1A1B
But if the value is 1 it should just display the letter like this: A4B8CAB
Simply replace this line:
encoded_message=encoded_message+str(count)+ch
with this:
encoded_message += (str(count) if count > 1 else "") + ch
It should do the trick. Now it only appends the count to the string if said count is bigger than one.

Python reverse each word in a sentence without inbuilt function python while preserve order

Not allowed to use "Split(),Reverse(),Join() or regexes" or any other
helping inbuilt python function
input something like this:
" my name is scheven "
output like this:
"ym eman si nevehcs"
you need to consider removing the starting,inbetween,ending spaces aswell in the input
I have tried 2 tries, both failed i will share my try to solve this and maby an idea to improve it
First try:
def reverseString(someString):
#lenOfString = len(someString)-1
emptyList = []
for i in range(len(someString)):
emptyList.append(someString[i])
lenOfString = len(emptyList)-1
counter = 0
while counter < lenOfString:
if emptyList[counter] == " ":
counter+=1
if emptyList[lenOfString] == " ":
lenOfString-=1
else:
swappedChar = emptyList[counter]
emptyList[counter] = emptyList[lenOfString]
emptyList[lenOfString] = swappedChar
counter+=1
lenOfString-=1
str_contactantion = ""
#emptyList = emptyList[::-1]
#count_spaces_after_letter=0
for letter in emptyList:
if letter != " ":
str_contactantion+=letter
#str_contactantion+=" "
str_contactantion+=" "
return str_contactantion
second try:
def reverse(array, i, j):
emptyList = []
if (j == i ):
return ""
for k in range(i,j):
emptyList.append(array[k])
start = 0
end = len(emptyList) -1
if start > end: # ensure i <= j
start, end =end, start
while start < end:
emptyList[start], emptyList[end] = emptyList[end], emptyList[start]
start += 1
end -= 1
strconcat=""
for selement in emptyList:
strconcat+=selement
return strconcat
def reverseStr(someStr):
start=0
end=0
help=0
strconcat = ""
empty_list = []
for i in range(len(someStr)):
if(someStr[i] == " "):
continue
else:
start = i
j = start
while someStr[j] != " ":
j+=1
end = j
#if(reverse(someStr,start,end) != ""):
empty_list.append(reverse(someStr,start,end))
empty_list.append(" ")
for selement in empty_list:
strconcat += selement
i = end + 1
return strconcat
print(reverseStr(" my name is scheven "))
The following works without managing indices:
def reverseString(someString):
result = crnt = ""
for c in someString:
if c != " ":
crnt = c + crnt # build the reversed current token
elif crnt: # you only want to do anything for the first space of many
if result:
result += " " # append a space first
result += crnt # append the current token
crnt = "" # and reset it
if crnt:
result += " " + crnt
return result
reverseString(" my name is scheven ")
# 'ym eman si nevehcs'
Try this:
def reverseString(someString):
result = ""
word = ""
for i in (someString + " "):
if i == " ":
if word:
result = result + (result and " ") + word
word = ""
else:
word = i + word
return result
You can then call it like this:
reverseString(" my name is scheven ")
# Output: 'ym eman si nevehcs'
Try this:
string = " my name is scheven "
def reverseString(someString):
result = ''
curr_word = ''
for i in someString:
if i == ' ':
if curr_word:
if result:
result = f'{result} {curr_word}'
else:
result = f'{result}{curr_word}'
curr_word = ''
else:
curr_word = f'{i}{curr_word}'
return result
print(repr(reverseString(string)))
Output:
'ym eman si nevehcs'
Note: if you're allowed to use list.append method, I'd suggest using a collections.deque as it's more performant than appending to a list. But of course, in the end you'll need to join the list together, and you mentioned that you're not allowed to use str.join, so that certainly poses an issue.

How to replace all "&int-int" with the respective string slices in an input string?

I have a school project question (for Python) that goes like this:
Given a string_input such as "abcd&1-4efg", the function must remove the "&1-4" and insert the string slice from 1 to 4 where the "&1-4" was.
eg. if string_input = "abcd&1-4efg",
"&1-4" is removed.
The remaining characters are indexed as follows: a=0, b=1, c=2, d=3, e=4, f=5, g=6
The new string becomes:
"abcdbcdeefg"
I've managed to write a long chunk of code to do this, but I'm wondering if anyone has any more efficient solutions?
Things to note:
The instructions can include double digits (eg. &10-15)
If the index isn't found, the returned string should print "?" for every missing index
(eg. "abcd&5-10efgh" would return "abcdfgh???efgh")
Intructions can be back-to-back (eg. "&10-15abcdef&1-5&4-5pqrs")
The code I've written is:
def expand(text):
text += "|"
import string
digits_dash = string.digits + "-"
idx_ref_str = ""
replace_list = []
record_val = False
output_to_list = []
instruct = ""
and_idx_mark = 0
#builds replace_list & idx_ref_list
for idx in range(len(text)):
if text[idx] == "&" and record_val==True:
output_to_list.append(instruct)
output_to_list.append(and_idx_mark)
replace_list.append(output_to_list)
output_to_list, instruct, inst_idx, and_idx_mark = [],"",0,0
and_idx_mark = idx
continue
elif text[idx] == "&":
record_val = True
and_idx_mark = idx
continue
#executes if currently in instruction part
if record_val == True:
#adds to instruct
if text[idx] in digits_dash:
instruct += text[idx]
#take info, add to replace list
else:
output_to_list.append(instruct)
output_to_list.append(and_idx_mark)
replace_list.append(output_to_list)
output_to_list, instruct, inst_idx, and_idx_mark, record_val = [],"",0,0,False
#executes otherwise
if record_val == False:
idx_ref_str += text[idx]
idx_ref_str = idx_ref_str[:-1]
text = text[:-1]
#converts str to int indexes in replace list[x][2]
for item in replace_list:
start_idx = ""
end_idx = ""
#find start idx
for char in item[0]:
if char in string.digits:
start_idx += char
elif char == "-":
start_idx = int(start_idx)
break
#find end idx
for char in item[0][::-1]:
if char in string.digits:
end_idx = char + end_idx
elif char == "-":
end_idx = int(end_idx)
break
start_end_list = [start_idx,end_idx]
item+=start_end_list
#split text into parts in list
count = 0
text_block = ""
text_block_list = []
idx_replace = 0
for char in text:
if char == "&":
text_block_list.append(text_block)
text_block = ""
count += len(replace_list[idx_replace][0])
idx_replace +=1
elif count > 0:
count -= 1
else:
text_block += char
text_block_list.append(text_block)
#creates output str
output_str = ""
for idx in range(len(text_block_list)-1):
output_str += text_block_list[idx]
#creates to_add var to add to output_str
start_repl = replace_list[idx][1]
end_repl = replace_list[idx][1] + len(replace_list[idx][0])
find_start = replace_list[idx][2]
find_end = replace_list[idx][3]
if end_idx >= len(idx_ref_str):
gap = end_idx + 1 - len(idx_ref_str)
to_add = idx_ref_str[find_start:] + "?" * gap
else:
to_add = idx_ref_str[find_start:find_end+1]
output_str += to_add
output_str += text_block_list[-1]
return output_str
Here's how I would do it. Always open to criticism.
import re
s = 'abcd&1-4efg'
c = re.compile('&[0-9]+-[0-9]+')
if (m := c.search(s)):
a, b = m.span()
left = s[:a]
right = s[b:]
o = [int(x) for x in m.group(0)[1:].split('-')]
mid = (left+right)[o[0]:o[1]+1]
print(left + mid + right)

Python compression string not quite right

I have the following code that is self explanatory in the docstring. How do I get it to not flag single letters with a 1, thereby turning a single digit into 2 in the final compressed string?
For example in the docstring it turns AAABBBBCDDDD -> A3B4C1D4 but I want it to turn into A3B4CD4. I'm new at this so it's any comments are greatly appreciated.
class StringCompression(object):
'''
Run Length Compression Algorithm: Given a string of letters, such as
nucleotide sequences, compress it using numbers to flag contiguous repeats.
Ex: AAABBBBCDDDD -> A3B4C1D4
>>>x = StringCompression('AAAAbC')
>>>x.compress()
'A4bC'
'''
def __init__(self, string):
self.string = string
def compress(self):
'''Executes compression on the object.'''
run = ''
length = len(self.string)
if length == 0:
return ''
if length == 1:
return self.string #+ '1'
last = self.string[0]
count = 1
i = 1
while i < length:
if self.string[i] == self.string[i - 1]:
count += 1
else:
run = run + self.string[i - 1] + str(count)
count = 1
i += 1
run = (run + self.string[i - 1] + str(count))
return run
Here's an alternative solution using itertools.groupby and a generator:
from itertools import chain, groupby
x = 'AAABBBBCDDDD'
def compressor(s):
for i, j in groupby(s):
size = len(list(j))
yield (i, '' if size==1 else str(size))
res = ''.join(chain.from_iterable(compressor(x)))
print(res)
A3B4CD4
Now it works the way I wanted it to. Thanks!
class StringCompression(object):
'''
Run Length Compression Algorithm: Given a string of letters, such as
nucleotide sequences, compress it using numbers to flag contiguous repeats.
Ex: AAABBBBCDDDD -> A3B4CD4
Notice that single letter do not get a 1 flag to prevent expansion.
>>>x = StringCompression('AAAAbC')
>>>x.compress()
'A4bC'
'''
def __init__(self, string):
self.string = string
def compress(self):
'''Executes compression on the object.'''
run = ''
length = len(self.string)
if length == 0:
return ''
if length == 1:
return self.string #+ '1'
last = self.string[0]
count = 1
i = 1
while i < length:
if self.string[i] == self.string[i - 1]:
count += 1
else:
run = run + self.string[i - 1] + str(count)
count = 1
i += 1
run = (run + self.string[i - 1] + str(count))
compressed_string = ''
for i in run:
if i != '1':
compressed_string += i
return compressed_string

Search Strings in a List with Loop Return Order

I'm very new to Python and I have a question.
I have a List that looks like this:
List = ["B-Guild","I-Guild","I-Guild","L-Guild","B-Gene","L-Gene","U-Car"]
All of the words with B-(I)-L belong to each other and I want to use a function to show that.
def combine(x):
foo = []
regexp_B = ("B-" + r'.*')
regexp_I = ("I-" + r'.*')
regexp_L = ("L-" + r'.*')
regexp_U = ("U-" + r'.*')
for i in range(0,len(x),1):
if re.match(regexp_B, x[i]):
print("Found B")
foo.append[i+x[i]]
if re.match(regexp_I, x[i+1]):
print("Found I")
foo.append[i+1+x[i+1]]
if re.match(regexp_I, x[i+1]):
print("Found I")
foo.append[i+1+x[i+1]]
else:
print("Found L")
foo.append[i+1+x[i+1]]
else:
print("Found L")
foo.append[i1+x[i1]]
elif re.match(regexp_L, x[i]):
print("L")
foo.append[i1+x[i1]]
elif re.match(regexp_U, x[i]):
print("Found U")
foo.append[i1+x[i1]]
return foo
List_New = combine(List)
Desired Output:
foo = ["0B-Guild","0I-Guild","0I-Guild","OL-Guild","1B-Gene","1L-Gene","2U-Car"]
Edit:
The output follows this logic: Every time a "B-" prefix appears, the words to follow are part of one "theme" until a "L-" prefix appears. These words got to have the same number before them so they can be grouped for further functions. "U-" prefixes don't follow that logic and just need a number before them to distinguish them from the other words. Think of it as a Counter that groups these word into a cluster.
def combine(some_list):
current_group = 0 # starts with 0
g_size = 0 # current group size
for elem in some_list:
g_size += 1
if elem.startswith('U-') and g_size > 1:
g_size = 1
current_group += 1
yield '{}{}'.format(current_group, elem)
if elem.startswith(('L-', 'U-')): # each L- or U- also finishes a group
g_size = 0
current_group += 1
>>> List = ["B-Guild","I-Guild","I-Guild","L-Guild","B-Gene","L-Gene","U-Car"]
>>> print(list(combine(List)))
>>> List = ["B-Guild","I-Guild","I-Guild","L-Guild","B-Guild","L-Guild","U-Guild"]
>>> print(list(combine(List)))

Categories