Implementing Python's split() function using purely recursion - python

I'm attempting to implement Python's split() function using recursion with no additional parameters and no loops.
For a given input string, this is the desired output
mySplit('hello,there,world', ',')
=> ['hello', 'there', 'world']
Here is my current attempt, but it really only removes the delimiter and places the string in a list, but I cannot figure out how to append items to the list!
def mySplit(string, delim):
    """Asker's recursive split attempt, shown with its indentation restored.

    The logic is intentionally left as posted: it strips every delimiter but
    glues all remaining characters into one string, so the result is always a
    single-element list (e.g. ``['hellothereworld']``).

    Args:
        string: Non-empty text to process.
        delim: Single-character delimiter.

    Returns:
        A one-element list holding ``string`` with the delimiters removed.
    """
    # Base case: a one-character string comes back as the only piece.
    if len(string) == 1:
        return [string]
    if string[0] == delim:
        # The delimiter is dropped, but no new list element is started here,
        # which is why the pieces never end up separated.
        return [mySplit(string[1:], delim)[0]]
    # Only element [0] of the recursive result is used, so everything
    # collapses into one string.
    return [string[0] + mySplit(string[1:], delim)[0]]
This code results in ['hellothereworld']

I'd write something like:
def my_split(s, delim):
    """Split ``s`` on a single-character ``delim``, recursing on the tail.

    Args:
        s: Text to split.
        delim: Single-character delimiter.

    Returns:
        List of the pieces of ``s`` between delimiters; ``[s]`` when the
        delimiter does not occur.
    """
    for i, c in enumerate(s):
        if c == delim:
            # Cut at the first delimiter and let recursion handle the rest.
            return [s[:i]] + my_split(s[i + 1:], delim)
    # No delimiter left: the remainder is the final piece.
    return [s]
EDIT: Oops, skipped over a crucial part of your question. I think this works.
def my_split(s, delim, i=0):
    """Split ``s`` on a single-character ``delim`` without any loop.

    The scan position ``i`` replaces the loop counter: each call either
    advances ``i`` by one or cuts the string at ``i`` and restarts on the tail.

    Args:
        s: Text to split.
        delim: Single-character delimiter.
        i: Current scan position inside ``s`` (callers normally omit it).

    Returns:
        List of the pieces of ``s`` between delimiters.
    """
    if i == len(s):
        # Scanned the whole string without meeting a delimiter.
        return [s]
    if s[i] == delim:
        # Cut here; the tail is scanned again from position 0.
        return [s[:i]] + my_split(s[i + 1:], delim)
    return my_split(s, delim, i + 1)
EDIT 2: It's a tricky one for sure. Really interesting problem. Hopefully I don't hit any more constraints with this one:
def my_split(s, delim):
    """Split ``s`` on a single-character ``delim`` using recursion only.

    No loops and no extra parameters: the function peels off one character
    per call and merges it into the first piece of the recursive result.

    Args:
        s: Text to split.
        delim: Single-character delimiter.

    Returns:
        List of the pieces of ``s`` between delimiters (always a list, also
        when ``s`` starts with the delimiter).
    """
    if not s:
        return [""]
    if s[0] == delim:
        # A leading delimiter opens a new, still empty, piece.
        return ["", *my_split(s[1:], delim)]
    first, *rest = my_split(s[1:], delim)
    # Prepend the current character to the piece being built.
    return [s[0] + first] + rest


assert my_split("hello,there,world", ",") == ["hello", "there", "world"]
assert my_split("hello world!", ",") == ["hello world!"]
assert my_split("hello world!", " ") == ["hello", "world!"]

def mySplit(string, delim):
    """Recursively split ``string`` on ``delim``.

    Args:
        string: Text to split.
        delim: Non-empty delimiter; may be longer than one character.

    Returns:
        List of the pieces of ``string`` between delimiters.

    Raises:
        ValueError: If ``delim`` is empty (raised by ``str.partition``).
    """
    # partition() finds the first delimiter in one pass and skips its full
    # length, so multi-character delimiters are handled correctly.
    head, found, tail = string.partition(delim)
    if not found:
        return [string]
    return [head] + mySplit(tail, delim)


print(mySplit('hello,there,world', ','))

Related

Using Python, how to print output string as -> aaa3bb2c1ddddd5 when Input string is aaabbcddddd

Using Python, how to print output string as -> aaa3bb2c1ddddd5 when Input string is aaabbcddddd
I want to concatenate actual character value and number of times a character is repeated in a string
def mycode(myString):
    """Return each run of equal characters followed by the run's length.

    Example: ``'aaabbcddddd'`` becomes ``'aaa3bb2c1ddddd5'``.

    Args:
        myString: Text to encode.

    Returns:
        The encoded string; an empty string for empty input.
    """
    from itertools import groupby

    lenstr = len(myString)
    print('length of string is ' + str(lenstr))

    pieces = []
    # groupby() yields one group per run of consecutive equal characters.
    for _, run in groupby(myString):
        run_text = ''.join(run)
        pieces.append(run_text + str(len(run_text)))
    return ''.join(pieces)
If the string is always sorted and grouped together like that, then you can use a collections.Counter to do it.
from collections import Counter

# Tally every distinct character, then emit the character repeated `total`
# times followed by `total` itself, in first-seen order.
inp = "aaabbcddddd"
counter = Counter(inp)
out = "".join(f"{char * total}{total}" for char, total in counter.items())
Or in one line:
# Same tally as above, built and printed in a single expression.
print(''.join(f"{char * total}{total}" for char, total in Counter(inp).items()))
Output:
aaa3bb2c1ddddd5
Or you can do it manually:
inp = "aaabbcddddd"
last = inp[0]   # character of the run currently being counted
out = inp[0]
count = 1
for i in inp[1:]:
    if i == last:
        count += 1
    else:
        # The run ended: write its length and start counting the new run.
        out += str(count)
        count = 1
        last = i
    out += i
# The final run has no following character to trigger the write above.
out += str(count)
print(out)
Here is a one line solution using a regex replacement with callback:
import re

inp = "aaabbcddddd"
# Group 2 is one word character and group 1 is that character plus all of its
# immediate repeats; the callback appends the length of the whole run.
output = re.sub(r'((\w)\2*)', lambda m: m.group(1) + str(len(m.group(1))), inp)
print(output) # aaa3bb2c1ddddd5
Another one-liner:
import itertools

test = 'aaabbcddddd'
# Materialise each run of equal characters as a string, then append its length.
runs = (''.join(members) for _, members in itertools.groupby(test))
out = ''.join(f"{run}{len(run)}" for run in runs)
assert out == 'aaa3bb2c1ddddd5'
def char_counter_string(string):
    """Return ``string`` with the length of every run appended after the run.

    Example: ``'aaabbcddddd'`` becomes ``'aaa3bb2c1ddddd5'``.

    Args:
        string: Text to encode.

    Returns:
        The encoded string; an empty string for empty input.
    """
    if not string:
        return ''

    parts = []
    run_length = 0
    prev_char = string[0]
    for char in string:
        if char != prev_char:
            # The previous run just ended: record how long it was.
            parts.append(str(run_length))
            run_length = 0
            prev_char = char
        parts.append(char)
        run_length += 1
    # Close the final run, which no later character terminates.
    parts.append(str(run_length))
    return ''.join(parts)


if __name__ == '__main__':
    print(char_counter_string('aaabbcddddd'))
You can do it like this:
Code:
Time Complexity: O(n)
input_string = "aaabbcddddd"
res = ""
count = 1
for i in range(1, len(input_string)):
    if input_string[i] == input_string[i - 1]:
        count += 1
    else:
        # The run ending at i-1 is complete: emit it together with its length.
        res += input_string[i - 1] * count + str(count)
        count = 1
# Emit the last run, which the loop never closes.
res += input_string[-1] * count + str(count)
print(res) #aaa3bb2c1ddddd5
Here's another way, ...
Full disclosure: ... as long as the run of characters is 10 or less, it will work. I.e., if there are 11 of anything in row, this won't work (the count will be wrong).
It's just a function wrapping a reduce.
from functools import reduce


def char_rep_count(in_string):
    """Return each run of equal characters followed by the run's length.

    Still a thin wrapper around ``reduce``, but the running count is kept as
    an integer in the accumulator instead of being parsed back out of the
    output text, so runs of any length and inputs containing digits work.

    Args:
        in_string: Text to encode.

    Returns:
        The encoded string; an empty string for empty input.
    """
    if not in_string:
        return ""

    def step(acc, char):
        # acc = (finished pieces, character of the open run, its length)
        pieces, run_char, run_len = acc
        if char == run_char:
            return pieces, run_char, run_len + 1
        pieces.append(f"{run_char * run_len}{run_len}")
        return pieces, char, 1

    pieces, run_char, run_len = reduce(
        step,
        in_string[1:],
        ([], in_string[0], 1),
    )
    # Flush the run that was still open when the input ended.
    pieces.append(f"{run_char * run_len}{run_len}")
    return "".join(pieces)
And here's some sample output:
print(char_rep_count("aaabbcdddd"))
aaa3bb2c1dddd4
I think this fulfils the brief and is also very fast:
s = 'aaabbcddddd'


def mycode(myString):
    """Return each run of equal characters followed by the run's length.

    Args:
        myString: Text to encode.

    Returns:
        The encoded string; empty input is returned unchanged.
    """
    if not myString:
        return myString

    count = 1
    rs = [prev := myString[0]]
    for c in myString[1:]:
        if c != prev:
            # Run finished: record its length before the new character.
            rs.append(f'{count}')
            count = 1
        else:
            count += 1
        rs.append(prev := c)
    # Length of the final run.
    rs.append(f'{count}')
    return ''.join(rs)

Longest Common Subsequence using Lose it or Use it

I need to return the string of the Longest Common Subsequence of 2 strings. But the code keeps crashing, probably due to Infinite recursion? But I don't know where it happens? Am I missing a base case?
def lcs(s, t):
    """Return a longest common subsequence of the strings ``s`` and ``t``.

    "Use it or lose it": when the current characters match, the character is
    used; otherwise the better of dropping a character from ``s`` or from
    ``t`` is taken. On a tie the result of dropping from ``t`` wins.

    Args:
        s: First string.
        t: Second string.

    Returns:
        A longest string that is a subsequence of both ``s`` and ``t``
        (``''`` when either input is empty).
    """
    from functools import lru_cache

    # Positions instead of slices keep the cache keys small; caching turns
    # the exponential recursion into O(len(s) * len(t)) sub-problems.
    @lru_cache(maxsize=None)
    def solve(i, j):
        # Base case first: nothing in common once either string is used up.
        if i == len(s) or j == len(t):
            return ""
        if s[i] == t[j]:
            return s[i] + solve(i + 1, j + 1)
        lose_s = solve(i + 1, j)
        lose_t = solve(i, j + 1)
        return lose_s if len(lose_s) > len(lose_t) else lose_t

    return solve(0, 0)
#Tests
#assert lcs('mens', 'chimpansee') == 'mns'
#assert lcs('gattaca', 'tacgaacta') == 'gaaca'
#assert lcs('wow', 'wauw') == 'ww'
#assert lcs('', 'wauw') == ''
#assert lcs('abcdefgh', 'efghabcd') == 'abcd'

How to inject characters into a string?

I have a function in which I need to inject a character before every odd index of a string.
def inject(s, i):
    """Asker's attempt at inserting ``i`` before every odd-index character.

    Shown with its indentation restored; the logic is left as posted.
    ``sent.index(w)`` returns the position of the FIRST occurrence of ``w``,
    so a repeated character is judged by that first position rather than by
    its own.

    Args:
        s: Value to process; converted with ``str``.
        i: Value to insert; converted with ``str``.

    Returns:
        The resulting string.
    """
    sent = str(s)
    new = []
    for w in sent:
        if w == ' ':
            new.append(w)
        # First-occurrence lookup: wrong index for any repeated character.
        elif sent.index(w) % 2 == 1:
            new.append(str(i) + w)
        else:
            new.append(w)
    return ''.join(new)
The function works for:
inject(1339, 3)
output: '1333339'
and works for
inject('This is a test', 'x')
output: 'Txhixs ixs a txexst'
but won't work for:
inject('Hello World', 'x')
output: 'Hxello World'
I know it has something to do with it breaking on the 'll' but can't figure out how to fix it.
Thanks!
I think you meant to do this:
def inject(s, i):
    """Insert ``i`` before every non-space character at an odd index.

    Args:
        s: Value to process; converted with ``str``.
        i: Value to insert; converted with ``str``.

    Returns:
        The resulting string. Spaces are copied unchanged but still count
        towards the index.
    """
    sent = str(s)
    new = []
    # enumerate() supplies each character's own position, so repeated
    # characters are handled correctly.
    for idx, w in enumerate(sent):
        if w == ' ':
            new.append(w)
        elif idx % 2 == 1:
            new.append(str(i) + w)
        else:
            new.append(w)
    return ''.join(new)


for t in [1339, 'This is a test', 'Hello World']:
    print(t, inject(t,'x'))
When using the enumerate() you get the index of each character without having to search for it.
The sent.index(w) call just doesn't do what you want it to even though it might seem like it does when there are no repeated characters in a string.

Question on Python function to capitalize letters at even positions

I am able to perform the capitalization function using the below for loop and enumerate function.
# `word` is expected to be defined before this snippet runs.
wordlist = list(word)
for i, v in enumerate(wordlist):
    # Odd indices become upper case, even indices lower case.
    if i % 2 != 0:
        wordlist[i] = v.upper()
    else:
        wordlist[i] = v.lower()
word2 = "".join(wordlist)
print(word2)
However when I try to put it into the function in python, I am not able to reproduce the same result as above:
def myfunc(*word):
    """Asker's function version, shown with its indentation restored.

    The logic is left as posted. Because of ``*word`` the arguments arrive as
    a tuple, so the alternation runs over whole arguments, not over the
    characters of a single string: ``myfunc("Hello")`` yields ``"hello"``.

    Args:
        *word: Strings to process.

    Returns:
        The arguments joined together, with the ones at odd positions
        upper-cased and the ones at even positions lower-cased.
    """
    wordlist = list(word)
    for i, v in enumerate(wordlist):
        if i % 2 != 0:
            wordlist[i] = v.upper()
        else:
            wordlist[i] = v.lower()
    word = "".join(wordlist)
    return (word)
Can anyone help me with my question?
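There is no need for the unpacking operator (*): it makes word equal to (word,), i.e. a single-element tuple, so the loop sees the whole string as one item. Take the string as a plain parameter instead: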
def myfunc(word):
    """Return ``word`` with odd-index letters upper-cased, the rest lower-cased.

    Args:
        word: String to convert.

    Returns:
        The converted string, e.g. ``"hello"`` becomes ``"hElLo"``.
    """
    wordlist = list(word)
    for i, v in enumerate(wordlist):
        if i % 2 != 0:
            wordlist[i] = v.upper()
        else:
            wordlist[i] = v.lower()
    word = "".join(wordlist)
    return (word)
Or easier:
def myfunc(word):
    """Return ``word`` with odd-index letters upper-cased, the rest lower-cased.

    Args:
        word: String to convert.

    Returns:
        The converted string, e.g. ``"hello"`` becomes ``"hElLo"``.
    """
    # Lower-case everything once, then only the odd positions need changing.
    word = list(word.lower())
    for i, v in enumerate(word):
        if i % 2 != 0:
            word[i] = v.upper()
    return ''.join(word)
Better:
def myfunc(word):
    """Return ``word`` with odd-index letters upper-cased, the rest lower-cased.

    Args:
        word: String to convert.

    Returns:
        The converted string, e.g. ``"hello"`` becomes ``"hElLo"``.
    """
    word = list(word.lower())
    # Extended-slice assignment rewrites every second element in one step.
    word[1::2] = map(str.upper, word[1::2])
    return ''.join(word)
Easier and simpler approach
def func(word):
    """Upper-case the characters of ``word`` at even indices.

    Characters at odd indices are copied unchanged.

    Args:
        word: String to convert.

    Returns:
        The converted string, e.g. ``"prasanna"`` becomes ``"PrAsAnNa"``.
    """
    return "".join(
        char.upper() if index % 2 == 0 else char
        for index, char in enumerate(word)
    )
word = "prasanna"


def myfunc(word):
    """Print ``word`` with odd-index letters upper-cased, the rest lower-cased.

    Args:
        word: String to convert.

    Returns:
        None; the converted string is written to standard output.
    """
    word = list(word.lower())
    for i, v in enumerate(word):
        if i % 2 != 0:
            word[i] = v.upper()
    # print() call form: the Python 2 print statement is a syntax error on
    # Python 3.
    print(''.join(word))


myfunc(word)

Splitting C string in Python

I would like to split a string similar to
'abc "defg hijk \\"l; mn\\" opqrs"; tuv'
into
(['abc', '"defg hijk \\"l; mn\\" opqrs"'], 33)
i.e. I don't want to break on semicolon inside (nested) quotes. What's the easiest way, tokenize? It doesn't hurt if it's fast, but short is better.
Edit: I forgot one more detail that makes it even more tricky. I need the position of the semicolon that is cutting off the string, or -1 if there is none. (I'm doing changes to legacy code that used to be recursive, but stackoverflowed when the string became very long.)
It's unlikely there is an easy way to solve this without a proper parser. You could probably get away with a hand built parser that doesn't require tokenizing though.
Something like the following should be a good guide:
def parse(s):
    """Split ``s`` into space-separated tokens up to the first bare semicolon.

    Double-quoted sections are kept as part of their token, quotes included;
    spaces and semicolons inside them are literal. A backslash makes the next
    character literal, so an escaped quote does not open or close a quoted
    section.

    Args:
        s: Text to parse.

    Returns:
        List of non-empty tokens found before the first semicolon that is
        outside quotes (or before the end of ``s`` if there is none).
    """
    cur_s = []
    strings = []

    def flush_string():
        # Clear in place: rebinding cur_s here would create a new local name.
        if cur_s:
            strings.append(''.join(cur_s))
            cur_s.clear()

    in_quotes = False
    escaped = False
    for c in s:
        if escaped:
            # Character after a backslash is taken literally.
            cur_s.append(c)
            escaped = False
        elif c == '\\':
            cur_s.append(c)
            escaped = True
        elif c == '"':
            cur_s.append(c)
            in_quotes = not in_quotes
        elif in_quotes:
            cur_s.append(c)
        elif c == ';':
            break
        elif c == ' ':
            flush_string()
        else:
            cur_s.append(c)
    flush_string()
    return strings
It's a stateful search, so simple stateless operations are not available. Here's a simple char-by-char stateful evaluator that might meet your "short" without resorting to full tokenization/parsing:
#!/usr/bin/env python
inp="""abc "defg hijk \\"l; mn\\" opqrs"; tuv'`"""


def words_to_semi(inpstr):
    """Collect space-separated words up to the first semicolon outside quotes.

    A small table-driven state machine: ``ops`` maps each state to handlers
    keyed by character. A handler returns ``(output, next_state)``; an output
    of ``None`` starts a new word. The ``None`` key is a state's catch-all,
    and a state without one copies the character and keeps its state.

    Args:
        inpstr: Text to scan.

    Returns:
        Tuple ``(words, pos)``: the list of words and the number of
        characters consumed before scanning stopped. ``pos`` is the index of
        the terminating semicolon, or ``len(inpstr)`` when none was found.
    """
    ret = ['']
    st8 = 1 # state: 1=reg, 2=in quotes, 3=escaped quote, 4=escaped reg, 0=end
    ops = {
        1: {' ': lambda c: (None, 1),
            '"': lambda c: (c, 2),
            ';': lambda c: ('', 0),
            '\\': lambda c: (c, 4),
            },
        2: {'\\': lambda c: (c, 3),
            '"': lambda c: (c, 1),
            },
        3: {None: lambda c: (c, 2)},
        4: {None: lambda c: (c, 1)},
    }
    pos = 0
    for C in inpstr:
        state_ops = ops[st8]
        handler = state_ops.get(C, state_ops.get(None))
        if handler is None:
            # No rule for this character: copy it, state unchanged.
            oc = C
        else:
            oc, st8 = handler(C)
        if not st8:
            break
        if oc is None:
            ret.append('')
        else:
            ret[-1] += oc
        pos = pos + 1
    return ret, pos


# print() call form: the Python 2 print statement is a syntax error on
# Python 3.
print(str(words_to_semi(inp)))
Just modify the ops dict (and add new states) to handle other cases; everything else is generic code.
Here's the brute-force method I went with. Brrr...
def f(s):
    """Split ``s`` into tokens up to the first semicolon outside quotes.

    Tokens are separated by space, tab or newline. A double-quoted section is
    one token, quotes included; inside it a backslash makes the next
    character literal, so an escaped quote does not end the section.

    Args:
        s: Text to scan.

    Returns:
        Tuple ``(tokens, cut_index)`` where ``cut_index`` is the index of the
        terminating semicolon, or ``-1`` if there is none. Without a
        semicolon the trailing token is still included.
    """
    instr = False
    inescape = False
    a = ''
    rs = []
    cut_index = -1
    for idx, ch in enumerate(s):
        if instr:
            a += ch
            if inescape:
                # Character after a backslash: literal, escape is over.
                inescape = False
            elif ch == '\\':
                inescape = True
            elif ch == '"':
                # Closing quote ends the quoted token.
                if a:
                    rs.append(a)
                a = ''
                instr = False
        elif ch == '"':
            # Opening quote: finish any pending token, start a quoted one.
            if a:
                rs.append(a)
            a = ch
            instr = True
        elif ch == ';':
            if a:
                rs.append(a)
            cut_index = idx
            break
        elif ch == ' ' or ch == '\t' or ch == '\n':
            if a:
                rs.append(a)
            a = ''
        else:
            a += ch
    else:
        # Input ended without a semicolon: keep the token still being built.
        if a:
            rs.append(a)
    return rs, cut_index


f('abc "defg hijk \\"l; mn\\" opqrs"; tuv')

Categories