Replace string one by one - python

I have a string and I need to replace "e" with "x" one at a time. For e.g.
x = "three"
Then the expected output is:
("thrxe", "threx")
and if I have 3 characters to replace, for e.g.
y = "threee"
Then the expected output will be:
("thrxee", "threxe", "threex")
I have tried this:
x.replace("e", "x", 1) # -> 'thrxe'
But not sure how to return the second string "threx".

Try this
x = "threee"
# build a generator expression that yields the position of "e"s
# change "e"s with "x" according to location of "e"s yielded from the genexp
[f"{x[:i]}x{x[i+1:]}" for i in (i for i, e in enumerate(x) if e=='e')]
['thrxee', 'threxe', 'threex']

You could use a generator to replace e with x sequentially through the string. For example:
def replace(string, old, new):
l = len(old)
start = 0
while start != -1:
start = string.find(old, start + l)
if start != -1:
yield string[:start] + new + string[start + l:]
z = replace('threee', 'e', 'x')
for s in z:
print(s)
Output:
thrxee
threxe
threex
Note I've generalised the code to allow for arbitrary length match and replacement strings, if you don't need that just replace l (len(old)) with 1.

def replace(string,old,new):
f = string.index(old)
l = list(string)
i = 0
for a in range(string.count(old)):
l[f] = new
yield ''.join(l)
l[f]=old
try:
f = string.index(old,f+1)
except ValueError:
pass
i+=1
z = replace('threee', 'e', 'x')
for a in z:
print(a)
OUTPUT
thrxee
threxe
threex

Related

Count of sub-strings that contain character X at least once. E.g Input: str = “abcd”, X = ‘b’ Output: 6

This question was asked in an exam but my code (given below) passed just 2 cases out of 7 cases.
Input Format : single line input seperated by comma
Input: str = “abcd,b”
Output: 6
“ab”, “abc”, “abcd”, “b”, “bc” and “bcd” are the required sub-strings.
def slicing(s, k, n):
loop_value = n - k + 1
res = []
for i in range(loop_value):
res.append(s[i: i + k])
return res
x, y = input().split(',')
n = len(x)
res1 = []
for i in range(1, n + 1):
res1 += slicing(x, i, n)
count = 0
for ele in res1:
if y in ele:
count += 1
print(count)
When the target string (ts) is found in the string S, you can compute the number of substrings containing that instance by multiplying the number of characters before the target by the number of characters after the target (plus one on each side).
This will cover all substrings that contain this instance of the target string leaving only the "after" part to analyse further, which you can do recursively.
def countsubs(S,ts):
if ts not in S: return 0 # shorter or no match
before,after = S.split(ts,1) # split on target
result = (len(before)+1)*(len(after)+1) # count for this instance
return result + countsubs(ts[1:]+after,ts) # recurse with right side
print(countsubs("abcd","b")) # 6
This will work for single character and multi-character targets and will run much faster than checking all combinations of substrings one by one.
Here is a simple solution without recursion:
def my_function(s):
l, target = s.split(',')
result = []
for i in range(len(l)):
for j in range(i+1, len(l)+1):
ss = l[i] + l[i+1:j]
if target in ss:
result.append(ss)
return f'count = {len(result)}, substrings = {result}'
print(my_function("abcd,b"))
#count = 6, substrings = ['ab', 'abc', 'abcd', 'b', 'bc', 'bcd']
Here you go, this should help
from itertools import combinations
output = []
initial = input('Enter string and needed letter seperated by commas: ') #Asking for input
list1 = initial.split(',') #splitting the input into two parts i.e the actual text and the letter we want common in output
text = list1[0]
final = [''.join(l) for i in range(len(text)) for l in combinations(text, i+1)] #this is the core part of our code, from this statement we get all the available combinations of the set of letters (all the way from 1 letter combinations to nth letter)
for i in final:
if 'b' in i:
output.append(i) #only outputting the results which have the required letter/phrase in it

How to remove leading zeros from the calculator expression in a string? python

I have a doubt, in python
the string is, Z = "00123+0567*29/03-7"
how to convert it to "123+567*29/3-7"
Even I tried with re.split('[+]|[*]|-|/', Z) later with for i in res : i = i.lstrip("0")
but it will split correctly, but to join back with the same operands as in the string "Z" as Z = "123+567*29/3-7"
How to resolve it
def cut_zeroes(Z):
i, res = 0, []
n = len(Z)
while i < n:
j = i
while i < n and Z[i] not in '+-/*':
i += 1
res.append(int(Z[j:i]))
if i < n:
res.append(Z[i])
i += 1
return ''.join(map(str,res))
Z = "00123+0567*29/03-700"
print(cut_zeroes(Z))
Z = "00123+0567*29/03-7"
print Z
import re
res = re.split(r'(\D)', Z)
print res
empty_lst = []
for i in res :
i = i.lstrip("0")
empty_lst.append(i)
print i
print empty_lst
new_str = ''.join(empty_lst)
print new_str
def zero_simplify(Z):
from re import sub
return [char for char in sub("0{2,}", "0", Z)]
Z = "00123+0567*29/03-7+0-000"
Z = zero_simplify(Z)
pos = len(Z)-1
while pos>-1:
if Z[pos]=="0":
end = pos
while Z[pos] == "0":
pos-=1
if pos==-1:
del Z[pos+1:end+1]
if (not Z[pos].isdigit()) and (Z[pos] != ".") and (Z[pos] == "0"):
del Z[pos+1:end+1]
else:
pos-=1
Z = "".join(Z)
print(Z)
What this does is set Z, 'listify' it, and set pos to the last position in Z. Then it removes all of the unnecessary 0s using the loop and Z = "".join(Z). Then it prints Z at the end. If you want a function to remove the zeroes, you can have it this way:
def zero_simplify(Z):
from re import sub
return [char for char in sub("0{2,}", "0", Z)]
def remove_unnecessary_zeroes(Z):
Z = [char for char in Z]
pos = len(Z)-1
while pos>-1:
if Z[pos]=="0":
end = pos
while Z[pos] == "0":
pos-=1
if pos==-1:
del Z[pos+1:end+1]
if (not Z[pos].isdigit()) and (Z[pos] != ".") and (Z[pos] == "0"):
del Z[pos+1:end+1]
else:
pos-=1
Z = "".join(Z)
return Z
Z = "00123+0567*29/03-7+0-000"
print(remove_unnecessary_zeroes(Z))
Try it yourself and tell me in the comments if it worked for you!
Here's a concise (if you strip all the comment in the code) and elegant way to achieve this:
import re
Z = "00123+0567*29/03-7"
operators = re.findall('\D', Z) # List all the operators used in the string
nums = re.split('\D', Z) # List all the numbers in the list
operators.append('') # Add an empty operator at the end
nums = [num.lstrip('0') for num in nums] # Strip all the leading zeroes from each numbers
# Create a list with the operands (numbers) concatenated by operators
num_operator_list = [nums[i] + operators[i] for i in range(len(nums))]
# Join all the intermediate expressions to create a final expression
final_expression = ''.join(num_operator_list)
print(final_expression)
Output
123+567*29/3-7
Explanation
First of all you need to separate the operators and the operands and then lstrip the zeroes from each operands. After this add an additional empty operator at the end of operator list. Then concatenate each operand with corresponding operator (the empty operator is concatenated with the last operand). Finally, join the list to get a final expression.
It can be done with regular expressions:
import re
Z = "00123+0567*29/03-7"
r1=r"(\D)0+(\d+)"
r2=r"\b0+(\d+)"
#substitute non-digit,leading zeroes, digits with non-digit and digits
sub1=re.sub(r1,r"\1\2",Z)
#substitute start of string, leading zeroes, digits with digits
sub2=re.sub(r2,r"\1",sub1)
print(sub2)
It is done in two passes (to handle the leading zeroes in the beginning of the string), I don't know if it's possible to do in one pass.

Python compare partial string in a list with each other

I am trying to write a code to compare each string in a list to each other and then generate its regex for similarity
list = ["LONDON-UK-L16-N1",
"LONDON-UK-L17-N1",
"LONDON-UK-L16-N2",
"LONDON-UK-L17-N2",
"PARIS-France-L16-N2"]
I am trying to get an output as below
LONDON-UK-L(16|17)-N(1|2)
is that possible? thanks
Update: just to make it clear i am trying to
input: list, or strings
Action: compare list items to each other, and check for similarity (to fix it-first group of a string), and use regex for any other not similar part of item, so instead of having for items, we can have a single output (using regex)
output: regex to match not similar
input:
tez15-3-s1-y2
tez15-3-s2-y2
bro40-55-s1-y2
output:
tez15-3-s(1|2)-y2
,bro40-55-s1-y2
Its not entirely clear from your question what the exact problem is. Since the data you gave as an example is consistent and well ordered, this problem can be solved easily by simply splitting up the items in the list and categorising them.
loc_list = ["LONDON-UK-L16-N1", "LONDON-UK-L17-N1", "LONDON-UK-L16-N2",
"LONDON-UK-L16-N2", "PARIS-France-L16-N2"]
split_loc_list = [location.split("-") for location in loc_list]
locs = {}
for loc in split_loc_list:
locs.setdefault("-".join(loc[0:2]), {}).\
setdefault("L", set()).add(loc[2].strip("L"))
locs.setdefault("-".join(loc[0:2]), {}).\
setdefault("N", set()).add(loc[3].strip("N"))
for loc, vals in locs.items():
L_vals_sorted = sorted(list(map(int,vals["L"])))
L_vals_joined = "|".join(map(str,L_vals_sorted))
N_vals_sorted = sorted(list(map(int,vals["N"])))
N_vals_joined = "|".join(map(str,N_vals_sorted))
print(f"{loc}-L({L_vals_joined})-N({N_vals_joined})")
will output:
LONDON-UK-L(16|17)-N(1|2)
PARIS-France-L(16)-N(2)
Since there were only two tags here ("L" and "N"), I just wrote them into the code. If there are many tags possible, then you can strip by any letter using:
import re
split = re.findall('\d+|\D+', loc[2])
key, val = split[0], split[1]
locs.setdefault("-".join(loc[0:2]), {}).\
setdefault(key, set()).add(val)
Then iterate through all the tags instead of just fetching "L" and "N" in the second loop.
I post this new (second) implementation on this problem, I think more accurate and hope helpful:
import re
data = [
'LONDON-UK-L16-N1',
'LONDON-UK-L17-N1',
'LONDON-UK-L16-N2',
'LONDON-UK-L17-N2',
'LONDON-UK-L18-N2',
'PARIS-France-L16-N2',
]
def merge(data):
data.sort()
data = [y for y in [x.split('-') for x in data]]
for col in range(len(data[0]) - 1, -1, -1):
result = []
def add_result():
result.append([])
if headstr:
result[-1] += headstr.split('-')
if len(list(findnum)) > 1:
result[-1] += [f'{findstr}({"|".join(sorted(findnum))})']
elif len(list(findnum)) == 1:
result[-1] += [f'{findstr}{findnum[0]}']
if tailstr:
result[-1] += tailstr.split('-')
_headstr = lambda x, y: '-'.join(x[:y])
_tailstr = lambda x, y: '-'.join(x[y + 1:])
_findstr = lambda x: re.findall('(\D+)', x)[0] if re.findall('(\D+)', x) else ''
_findnum = lambda x: re.findall('(\d+)', x)[0] if re.findall('(\d+)', x) else ''
headstr = _headstr(data[0], col)
tailstr = _tailstr(data[0], col)
findstr = _findstr(data[0][col])
findnum = []
for row in data:
if headstr + findstr + tailstr != _headstr(row, col) + _findstr(row[col]) + _tailstr(row, col):
add_result()
headstr = _headstr(row, col)
tailstr = _tailstr(row, col)
findstr = _findstr(row[col])
findnum = []
if _findnum(row[col]) not in findnum:
findnum.append(_findnum(row[col]))
else:
add_result()
data = result[:]
return ['-'.join(x) for x in result]
print(merge(data)) # ['LONDON-UK-L(16|17)-N(1|2)', 'LONDON-UK-L18-N2', 'PARIS-France-L16-N2']
I've implemented the following solution:
import re
data = [
'LONDON-UK-L16-N1',
'LONDON-UK-L17-N1',
'LONDON-UK-L16-N2',
'LONDON-UK-L16-N2',
'PARIS-France-L16-N2'
]
def deconstruct(data):
data = [y for y in [x.split('-') for x in data]]
result = dict()
for x in data:
pointer = result
for y in x:
substr = re.findall('(\D+)', y)
if substr:
substr = substr[0]
if not substr in pointer:
pointer[substr] = {0: set()}
pointer = pointer[substr]
substr = re.findall('(\d+)', y)
if substr:
substr = substr[0]
pointer[0].add(substr)
return result
def construct(data, level=0):
result = []
for key in data.keys():
if key != 0:
if len(data[key][0]) == 1:
nums = list(data[key][0])[0]
elif len(data[key][0]) > 1:
nums = '(' + '|'.join(sorted(list(data[key][0]))) + ')'
else:
nums = ''
deeper_result = construct(data[key], level + 1)
if not deeper_result:
result.append([key + nums])
else:
for d in deeper_result:
result.append([key + nums] + d)
return result if level > 0 else ['-'.join(x) for x in result]
print(construct(deconstruct(data)))
# ['LONDON-UK-L(16|17)-N(1|2)', 'PARIS-France-L16-N2']
Don't use 'list' as a variable name... it's a reserved word.
import re
lst = ['LONDON-UK-L16-N1', 'LONDON-UK-L17-N1', 'LONDON-UK-L16-N2', 'LONDON-UK-L16-N2', 'PARIS-France-L16-N2']
def check_it(string):
return re.search(r'[a-zA-Z\-]*L(\d)*-N(\d)*', string)
[check_it(x).group(0) for x in lst]
will output:
['LONDON-UK-L16-N1',
'LONDON-UK-L17-N1',
'LONDON-UK-L16-N2',
'LONDON-UK-L16-N2',
'PARIS-France-L16-N2']
From there, look into groups and define a group to cover the pieces that you want to use for similarity.

Find longest unique substring in string python

I am trying that age old question (there are multitudes of versions around) of finding the longest substring of a string which doesn't contain repeated characters. I can't work out why my attempt doesn't work properly:
def findLongest(inputStr):
resultSet = []
substr = []
for c in inputStr:
print ("c: ", c)
if substr == []:
substr.append([c])
continue
print(substr)
for str in substr:
print ("c: ",c," - str: ",str,"\n")
if c in str:
resultSet.append(str)
substr.remove(str)
else:
str.append(c)
substr.append([c])
print("Result set:")
print(resultSet)
return max(resultSet, key=len)
print (findLongest("pwwkewambb"))
When my output gets to the second 'w', it doesn't iterate over all the substr elements. I think I've done something silly, but I can't see what it is so some guidance would be appreciated! I feel like I'm going to kick myself at the answer...
The beginning of my output:
c: p
c: w
[['p']]
c: w - str: ['p']
c: w
[['p', 'w'], ['w']]
c: w - str: ['p', 'w'] # I expect the next line to say c: w - str: ['w']
c: k
[['w'], ['w']] # it is like the w was ignored as it is here
c: k - str: ['w']
c: k - str: ['w']
...
EDIT:
I replaced the for loop with
for idx, str in enumerate(substr):
print ("c: ",c," - str: ",str,"\n")
if c in str:
resultSet.append(str)
substr[idx] = []
else:
str.append(c)
and it produces the correct result. The only thing is that the empty element arrays get set with the next character. It seems a bit pointless; there must be a better way.
My expected output is kewamb.
e.g.
c: p
c: w
[['p']]
c: w - str: ['p']
c: w
[['p', 'w'], ['w']]
c: w - str: ['p', 'w']
c: w - str: ['w']
c: k
[[], [], ['w']]
c: k - str: []
c: k - str: []
c: k - str: ['w']
c: e
[['k'], ['k'], ['w', 'k'], ['k']]
c: e - str: ['k']
c: e - str: ['k']
c: e - str: ['w', 'k']
c: e - str: ['k']
...
Edit, per comment by #seymour on incorrect responses:
def find_longest(s):
_longest = set()
def longest(x):
if x in _longest:
_longest.clear()
return False
_longest.add(x)
return True
return ''.join(max((list(g) for _, g in groupby(s, key=longest)), key=len))
And test:
In [101]: assert find_longest('pwwkewambb') == 'kewamb'
In [102]: assert find_longest('abcabcbb') == 'abc'
In [103]: assert find_longest('abczxyabczxya') == 'abczxy'
Old answer:
from itertools import groupby
s = set() ## for mutable access
''.join(max((list(g) for _, g in groupby('pwwkewambb', key=lambda x: not ((s and x == s.pop()) or s.add(x)))), key=len))
'kewamb'
groupby returns an iterator grouped based on the function provided in the key argument, which by default is lambda x: x. Instead of the default we are utilizing some state by using a mutable structure (which could have been done a more intuitive way if using a normal function)
lambda x: not ((s and x == s.pop()) or s.add(x))
What is happening here is since I can't reassign a global assignment in a lambda (again I can do this, using a proper function), I just created a global mutable structure that I can add/remove. The key (no pun) is that I only keep elements that I need by using a short circuit to add/remove items as needed.
max and len are fairly self explanatory, to get the longest list produced by groupby
Another version without the mutable global structure business:
def longest(x):
if hasattr(longest, 'last'):
result = not (longest.last == x)
longest.last = x
return result
longest.last = x
return True
''.join(max((list(g) for _, g in groupby('pwwkewambb', key=longest)), key=len))
'kewamb'
Not sure what is wrong in your attempt, but it's complex and in:
for str in substr:
print ("c: ",c," - str: ",str,"\n")
if c in str:
resultSet.append(str)
substr.remove(str)
you're removing elements from a list while iterating on it: don't do that, it gives unexpected results.
Anyway, my solution, not sure it's intuitive, but it's probably simpler & shorter:
slice the string with an increasing index
for each slice, create a set and store letters until you reach the end of the string or a letter is already in the set. Your index is the max length
compute the max of this length for every iteration & store the corresponding string
Code:
def findLongest(s):
maxlen = 0
longest = ""
for i in range(0,len(s)):
subs = s[i:]
chars = set()
for j,c in enumerate(subs):
if c in chars:
break
else:
chars.add(c)
else:
# add 1 when end of string is reached (no break)
# handles the case where the longest string is at the end
j+=1
if j>maxlen:
maxlen=j
longest=s[i:i+j]
return longest
print(findLongest("pwwkewambb"))
result:
kewamb
Depends on your definition of repeated characters: if you mean consecutive, then the approved solution is slick, but not of characters appearing more than once (e.g.: pwwkewabmb -> 'kewabmb' ).
Here's what I came up with (Python 2):
def longest(word):
begin = 0
end = 0
longest = (0,0)
for i in xrange(len(word)):
try:
j = word.index(word[i],begin,end)
# longest?
if end-begin >= longest[1]-longest[0]:
longest = (begin,end)
begin = j+1
if begin==end:
end += 1
except:
end = i+1
end=i+1
if end-begin >= longest[1]-longest[0]:
longest = (begin,end)
return word[slice(*longest)]
Thus
>>> print longest('pwwkewabmb')
kewabm
>>> print longest('pwwkewambb')
kewamb
>>> print longest('bbbb')
b
My 2-cents:
from collections import Counter
def longest_unique_substr(s: str) -> str:
# get all substr-ings from s, starting with the longest one
for substr_len in range(len(s), 0, -1):
for substr_start_index in range(0, len(s) - substr_len + 1):
substr = s[substr_start_index : substr_start_index + substr_len]
# check if all substr characters are unique
c = Counter(substr)
if all(v == 1 for v in c.values()):
return substr
# ensure empty string input returns ""
return ""
Run:
In : longest_unique_substr('pwwkewambb')
Out: 'kewamb'
s=input()
ma=0
n=len(s)
l=[]
a=[]
d={}
st=0;i=0
while i<n:
if s[i] not in d:
d[s[i]]=i
l.append(s[i])
else:
t=d[s[i]]
d[s[i]]=i
s=s[t+1:]
d={}
n=len(s)
if len(l)>=3:
a.append(l)
ma=max(ma,len(l))
l=[];i=-1
i=i+1
if len(l)!=0 and len(l)>=3:
a.append(l)
ma=max(ma,len(l))
if len(a)==0:
print("-1")
else:
for i in a:
if len(i)==ma:
for j in i:
print(j,end="")
break

Python: How to replace substring occurrences with next values from list?

I have the following string and list:
myString = "a:::b:::c:::d ..... " where ':::' is sort of delimiter
myList = [1,2,3,4.......]
I know how to cycle through list values, but how do i replace each occurrence of ':::' with next value in list, such that my final output is as follows:
myString = "a1b2c3d ...."
Quick & Dirty:
myString = "a:::b:::c:::d"
myList = [1,2,3,4]
s = iter(myString.split(":::"))
print(next(s) + "".join(str(y)+x for x,y in zip(s,myList)))
prints a1b2c3d
l = myString.split(":::")
result = "".join([x + str(y) for (x,y) in zip(l, myList)])
Here is how I solved it.
def join_string_and_list( string, list ):
string = string.split(":::")
for i in range(0, len(string)):
if string[i] != '' and len(list) > 0:
string.append("{}{}".format(string.pop(0),list.pop(0)))
return str("".join(string))
myString = "a:::b:::c:::d:::" #where ':::' is sort of delimiter
myList = [1,2,3,4]
print(join_string_and_list( myString, myList ))

Categories