Replace a pattern in python - python

How to replace the pattern in the string with
decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
The first pattern "(++info++)" needs to be replaced with (++info a++)
The second pattern "(++info++)" needs to be replaced with (++info b++)
The third pattern "(++info++)" needs to be replaced with (++info c++)
If there are more occurrences, they should be replaced accordingly.

This should be simple enough:
# Rewrite each "(++info++)" token in turn as "(++info a++)", "(++info b++)", ...
# The upper bound is ord('z') + 1 because range() excludes its end value;
# without the + 1 the loop would stop at 'y' instead of 'z'.
for character in range(ord('a'), ord('z') + 1):
    if "(++info++)" not in decoded_str:
        break
    # count=1: only the left-most remaining token is rewritten on each pass.
    decoded_str = decoded_str.replace("(++info++)", "(++info {0}++)".format(chr(character)), 1)
print(decoded_str)
It has the added benefit of stopping at 'z'. If you want to wrap around:
import itertools

# Same left-to-right rewrite as the bounded loop, but the letters wrap around
# ('z' is followed by 'a' again), so the loop ends only when no token is left.
# ord('z') + 1 is needed because range() excludes its end value.
for character in itertools.cycle(range(ord('a'), ord('z') + 1)):
    if "(++info++)" not in decoded_str:
        break
    # count=1: only the left-most remaining token is rewritten on each pass.
    decoded_str = decoded_str.replace("(++info++)", "(++info {0}++)".format(chr(character)), 1)
print(decoded_str)
And just for fun, a one-liner, and O(n):
# Split on the token, append a lettered token after every piece, then slice
# off the one surplus token that ends up after the final piece.
# Caveats: this line works on the name `dstr` (not `decoded_str`); zip() stops
# at the shorter input and range(ord('a'), ord('z')) yields only the 25 codes
# for 'a'..'y', so at most 25 pieces are kept and any later text is dropped.
dstr = "".join(x + "(++info {0}++)".format(chr(y)) for x, y in zip(dstr.split("(++info++)"), range(ord('a'), ord('z'))))[:-len("(++info a++)")]

import string
decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# Turn every token into a %-format placeholder.
s = decoded_str.replace('++info++', '++info %s++')
# Fill the placeholders, in order, with as many lowercase letters as there are
# '%s' occurrences. The value of this expression is not stored anywhere (in an
# interactive session it is echoed).
# Caveats: any other '%' in the text is also interpreted by the % operator,
# and the slice can supply at most 26 letters.
s % tuple(i for i in string.ascii_lowercase[:s.count('%s')])

Here is a rather ugly yet pragmatic solution:
import string

decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# ascii_lowercase is locale independent and exists on Python 2 and 3;
# string.lowercase was removed in Python 3.
letters = list(string.ascii_lowercase)
token = "(++info++)"
rep_token = "(++info %s++)"
i = 0
# Rewrite the left-most remaining token on each pass. The bound on i stops the
# loop after 'z' instead of raising IndexError on a 27th token.
while token in decoded_str and i < len(letters):
    decoded_str = decoded_str.replace(token, rep_token % letters[i], 1)
    i += 1
print(decoded_str)

>>> import re
>>> rx = re.compile(r'\(\+\+info\+\+\)')
>>> s = "Name(++info++)Age(++info++)Adress of the emp(++info++)"
>>> atoz = iter("abcdefghijklmnopqrstuvwxyz")
>>> rx.sub(lambda m: '(++info ' + next(atoz) + '++)', s)
'Name(++info a++)Age(++info b++)Adress of the emp(++info c++)'

Here's a quick hack to do it:
string=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
def doit(s):
    """Label successive "++info++" tokens in *s* with 'a', 'b', 'c', ...

    Tokens are rewritten left to right, one per letter, as "++info <letter>++".
    Only 26 letters are available: tokens beyond the 26th are left unchanged.

    Returns the rewritten string (or *s* itself when it contains no token).
    """
    import string
    # ascii_lowercase works on Python 2 and 3; string.lowercase is Python 2 only.
    for letter in string.ascii_lowercase:
        s2 = s.replace("++info++", "++info " + letter + "++", 1)
        if s2 == s:
            # Nothing was replaced, so no token is left.
            break
        s = s2
    # Iterating the letters (rather than indexing with a counter) means the
    # loop can never read past 'z', which used to raise IndexError as soon as
    # the input held 26 or more tokens.
    return s
Note that performance is probably not very great.

import re
import string

decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# A generator's bound .send method is used as the replacement callable: each
# call (re.sub passes the match object, which the generator ignores) returns
# the next replacement string.
# The leading '.' entry is a throwaway value consumed by the priming
# send(None) call, which a freshly created generator requires.
sub_func = ('(++info %s++)' % c for c in '.' + string.ascii_lowercase).send
sub_func(None)
# Raw string so the regex escapes are not treated as string escapes.
print(re.sub(r'\(\+\+info\+\+\)', sub_func, decoded_str))

import string

decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# Everything before the first token is kept as is; each later piece is
# prefixed with the next lettered token.
parts = iter(decoded_str.split("(++info++)"))
first_part = next(parts)
tags = iter(string.ascii_lowercase)
# The built-in zip replaces itertools.izip, which does not exist on Python 3.
# zip stops when either iterator is exhausted, so a 27th token and all text
# after it would be dropped.
encoded_str = first_part + "".join("(++info %s++)%s" % x for x in zip(tags, parts))
print(encoded_str)

decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
import re

# Compile once instead of re-parsing the pattern on every iteration.
token_rx = re.compile(r"\(\+\+info\+\+\)")
# One pass per token found up front; each pass rewrites the left-most
# remaining token. chr(97 + i) is 'a' for i == 0, 'b' for i == 1, and so on
# (it continues past 'z' into other characters after 26 tokens).
for i in range(len(token_rx.findall(decoded_str))):
    decoded_str = token_rx.sub("(++info %s++)" % chr(97 + i), decoded_str, count=1)
print(decoded_str)

Related

String Operation on captured group in re Python

I have a string:
str1 = "abc = def"
I want to convert it to:
str2 = "abc = #Abc#"
I am trying this:
re.sub("(\w+) = (\w+)",r"\1 = %s" % ("#"+str(r"\1").title()+"#"),str1)
but it returns: (without the string operation done)
"abc = #abc#"
What is the possible reason that .title() is not working?
How can I apply a string operation to a captured group in Python?
You can see what's going on with the help of a little function:
import re
str1 = "abc = def"
def fun(m):
    """Print *m* prefixed with "In fun(): " and hand it back unchanged."""
    line = "In fun(): " + m
    print(line)
    return m
str2 = re.sub(r"(\w+) = (\w+)",
r"\1 = %s" % ("#" + fun(r"\1") + "#"),
# ^^^^^^^^^^
str1)
Which yields
In fun(): \1
So what you are basically trying to do is to change \1 (not the substitute!) to a title-cased version, which obviously remains \1 literally. The \1 is replaced with the captured content only later, after your call to str.title() has already run.
Go with a lambda function as proposed by @Rakesh.
Try using lambda.
Ex:
import re

str1 = "abc = def"
# A callable replacement receives the match object, so string methods such as
# .title() run on the captured text itself rather than on the literal "\1".
# The pattern is a raw string so that \w reaches the regex engine untouched.
result = re.sub(
    r"(?P<one>(\w+)) = (\w+)",
    lambda match: r'{0} = #{1}#'.format(match.group('one'), match.group('one').title()),
    str1,
)
print(result)
Output:
abc = #Abc#

Delete Specific Part Of A String

I want to delete the part after the last '/' of a string in this following way:
str = "live/1374385.jpg"
formated_str = "live/"
or
str = "live/examples/myfiles.png"
formated_str = "live/examples/"
I have tried this so far ( working )
import re
for i in re.findall('(.*?)/',str):
j += i
j += '/'
Output :
live/ or live/examples/
I am a beginner in Python, so I am just curious whether there is any other way to do that.
Use rsplit:
str = "live/1374385.jpg"
print (str.rsplit('/', 1)[0] + '/')
live/
str = "live/examples/myfiles.png"
print (str.rsplit('/', 1)[0] + '/')
live/examples/
You can also use .rindex string method:
s = 'live/examples/myfiles.png'
s[:s.rindex('/')+1]
#!/usr/bin/python
def removePart(_str1):
    """Return *_str1* cut off just after its last '/'.

    "live/examples/myfiles.png" becomes "live/examples/". A string that
    contains no '/' yields "" (there is no directory part to keep), rather
    than "/", which would turn a bare file name into the root path.
    """
    # rfind returns -1 when there is no slash, which makes the slice end at 0.
    return _str1[:_str1.rfind("/") + 1]
def main():
    """Print removePart's result for the two sample paths."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print(removePart("live/1374385.jpg"))
    print(removePart("live/examples/myfiles.png"))
main()

Removing words with special characters "\" and "/"

During the analysis of tweets, I run into "words" that contain either \ or / (possibly more than once in a single "word"). I would like to have such words removed completely, but I cannot quite nail it.
This is what I tried:
sen = 'this is \re\store and b\\fre'
sen1 = 'this i\s /re/store and b//fre/'
slash_back = r'(?:[\w_]+\\[\w_]+)'
slash_fwd = r'(?:[\w_]+/+[\w_]+)'
slash_all = r'(?<!\S)[a-z-]+(?=[,.!?:;]?(?!\S))'
strt = re.sub(slash_back,"",sen)
strt1 = re.sub(slash_fwd,"",sen1)
strt2 = re.sub(slash_all,"",sen1)
print strt
print strt1
print strt2
I would like to get:
this is and
this i\s and
this and
However, I receive:
and
this i\s / and /
i\s /re/store b//fre/
To add: in this scenario the "word" is a string separated either by spaces or punctuation signs (like a regular text)
How's this? I added some punctuation examples:
import re
# Raw strings keep every backslash in the sample sentences literal.
sen = r'this is \re\store and b\\fre'
sen1 = r'this i\s /re/store and b//fre/'
sen2 = r'this is \re\store, and b\\fre!'
sen3 = r'this i\s /re/store, and b//fre/!'
# Each pattern matches: optional leading whitespace, optional word characters,
# one slash of the given kind, any number of further "word characters + slash"
# segments, then optional trailing word characters. Punctuation is not part of
# the match, so it is left in place.
slash_back = r'\s*(?:[\w_]*\\(?:[\w_]*\\)*[\w_]*)'
slash_fwd = r'\s*(?:[\w_]*/(?:[\w_]*/)*[\w_]*)'
# Same shape, but either kind of slash counts.
slash_all = r'\s*(?:[\w_]*[/\\](?:[\w_]*[/\\])*[\w_]*)'
strt = re.sub(slash_back,"",sen)
strt1 = re.sub(slash_fwd,"",sen1)
strt2 = re.sub(slash_all,"",sen1)
strt3 = re.sub(slash_back,"",sen2)
strt4 = re.sub(slash_fwd,"",sen3)
strt5 = re.sub(slash_all,"",sen3)
print(strt)
print(strt1)
print(strt2)
print(strt3)
print(strt4)
print(strt5)
Output:
this is and
this i\s and
this and
this is, and!
this i\s, and!
this, and!
One way you could do it without re is with join and a comprehension.
sen = 'this is \re\store and b\\fre'
sen1 = 'this i\s /re/store and b//fre/'
def remove_back(s):
    """Drop every whitespace-separated word of *s* that contains a backslash.

    The surviving words are rejoined with single spaces.
    """
    kept = []
    for word in s.split():
        if '\\' in word:
            continue
        kept.append(word)
    return ' '.join(kept)


def remove_forward(s):
    """Drop every whitespace-separated word of *s* that contains a forward slash.

    The surviving words are rejoined with single spaces.
    """
    kept = []
    for word in s.split():
        if '/' in word:
            continue
        kept.append(word)
    return ' '.join(kept)
>>> print(remove_back(sen))
this is and
>>> print(remove_forward(sen1))
this i\s and
>>> print(remove_back(remove_forward(sen1)))
this and

Search Patterns replacement using lambda

I need to write the text before and after each search-and-replace pattern to a file. I have written the code below; I used a function to write to the output file and it works fine. But I have around 20 such replacement patterns, and I feel this is not good code because I would need to create a function for each replacement. Could you please let me know whether there is another way to implement this?
import re
Report_file = open("report.txt", "w")
st = '''<TimeLog>
<InTime='10Azx'>1056789</InTime>
<OutTime='14crg'>1056867</OutTime>
<PsTime='32lxn'>1056935</PsTime>
<ClrTime='09zvf'>1057689</ClrTime>
</TimeLog>'''
def tcnv(str):
    """Convert one <ClrTime ...> element and log the before/after text.

    Writes the incoming text to Report_file, builds the <Clzone> replacement
    from the quoted attribute value and the element content, writes that to
    Report_file as well, and returns it.
    """
    Report_file.write("".join(("Previous TS: ", str, "\n\n")))
    # Quoted value from the opening tag.
    stamp_id = re.search(r"(?i)<clrtime='(\d+\w+)'>", str).group(1)
    # Text between the opening and closing tags (third group of the pattern).
    payload = re.search(r"(?i)(<clrtime='(\d+\w+)'>(.*?)</clrtime>)", str).group(3)
    soutval = "".join(["<Clzone><clnvl='", stamp_id, "'>", payload, "</clnvl></Clzone>"])
    Report_file.write("".join(("New TS: ", soutval, "\n")))
    return soutval
st = re.sub(r"(?i)(<clrtime='(\d+\w+)'>(.*?)</clrtime>)", lambda m: tcnv(m.group(1)), st)
st = re.sub(r"(?i)<intime='(\d+\w+)'>(.*?)</intime>", "<Izone><Invl='\\1'>\\2</Invl></Izone>", st)
st = re.sub(r"(?i)<outtime='(\d+\w+)'>(.*?)</outtime>", "<Ozone><onvl='\\1'>\\2</onnvl></Ozone>", st)
st = re.sub(r"(?i)<pstime='(\d+\w+)'>(.*?)</pstime>", "<Pszone><psnvl='\\1'>\\2</psnvl
I didn't see why you put the re.IGNORECASE flag in the form of (?i), so I don't use it in the following solution; the pattern is written with uppercase letters where necessary, according to your sample.
Note that you should use the with statement to open files; it is far better:
with open('filename.txt','rb') as f:
ch = f.read()
The answer
import re
st = '''<InTime='10Azx'>1056789</InTime>
<OutTime='14crg'>1056867</OutTime>
<PsTime='32lxn'>1056935</PsTime>
<ClrTime='09zvf'>1057689</ClrTime>
'''

# Source tag -> (opening replacement, closing replacement), both written
# without the outermost angle brackets, which ripl() adds.
# Supporting another tag only requires a new entry here.
# NOTE(review): '/onnvl' is copied from the question's replacement string and
# does not match the opening 'onvl' -- confirm whether that is intended.
d = {
    'InTime': ('Izone><Invl', '/Invl></Izone'),
    'OutTime': ('Ozone><onvl', '/onnvl></Ozone'),
    'PsTime': ('Pszone><psnvl', '/psnvl></Pszone'),
    'ClrTime': ('Clzone><clnvl', '/clnvl></Clzone'),
}


def ripl(ma, d=d):
    """Build the replacement text for one matched element.

    ma -- match object with group 1 = tag name, group 2 = quoted attribute
          value, group 3 = element content.
    d  -- mapping from tag name to its (opening, closing) replacement pair.

    Raises KeyError if the matched tag name is not a key of d.
    """
    opening, closing = d[ma.group(1)]
    return "<{}='{}'>{}<{}>".format(opening, ma.group(2), ma.group(3), closing)


# The alternation is derived from the table so the two cannot drift apart.
tag_alternation = '|'.join(re.escape(tag) for tag in d)
st2 = re.sub(r"<(%s)='(\d+\w+)'>(.*?)</\1>" % tag_alternation, ripl, st)

print('%s\n\n%s\n' % (st, st2))

String Conversion

s='This is sample'
I need to convert it like this:
s='"This is sample"'
output="This is sample"
How can I do this dynamically?
Thanks in advance
orig = 'This is sample'
# Surround the text with literal double quotes; the one-element tuple makes
# the %-formatting argument explicit.
converted = '"%s"' % (orig,)
>>> s= 'This is a sample'
>>> s = '"' + s + '"' # or s = '"%s"' % s
>>> s
'"This is a sample"'
>>> print(s)
"This is a sample"
>>>

Categories