Python String Converter; how do I convert a string in python - python

Suppose there is a string such as "Hello! Python is a great language!". How would I convert Hello! into hijkefghlmnolmnoopqr!, where h=hijk, e=efgh, l=lmno, o=opqr, and so on for the rest of the string?

# ``alpha`` carries three extra letters ("abc") at the end so that the
# four-letter runs starting at x, y and z can wrap around the alphabet
# without any modular arithmetic.
alpha = 'abcdefghijklmnopqrstuvwxyzabc'

# Map each of the 26 lowercase letters to the four-letter run starting at it
# (h -> "hijk", z -> "zabc").
subs = {alpha[i]: alpha[i:i + 4] for i in range(26)}

s = "Hello! Python is a great language!"
# Lowercase first so capitals are translated as well; characters without an
# entry in ``subs`` (spaces, punctuation) are passed through unchanged.
s = ''.join(subs.get(c, c) for c in s.lower())

# The alphabet followed by "abc" so that slices starting at x, y or z can
# run past "z" and wrap around.
LETTERS = "abcdefghijklmnopqrstuvwxyzabc"


def translate_char(ch):
    """Return the four-letter run that starts at the letter ``ch``.

    ``ch`` is matched case-insensitively, and the result is always
    lowercase. Anything that is not a single ASCII letter (punctuation,
    digits, the empty string, longer strings) is returned unchanged.
    """
    # str.find("") returns 0 and a multi-character argument can match a
    # substring of LETTERS, so only genuine single characters are looked up.
    if len(ch) != 1:
        return ch
    i = LETTERS.find(ch.lower())
    if i == -1:
        return ch
    return LETTERS[i:i + 4]


def translate_string(s):
    """Return ``s`` with every letter replaced by its four-letter run."""
    return "".join(translate_char(ch) for ch in s)


print(translate_string("Hello!"))  # => hijkefghlmnolmnoopqr!

Related

Function return 'None' Python

Hi i am learning python on my own.
Task:
Reverse word without affecting special characters
Example "abcd efgh" => "dcba hgfe"
Example "a1bcd efg!h" => "d1cba hgf!e"
My problem: the function returns None.
Then I added this line:
return reverse_text, but it still does not return the reversed string.
Can anyone show me where my mistake is, please?
My code:
from string import punctuation
from string import digits
def reverse_text(str_smpl):
    """Reverse the letters of each space-separated word in ``str_smpl``.

    Punctuation and digits keep their positions inside each word. The
    resulting characters are collected in the function attribute
    ``reverse_text.lst``; a single space is appended after every word,
    including the last one.

    NOTE: the return value is the function object itself, not the reversed
    text. That is the behaviour this question is asking about, so it is
    deliberately left as posted.
    """
    sp = set.union(set(punctuation), set(digits))
    reverse_text.lst = []
    for word in str_smpl.split(' '):
        # Letters of this word only; popping from the end yields them in
        # reverse order.
        letters = [c for c in word if c not in sp]
        for c in word:
            if c not in sp:
                reverse_text.lst.append(letters.pop())
                continue
            else:
                reverse_text.lst.append(c)
        reverse_text.lst.append(' ')
    return reverse_text
if __name__ == '__main__':
    # (input, expected output) pairs taken from the task statement.
    cases = [
        ("abcd efgh", "dcba hgfe"),
        ("a1bcd efg!h", "d1cba hgf!e"),
        ("", "")
    ]
    for text, reversed_text in cases:
        # The loop variable is ``text``; ``str_smpl`` only exists inside
        # reverse_text() and would raise NameError here.
        assert reverse_text(text) == reversed_text
    reverse_text(input('Input string '))
    print("".join(reverse_text.lst))
The issue is that you are returning reverse_text which is the name of the function, so the function is returning a reference to itself (not what you want!).
Assigning properties to functions like you have with reverse_text.lst is not something I have really come across in Python and I would suggest you just use a new local variable named something like reversed_text_list to avoid confusion.
I think you also want to join the characters in the list together and return a string.
The following seems to be doing what I think you are trying to do:
def reverse_text(str_smpl):
    """Return ``str_smpl`` with the letters of each word reversed.

    Words are separated by single spaces. Punctuation and digits keep
    their positions inside each word; only the remaining characters are
    reversed around them.
    """
    sp = set(punctuation) | set(digits)
    reversed_words = []
    for word in str_smpl.split(' '):
        # Letters of this word only; popping from the end hands them back
        # in reverse order.
        letters = [c for c in word if c not in sp]
        reversed_chars = []
        for c in word:
            if c in sp:
                reversed_chars.append(c)
            else:
                reversed_chars.append(letters.pop())
        reversed_words.append(''.join(reversed_chars))
    # Joining the words (instead of appending ' ' after each one) avoids a
    # trailing space in the result.
    return ' '.join(reversed_words)
It failed because you built the result in reverse_text.lst but returned reverse_text (the function itself) instead of the joined string. The following code will work:
from string import punctuation
from string import digits
def reverse_text(str_smpl):
    """Reverse each space-separated word, leaving punctuation and digits in place.

    Returns the transformed text as a single string; an empty input gives
    an empty string.
    """
    sp = set.union(set(punctuation), set(digits))
    words = []
    for word in str_smpl.split(' '):
        # Only the letters take part in the reversal; pop() returns them
        # last-to-first.
        letters = [c for c in word if c not in sp]
        lst = []
        for c in word:
            lst.append(c if c in sp else letters.pop())
        words.append("".join(lst))
    # The space goes between words only, so there is no trailing separator
    # to slice off afterwards.
    return " ".join(words)
if __name__ == '__main__':
    # (input, expected output) pairs taken from the task statement.
    cases = [
        ("abcd efgh", "dcba hgfe"),
        ("a1bcd efg!h", "d1cba hgf!e"),
        ("", "")
    ]
    for text, reversed_text in cases:
        assert reverse_text(text) == reversed_text
    # Self-checks passed: reverse whatever the user types.
    print(reverse_text(input('Input string ')))

Python Regex matching wrong strings

I have the following python script which does regex matching using 'AND', 'OR' features as well:
class PyBoolReException(Exception):
    """Error raised while building a boolean regular expression.

    ``value`` holds whatever was passed to the constructor (an error
    message in the code that raises it) and is what ``str()`` shows.
    """

    def __init__(self, value):
        # Forward to Exception so that ``args`` is populated; without this
        # repr() and pickling would not see the message.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return str(self.value)
import re


class PyBoolRe:
    """Compile a small boolean expression over words into one regular expression.

    Terms may be combined with `` | `` (OR) and `` & `` (AND), prefixed
    with ``!`` (NOT, emitted as a negative lookahead) and grouped with
    parentheses. ``match`` and ``search`` then behave like the methods of
    a compiled ``re`` pattern.

    NOTE(review): the pieces produced for the individual parenthesised
    groups are concatenated as-is and alternations are not wrapped in a
    group, so ``|`` binds across neighbouring pieces. For example
    ``'Guido & (Python | Perl)'`` becomes ``.*Python.*|.*Perl.*.*Guido.*``.
    That precedence behaviour is intentionally left untouched here.
    """

    def __init__(self, boolstr):
        """Parse ``boolstr`` and compile the resulting regular expression.

        Raises PyBoolReException when the numbers of '(' and ')' differ.
        """
        # Require whitespace before words?
        self.__needspace = True
        # whitespace re
        self._wspre = re.compile(r'^\s*$')
        # create regexp string
        self.__rexplist = []
        oparct = boolstr.count('(')
        clparct = boolstr.count(')')
        if oparct != clparct:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise PyBoolReException('Mismatched parantheses!')
        self.__parse(boolstr)
        # if NOT is one of the members, reverse
        # the list
        if '!' in self.__rexplist:
            self.__rexplist.reverse()
        s = self.__makerexp(self.__rexplist)
        self.__rexp = re.compile(s)

    def match(self, data):
        """ Match the boolean expression, behaviour
        is same as the 'match' method of re """
        return self.__rexp.match(data)

    def search(self, data):
        """ Search the boolean expression, behaviour
        is same as the 'search' method of re """
        return self.__rexp.search(data)

    def __parse(self, s):
        """ Parse the boolean regular expression string
        and create the regexp list """
        # The string is a nested parantheses with
        # any character in between the parens.
        scopy = s
        oparmatch, clparmatch = False, False
        # Work from the last '(' so the group taken out never contains
        # another '(' itself.
        index = scopy.rfind('(')
        if index != -1:
            oparmatch = True
            index2 = scopy.find(')', index)
            if index2 != -1:
                clparmatch = True
                newstr = scopy[index+1:index2]
                # if the string is only of whitespace chars, skip it
                if not self._wspre.match(newstr):
                    self.__rexplist.append(newstr)
                # Remove the group just handled and parse what is left.
                replacestr = '(' + newstr + ')'
                scopy = scopy.replace(replacestr, '')
                self.__parse(scopy)
        # No parentheses at all: the remaining text is a piece of its own.
        if not clparmatch and not oparmatch:
            if scopy:
                self.__rexplist.append(scopy)

    def is_inbetween(self, l, elem):
        """ Find out if an element is in between
        in a list """
        index = l.index(elem)
        if index == 0:
            return False
        if index > 2:
            # Equivalent to ``index in range(1, len(l) - 1)`` given index > 2:
            # the last element does not count as "in between".
            return index < len(l) - 1
        return True

    def __makenotexpr(self, s):
        """ Make a NOT expression """
        # A leading '!' turns the term into a negative lookahead.
        if s.find('!') == 0:
            return ''.join(('(?!', s[1:], ')'))
        else:
            return s

    def __makerexp(self, rexplist):
        """ Make the regular expression string for
        the boolean match from the nested list """
        is_list = True
        if isinstance(rexplist, str):
            is_list = False
            elem = rexplist
        elif isinstance(rexplist, list):
            elem = rexplist[0]
        if isinstance(elem, list):
            elem = elem[0]
        # True when nothing remains to be appended after this element.
        eor = not is_list or len(rexplist) == 1
        word_str = '.*'
        s = ''
        # Implementing NOT
        if elem == '!':
            return ''.join(('(?!', self.__makerexp(rexplist[1:]), ')'))
        # Implementing OR
        elif elem.find(' | ') != -1:
            # Every non-empty alternative is separated by '|'. Deciding this
            # per element via is_inbetween() dropped the '|' in front of the
            # last of four or more alternatives.
            alternatives = [
                ''.join((word_str, self.__makenotexpr(o), '.*'))
                for o in elem.split(' | ') if o
            ]
            s = '|'.join(alternatives)
        # Implementing AND
        elif elem.find(' & ') != -1:
            # Terms are concatenated, so they must occur in this order.
            for a in elem.split(' & '):
                if a:
                    s = ''.join((s, word_str, self.__makenotexpr(a), '.*'))
        else:
            if elem:
                s = ''.join((self.__makenotexpr(elem), '.*'))
        if eor:
            return s
        else:
            return ''.join((s, self.__makerexp(rexplist[1:])))
When the search phrase is as follows:
# Simple OR expression: either word may appear anywhere in the string.
p = PyBoolRe('Python | Perl')
s1 = 'Guido invented Python'
s2 = 'Guido Perl'
# print(...) with a single argument behaves the same on Python 2 and 3.
if p.match(s1):
    print('Match found for first string')
else:
    print('No match found for first string')
if p.match(s2):
    print('Match found for second string')
else:
    print('No match found for second string')
Then both s1 & s2 match
But when the search phrase is:
# Combined expression: a plain word AND-ed with a parenthesised OR group.
p = PyBoolRe('Guido & (Python | Perl)')
# The two sample strings the expression is tried against.
s1 = 'Guido invented Python'
s2 = 'Guido Perl is great'
Then it should match if s1 or s2 has "Guido Python" or "Guido Perl". s2 has that but it does not match it. On the other hand, it matches s1, which it should not. Why is that?
Please help!! How can I get it to work??
Your generated expression is
.*Python.*|.*Perl.*.*Guido.*
while it should look like
(?=.*Guido.*)(?:.*Python.*|.*Perl.*)
So the parser needs some revision.
1) x|y should be enclosed into (?:...) (at least when used inside another block). Otherwise, | unluckily takes the global priority in the regexp.
2) x & y should be converted into (?=x)y (trailing context may be used to express the and between regular expressions)

Find, replace and print out updated string

I am trying to create a Python script that has 10-20 lines of fixed data in a string containing a two-character placeholder ({}) that needs to be replaced, at each occurrence, with a different random string generated by randomword().
import random, string
def randomword(length):
    """Return a string of ``length`` random lowercase ASCII letters."""
    letters = string.ascii_lowercase
    # One independent random.choice() call per character.
    return ''.join(random.choice(letters) for _ in range(length))
junk = """
random_string1 = {};
random_string2 = {};
random_string3 = {};
random_string4 = {};
random_string5 = {};
"""
# str.replace() is used because the module-level string.replace() function
# only exists on Python 2.
# NOTE: randomword(40) is evaluated once, before the replacement runs, so
# every "{}" receives the same value -- the behaviour this question asks
# about.
stra = junk.replace('{}', randomword(40))
print(stra)
The two special characters in the string are {}, I would like to iterate through the string to find those characters and replace them with a different random string generated by randomword()
Above is as far as I got, this piece of code replaces all of the occurrences
of {} with a random string, but they have the same values, I would like to have differing values for each {}.
I don't know how to put this into a loop. Any help is appreciated.
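Use random.randint(97, 122) to pick the character code of a random lowercase ASCII letter ('a' is 97, 'z' is 122), and replace the placeholders one at a time: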
import random
def randomword(length):
    """Return a string of ``length`` random lowercase ASCII letters.

    Each letter is chosen by drawing its character code uniformly from the
    range 'a'..'z' (97..122) with random.randint().
    """
    first, last = ord('a'), ord('z')
    # Build the word in one join instead of repeated string concatenation.
    return ''.join(chr(random.randint(first, last)) for _ in range(length))
def replace(original_string, string_to_replace, make_replacement=None):
    """Replace each occurrence of a placeholder with a freshly generated value.

    Parameters:
        original_string: text containing the placeholders.
        string_to_replace: the placeholder to look for. Occurrences are
            found left to right and do not overlap. An empty placeholder
            matches nothing and the text is returned unchanged.
        make_replacement: optional zero-argument callable invoked once per
            occurrence to produce the replacement text. Defaults to
            ``randomword(40)``.

    Returns:
        The text with every placeholder substituted.
    """
    if not string_to_replace:
        # str.find("") always succeeds, so scanning for an empty placeholder
        # would never terminate.
        return original_string
    if make_replacement is None:
        def make_replacement():
            return randomword(40)

    # Splitting on the placeholder skips exactly len(string_to_replace)
    # characters per occurrence, whatever the placeholder's length.
    pieces = original_string.split(string_to_replace)
    result = [pieces[0]]
    for piece in pieces[1:]:
        result.append(make_replacement())
        result.append(piece)
    return ''.join(result)
# Template text; every "{}" marks a spot to be filled in.
junk = """
random_string1 = {};
random_string2 = {};
random_string3 = {};
random_string4 = {};
random_string5 = {};
"""
# Run the placeholder replacement over the whole template and show the result.
stra = replace(junk, "{}")
print (stra)
Since you are already using the default placeholder for string formatting '{}', you can do:
>>> print(junk.format(*(randomword(40) for _ in range(junk.count('{}')))))
random_string1 = lazbdzezssemtsfknajawriafozpjwizigykvmac;
random_string2 = pxhkyrnjiqsvcivcppqqpbwuocsvbfauygdrwpuj;
random_string3 = ewhrsryjtfwtmulmqfqxzrzvyspiefrddpzrxkvq;
random_string4 = sqiulddoevddtieymjiexnmzezrdayvwigmsmgld;
random_string5 = evscqvrccknkulpkqchodcjlognsnrcxqcsexnrv;
Could use regex:
>>> print(re.sub('{}', lambda _: randomword(40), junk))
random_string1 = emgdidmmghkunqkwdfcnvsffdnfhvqrybhqdfklh;
random_string2 = mgggwchtzpuhntnfxzpsmkarkwnlghcwicnvxmpt;
random_string3 = liemdzegmzfpuozktclxnmfmavjkaxqhrfaldqkn;
random_string4 = ugchyijmsvzmeaiyzyaiudrnnfskzdboukvdwiye;
random_string5 = rfazvtvhygfixielqzbuhlnzvjjtrkhsdetomjri;

How to ensure all string literals are unicode in python

I have a fairly large python code base to go through. It's got an issue where some string literals are strings and others are unicode. And this causes bugs. I am trying to convert everything to unicode. I was wondering if there is a tool that can convert all literals to unicode. I.e. if it found something like this:
print "result code %d" % result['code']
to:
print u"result code %d" % result[u'code']
If it helps, I use PyCharm (in case there is an extension that does this); however, I would be happy to use a command-line tool as well. Hopefully such a tool exists.
You can use tokenize.generate_tokens to break the string representation of Python code into tokens. tokenize also classifies the tokens for you, so you can identify string literals in Python code.
It is then not hard to manipulate the tokens, adding 'u' where desired:
import tokenize
import token
import io
import collections
class Token(collections.namedtuple('Token', 'num val start end line')):
    """One token as produced by ``tokenize.generate_tokens``.

    The fields mirror the five-element tuples yielded by the tokenizer:
    numeric token type, token text, (row, col) start, (row, col) end and
    the source line.
    """

    # ``name`` must be a property: callers compare ``tok.name`` directly
    # with strings such as 'STRING'. As a plain method the comparison would
    # always be False.
    @property
    def name(self):
        """Symbolic name of the token type, e.g. 'STRING' or 'NAME'."""
        return token.tok_name[self.num]
def change_str_to_unicode(text):
    """Return Python source ``text`` with a ``u`` prefix added to string literals.

    String literals are located with the tokenizer, so quotes inside
    comments are not touched. Literals that already carry a ``u`` prefix
    are left alone, as are ``b``/``f`` literals, because ``ub'...'`` and
    ``uf'...'`` are not valid syntax. The result is joined with '\\n' and
    has no trailing newline.
    """
    # The tokenizer needs a readline that yields the same string type as the
    # source: text strings go through StringIO, byte strings (the ``str``
    # type on Python 2) through BytesIO.
    if isinstance(text, bytes):
        readline = io.BytesIO(text).readline
    else:
        readline = io.StringIO(text).readline

    result = text.splitlines()
    # Insert a dummy line into result so indexing result
    # matches tokenize's 1-based indexing
    result.insert(0, '')
    changes = []
    for tok in tokenize.generate_tokens(readline):
        # Index the token tuple directly: (type, text, (row, col) start, ...).
        tok_type, tok_val, tok_start = tok[0], tok[1], tok[2]
        if tok_type != tokenize.STRING:
            continue
        # Whatever precedes the opening quote is the literal's prefix.
        prefix_len = len(tok_val) - len(tok_val.lstrip('bBfFrRuU'))
        prefix = tok_val[:prefix_len].lower()
        if 'u' in prefix or 'b' in prefix or 'f' in prefix:
            continue
        changes.append(tok_start)
    # Apply the insertions last-to-first so that earlier column offsets on
    # the same line stay valid.
    for linenum, col in reversed(changes):
        line = result[linenum]
        result[linenum] = line[:col] + 'u' + line[col:]
    return '\n'.join(result[1:])
# Sample input. The literal is not a raw string, so the backslash-newline
# after '+' acts as a line continuation: both halves of that expression end
# up on a single line of ``text``.
text = '''print "result code %d" % result['code']
# doesn't touch 'strings' in comments
'handles multilines' + \
'okay'
u'Unicode is not touched'
'''
# Show the converted source.
print(change_str_to_unicode(text))
yields
print u"result code %d" % result[u'code']
# doesn't touch 'strings' in comments
u'handles multilines' + u'okay'
u'Unicode is not touched'
Try this (it uses a regex); it's shorter than @unutbu's solution.
But there's a loophole: strings containing # won't work with this.
import re
# Sample source to convert; the leading newline makes its first line empty.
scode = '''
print "'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"'''
# A quoted string with an optional leading "u": the opening quote is captured
# as "c" and the literal ends at the next occurrence of that same quote.
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')


def repl(m):
    """Rebuild the matched literal with a ``u`` prefix."""
    return "u%(c)s%(data)s%(c)s" % m.groupdict()


def _convert_line(line):
    """Prefix the literals that precede the first '#' on ``line``."""
    # Everything from the first '#' on is treated as a comment and kept
    # verbatim. A '#' inside a string literal is therefore mishandled.
    code, hash_mark, comment = line.partition('#')
    return x1.sub(repl, code) + hash_mark + comment


fcode = '\n'.join(_convert_line(i) for i in scode.splitlines())
print(fcode)
Outputs:
print u"'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
To handle #, I have this (and it's longer than @unutbu's solution :| )
import re
# Sample source to convert; its last line has '#' characters inside a string
# literal, followed by a real comment.
scode = '''print "'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
'#### Hi' # 'Hi' '''
# A quoted string with an optional leading "u": the opening quote is captured
# as "c" and the literal ends at the next occurrence of that same quote.
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def in_string(text, index):
    """Return True if ``text[index]`` lies inside a quoted string literal.

    A literal starts at a ``'`` or ``"`` and ends at the next occurrence
    of the same quote character; the other quote character is ordinary
    text while inside it. The opening quote counts as inside, the closing
    quote as outside. Backslash-escaped quotes and triple-quoted strings
    are not recognised.

    Raises IndexError when ``index`` is not a valid position in ``text``.
    """
    if not 0 <= index < len(text):
        raise IndexError('index out of range')
    quote = ''  # quote character of the literal currently open, if any
    for c in text[:index + 1]:
        if c == '"' or c == "'":
            if not quote:
                quote = c
            elif c == quote:
                quote = ''
    return bool(quote)
def repl(m):
    """Rebuild the matched string literal with a ``u`` prefix.

    ``m`` is a match object exposing the named groups ``c`` (the quote
    character) and ``data`` (the text between the quotes).
    """
    return "u%(c)s%(data)s%(c)s" % m.groupdict()
def handle_hashes(i):
    """Add the ``u`` prefix to the literals of line ``i``, leaving its comment alone.

    The line is cut at the position reported by get_hash_out_of_string():
    the part before it is rewritten with the ``x1`` pattern, the rest is
    kept verbatim.
    """
    # Always ask the helper for the cut position, even for a single '#':
    # that one '#' may sit inside a string literal rather than start a
    # comment.
    n = get_hash_out_of_string(i)
    return re.sub(x1, repl, i[:n]) + i[n:]
def get_hash_out_of_string(i):
    """Return the index of the first '#' in ``i`` that is not inside a string literal.

    A literal starts at a ``'`` or ``"`` and ends at the next occurrence
    of the same quote character. Backslash-escaped quotes and
    triple-quoted strings are not recognised. When every '#' is inside a
    literal, or there is none, ``len(i)`` is returned, so ``i[:n]`` is the
    whole line and ``i[n:]`` is empty.
    """
    quote = ''  # quote character of the literal currently open, if any
    for pos, c in enumerate(i):
        if c == '"' or c == "'":
            if not quote:
                quote = c
            elif c == quote:
                quote = ''
        elif c == '#' and not quote:
            return pos
    return len(i)
# Convert the sample line by line: lines without '#' are rewritten as a whole,
# the others go through handle_hashes() so that comments are left alone.
fcode = '\n'.join(
    [re.sub(x1, repl, i)
     if '#' not in i
     else handle_hashes(i)
     for i in scode.splitlines()])
# print(...) with a single argument behaves the same on Python 2 and 3.
print(fcode)
Output:
print u"'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
u'#### Hi' # 'Hi'

regex to replace regex

I have this regex for getting strings in Python code:
x1 = re.compile('''((?P<unicode>u?)(?P<c1>'|")(?P<data>.+?)(?P<c2>'|"))''')
I want to extract the data and c1,c2 parts of this regex to make a replace string (if c1 == c2)
Something like:
repl = "u<c1><data><c2>"
How can I do this??
Is that possible in one line or by using re.sub?
UPDATE:
My new code:
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def repl(match):
    """Rebuild the matched string literal with a ``u`` prefix.

    ``match`` is a match object exposing the named groups ``c`` (the quote
    character) and ``data`` (the text between the quotes).
    """
    if '#' in match.string:
        ### Confused
        # TODO: nothing is done yet for text containing '#', so literals
        # that sit inside comments are still rewritten.
        pass
    # The parameter is called ``match``; ``m`` is not defined in this scope.
    return "u%(c)s%(data)s%(c)s" % match.groupdict()
fcode = '\n'.join([re.sub(x1,repl,i) for i in scode.splitlines()])
Here, I am having trouble working out how to avoid changing strings that appear inside comments. What do I have to do to ignore the comments?
Say you have a pattern:
pattern = r'''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''' # did a little tweak
Match a string:
m = re.search(pattern, 'print("hello")')
What you got:
>>> m.groups()
('', '"', 'hello')
>>> m.groupdict()
{'c': '"', 'unicode': '', 'data': 'hello'}
Now you can do whatever you want with these:
>>> 'u{c}{data}{c}'.format_map(m.groupdict())
'u"hello"'
Maybe you are using Python 2.x:
>>> 'u{c}{data}{c}'.format(**m.groupdict())
'u"hello"'
Or even you like old %
>>> "u%(c)s%(data)s%(c)s" % m.groupdict()
'u"hello"'
Edited:
The regex solution can't handle some situations correctly.
So I used a 2to3 hack(it's actually 3to2, and still can't solve everything):
cd /usr/lib/python3.3/lib2to3/fixes/
cp fix_unicode.py fix_unicode33.py
Edit fix_unicode33.py
-_literal_re = re.compile(r"[uU][rR]?[\'\"]")
+_literal_re = re.compile(r"[rR]?[\'\"]")
-class FixUnicode(fixer_base.BaseFix):
+class FixUnicode33(fixer_base.BaseFix):
- new.value = new.value[1:]
+ new.value = 'u' + new.value
Now 2to3 --list | grep unicode33 should output unicode33
Then you can run 2to3 -f unicode33 py3files.py.
Remember to remove fix_unicode33.py after
NOTE: In Python3 ur"string" throws SyntaxError. The logic here is simple, modify it to reach your goal.
The long code I ended up with.
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def in_string(text, index):
    """Return True if ``text[index]`` lies inside a quoted string literal.

    A literal starts at a ``'`` or ``"`` and ends at the next occurrence
    of the same quote character; the other quote character is ordinary
    text while inside it. The opening quote counts as inside, the closing
    quote as outside. Backslash-escaped quotes and triple-quoted strings
    are not recognised.

    Raises IndexError when ``index`` is not a valid position in ``text``.
    """
    if not 0 <= index < len(text):
        raise IndexError('index out of range')
    quote = ''  # quote character of the literal currently open, if any
    for c in text[:index + 1]:
        if c == '"' or c == "'":
            if not quote:
                quote = c
            elif c == quote:
                quote = ''
    return bool(quote)
def repl(m):
    """Rebuild the matched string literal with a ``u`` prefix.

    ``m`` is a match object exposing the named groups ``c`` (the quote
    character) and ``data`` (the text between the quotes).
    """
    return "u%(c)s%(data)s%(c)s" % m.groupdict()
def handle_hashes(i):
    """Add the ``u`` prefix to the literals of line ``i``, leaving its comment alone.

    The line is cut at the position reported by get_hash_out_of_string():
    the part before it is rewritten with the ``x1`` pattern, the rest is
    kept verbatim.
    """
    # Always ask the helper for the cut position, even for a single '#':
    # that one '#' may sit inside a string literal rather than start a
    # comment.
    n = get_hash_out_of_string(i)
    return re.sub(x1, repl, i[:n]) + i[n:]
def get_hash_out_of_string(i):
    """Return the index of the first '#' in ``i`` that is not inside a string literal.

    A literal starts at a ``'`` or ``"`` and ends at the next occurrence
    of the same quote character. Backslash-escaped quotes and
    triple-quoted strings are not recognised. When every '#' is inside a
    literal, or there is none, ``len(i)`` is returned, so ``i[:n]`` is the
    whole line and ``i[n:]`` is empty.
    """
    quote = ''  # quote character of the literal currently open, if any
    for pos, c in enumerate(i):
        if c == '"' or c == "'":
            if not quote:
                quote = c
            elif c == quote:
                quote = ''
        elif c == '#' and not quote:
            return pos
    return len(i)

Categories