How to inject characters into a string? - python

I have a function in which I need to inject a character before every odd index of a string.
def inject(s, i):
sent = str(s)
new = []
for w in sent:
if w == ' ':
new.append(w)
elif (sent.index(w) % 2 == 1):
new.append(str(i) + w)
else:
new.append(w)
return ''.join(new)
The function works for:
inject(1339, 3)
output: '1333339'
and works for
inject('This is a test', 'x')
output: 'Txhixs ixs a txexst'
but won't work for:
inject('Hello World', 'x')
output: 'Hxello World'
I know it has something to do with it breaking on the 'll' but can't figure out how to fix it.
Thanks!

I think you meant to do this:
def inject(s, i):
sent = str(s)
new = []
for idx,w in enumerate(sent):
if w == ' ':
new.append(w)
elif idx % 2 == 1:
new.append(str(i) + w)
else:
new.append(w)
return ''.join(new)
for t in [1339, 'This is a test', 'Hello World']:
print(t, inject(t,'x'))
When using the enumerate() you get the index of each character without having to search for it.
The sent.index(w) call just doesn't do what you want it to even though it might seem like it does when there are no repeated characters in a string.

Related

Replace a single character in a string

I am trying to make a function that automatically generated a response to a selection of an action in a text adventure game. My problem is that I have to replace every second '_' with ' '. However I have tried everything I have though of and whenever I google the question the only solution I get is to use .replace(). However .replace() replaces every instance of that character. Here is my code, could you please fix this for me and explain how you fixed it.
example_actions = ['[1] Search desk', '[2] Search Cupboard', '[3] Search yard'
def response(avaliable_actions):
for i in avaliable_actions:
print(i, end=' ')
x = avaliable_actions.index(i)
avaliable_actions[x] = avaliable_actions[x][4:]
avaliable_actions = ' '.join(avaliable_actions)
avaliable_actions = avaliable_actions.lower()
avaliable_actions = avaliable_actions.replace(' ', '_')
avaliable_actions = list(avaliable_actions)
count = 0
for i in avaliable_actions:
if count == 2:
count = 0
index = avaliable_actions.index(i)
avaliable_actions[index] = ' '
elif i == '_':
count += 1
avaliable_actions = ' '.join(avaliable_actions)
print('\n\n' + str(avaliable_actions)) #error checking
Here's one approach:
s = 'here_is_an_example_of_a_sentence'
tokens = s.split('_')
result = ' '.join('_'.join(tokens[i:i+2]) for i in range(0,len(tokens),2))
print(result)
The result:
here_is an_example of_a sentence
Did I understand you correct, that you wanna produce something like this?
this_is_a_test -> this is_a test or this_is a_test?
If so, adapt the following for your needs:
s = "this_is_just_a_test"
def replace_every_nth_char(string, char, replace, n):
parts = string.split(char)
result = ""
for i, part in enumerate(parts):
result += part
if i % n == 0:
result += replace
else:
result += char
return ''.join(result)
res = replace_every_nth_char(s, "_", " ", 2)
print(s, "->", res)
# "this_is_just_a_test" -> "this is_just a_test"

Python - Remove white space using only loops

I want to remove extra spaces in a string using only for/while loops, and if statements; NO split/replace/join.
like this:
mystring = 'Here is some text I wrote '
while ' ' in mystring:
mystring = mystring.replace(' ', ' ')
print(mystring)
output:
Here is some text I wrote
Here's what I tried. Unfortunately, it doesn't quite work.
def cleanupstring(S):
lasti = ""
result = ""
for i in S:
if lasti == " " and i == " ":
i = ""
lasti = i
result += i
print(result)
cleanupstring("Hello my name is joe")
output:
Hello my name is joe
My attempt doesn't remove all the extra spaces.
Change your code to this:
for i in S:
if lasti == " " and i == " ":
i = ""
else:
lasti = i
result += i
print(result)
Check that the current character and the next one are spaces, and if not, add them to a clean string. There really is no need for an and in this case, since we are comparing to the same value
def cleanWhiteSpaces(str):
clean = ""
for i in range(len(str)):
if not str[i]==" "==str[i-1]:
clean += str[i]
return clean
Uses the end of result in place of lasti:
def cleanupstring(S):
result = S[0]
for i in S[1:]:
if not (result[-1] == " " and i == " "):
result += i
print(result)
cleanupstring("Hello my name is joe")
Just try this
t = "Hello my name is joe"
" ".join(t.split())
this will output
"Hello my name is joe"

I can't return a value

I'm trying to write a function that will translate the input into so-called "cow Latin." I want to return the values from the if statement but whenever I do I get a syntax error. I can print the value but I want to avoid the function returning None as well.
def cow_latinify_sentence(sentence):
vowels = tuple('aeiou1234567890!##$%^&*()-_=+|\\][}{?/.\',><`~"')
sentence = sentence.lower()
sentence_list = sentence.split()
for i in range(len(sentence_list)):
cow_word = sentence_list[i][:]
if cow_word.startswith(vowels):
print('{0}moo'.format(cow_word), end=' ')
else:
cow_word = sentence_list[i][1:] + sentence_list[i][:1]
print('{0}oo'.format(cow_word), end=' ')
cow_latin = cow_latinify_sentence("the quick red fox")
print(cow_latin)
In short, how can I get the function to return instead of print?
def cow_latinify_sentence(sentence):
vowels = tuple('aeiou1234567890!##$%^&*()-_=+|\\][}{?/.\',><`~"')
sentence = sentence.lower()
sentence_list = sentence.split()
result = ''
for i in range(len(sentence_list)):
cow_word = sentence_list[i][:]
if cow_word.startswith(vowels):
result += ('{0}moo'.format(cow_word) + ' ')
else:
result += '{0}oo'.format(sentence_list[i][1:] + sentence_list[i][:1]) + ' '
return result.strip()
>>> cow_latinify_sentence('hello there i am a fish')
'ellohoo heretoo imoo ammoo amoo ishfoo'
Why not just replace the two instances of
print('{0}moo'.format(cow_word), end=' ')
with
return '{0}moo'.format(cow_word)+' '
You have to get rid of end=; you don't have to replace the newline that would otherwise follow the output of print, but if you want a space at the end of the returned string you still have to append it yourself.
You need to create a list to accumulate your results.
result = []
your two print statements in your function would need changed to result.append(XXXX). Then when you have processed the entire sentence you can
return (result)
or, to re-form it into a sentence:
return " ".join(result) + '.'
def cow_latinify_sentence(sentence):
vowels = tuple('aeiou1234567890!##$%^&*()-_=+|\\][}{?/.\',><`~"')
sentence = sentence.lower()
sentence_list = sentence.split()
result = ''
for i in range(len(sentence_list)):
cow_word = sentence_list[i][:]
if cow_word.startswith(vowels):
result += '{0}moo'.format(cow_word) + ' '
else:
result += '{0}oo'.format(sentence_list[i][1:] + sentence_list[i][:1]) + ' '
return result.strip()

How to ensure all string literals are unicode in python

I have a fairly large python code base to go through. It's got an issue where some string literals are strings and others are unicode. And this causes bugs. I am trying to convert everything to unicode. I was wondering if there is a tool that can convert all literals to unicode. I.e. if it found something like this:
print "result code %d" % result['code']
to:
print u"result code %d" % result[u'code']
If it helps I use PyCharm (in case there is an extension that does this), however I am would be happy to use a command like too as well. Hopefully such a tool exists.
You can use tokenize.generate_tokens break the string representation of Python code into tokens. tokenize also classifies the tokens for you. Thus you can identify string literals in Python code.
It is then not hard to manipulate the tokens, adding 'u' where desired:
import tokenize
import token
import io
import collections
class Token(collections.namedtuple('Token', 'num val start end line')):
#property
def name(self):
return token.tok_name[self.num]
def change_str_to_unicode(text):
result = text.splitlines()
# Insert a dummy line into result so indexing result
# matches tokenize's 1-based indexing
result.insert(0, '')
changes = []
for tok in tokenize.generate_tokens(io.BytesIO(text).readline):
tok = Token(*tok)
if tok.name == 'STRING' and not tok.val.startswith('u'):
changes.append(tok.start)
for linenum, s in reversed(changes):
line = result[linenum]
result[linenum] = line[:s] + 'u' + line[s:]
return '\n'.join(result[1:])
text = '''print "result code %d" % result['code']
# doesn't touch 'strings' in comments
'handles multilines' + \
'okay'
u'Unicode is not touched'
'''
print(change_str_to_unicode(text))
yields
print u"result code %d" % result[u'code']
# doesn't touch 'strings' in comments
u'handles multilines' + u'okay'
u'Unicode is not touched'
Try this (uses regex), and it's shorter than #unutbu's solution.
But there's s loop hole, the strings containing # won't work with this.
import re
scode = '''
print "'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'"""
# so are "comments"'''
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def repl(m):
return "u%(c)s%(data)s%(c)s" % m.groupdict()
fcode = '\n'.join(
[re.sub(x1,repl,i)
if not '#' in i
else re.sub(x1,repl,i[:i.find('#')])+i[i.find('#'):]
for i in scode.splitlines()])
print fcode
Outputs:
print u"'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
For # I have this (and it's longer than #unutbu's solution :| )
import re
scode = '''print "'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
'#### Hi' # 'Hi' '''
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def in_string(text,index):
curr,in_l,in_str,level = '',0,False,[]
for c in text[:index+1]:
if c == '"' or c == "'":
if in_str and curr == c:
instr = False
curr = ''
in_l -= 1
else:
instr = True
curr = c
in_l += 1
level.append(in_l)
return bool(level[index])
def repl(m):
return "u%(c)s%(data)s%(c)s" % m.groupdict()
def handle_hashes(i):
if i.count('#') == 1:
n = i.find('#')
else:
n = get_hash_out_of_string(i)
return re.sub(x1,repl,i[:n]) + i[n:]
def get_hash_out_of_string(i):
n = i.find('#')
curr = i[:]
last = (len(i)-1)-''.join(list(reversed(i))).find('#')
while in_string(curr,n) and n < last:
curr = curr[:n]+' '+curr[n+1:]
n = curr.find('#')
return n
fcode = '\n'.join(
[re.sub(x1,repl,i)
if not '#' in i
else handle_hashes(i)
for i in scode.splitlines()])
print fcode
Output:
print u"'Hello World'" # prints 'Hello World'
u'Unicode is unchanged'
# so are "comments"
u'#### Hi' # 'Hi'

regex to replace regex

I have this regex for getting strings in Python code:
x1 = re.compile('''((?P<unicode>u?)(?P<c1>'|")(?P<data>.+?)(?P<c2>'|"))''')
I want to extract the data and c1,c2 parts of this regex to make a replace string (if c1 == c2)
Something like:
repl = "u<c1><data><c2>"
How can I do this??
Is that possible in one line or by using re.sub?
UPDATE:
My new code:
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def repl(match):
if '#' in match.string:
### Confused
return "u%(c)s%(data)s%(c)s" % m.groupdict()
fcode = '\n'.join([re.sub(x1,repl,i) for i in scode.splitlines()])
Here, I am having problems to determine how to not change strings in comments, what do I have to do to ignore the comments??
Say you have a pattern:
pattern = r'''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''' # did a little tweak
Match a string:
m = re.search(pattern, "print('hello')")
What you got:
>>> m.groups()
('', '"', 'hello')
>>> m.groupdict()
{'c': '"', 'unicode': '', 'data': 'hello'}
Now you can do whatever you want with these:
>>> 'u{c}{data}{c}'.format_map(m.groupdict())
'u"hello"'
Maybe you are using Python 2.x:
>>> 'u{c}{data}{c}'.format(**m.groupdict())
'u"hello"'
Or even you like old %
>>> "u%(c)s%(data)s%(c)s" % m.groupdict()
'u"hello"'
Edited:
The regex solution can't handle some situations correctly.
So I used a 2to3 hack(it's actually 3to2, and still can't solve everything):
cd /usr/lib/python3.3/lib2to3/fixes/
cp fix_unicode.py fix_unicode33.py
Edit fix_unicode33.py
-_literal_re = re.compile(r"[uU][rR]?[\'\"]")
+_literal_re = re.compile(r"[rR]?[\'\"]")
-class FixUnicode(fixer_base.BaseFix):
+class FixUnicode33(fixer_base.BaseFix):
- new.value = new.value[1:]
+ new.value = 'u' + new.value
Now 2to3 --list | grep unicode33 should output unicode33
Then you can run 2to3 -f unicode33 py3files.py.
Remember to remove fix_unicode33.py after
NOTE: In Python3 ur"string" throws SyntaxError. The logic here is simple, modify it to reach your goal.
The long code I ended up with.
x1 = re.compile('''(?P<unicode>u?)(?P<c>'|")(?P<data>.*?)(?P=c)''')
def in_string(text,index):
curr,in_l,in_str,level = '',0,False,[]
for c in text[:index+1]:
if c == '"' or c == "'":
if in_str and curr == c:
instr = False
curr = ''
in_l -= 1
else:
instr = True
curr = c
in_l += 1
level.append(in_l)
return bool(level[index])
def repl(m):
return "u%(c)s%(data)s%(c)s" % m.groupdict()
def handle_hashes(i):
if i.count('#') == 1:
n = i.find('#')
else:
n = get_hash_out_of_string(i)
return re.sub(x1,repl,i[:n]) + i[n:]
def get_hash_out_of_string(i):
n = i.find('#')
curr = i[:]
last = (len(i)-1)-''.join(list(reversed(i))).find('#')
while in_string(curr,n) and n < last:
curr = curr[:n]+' '+curr[n+1:]
n = curr.find('#')
return n

Categories