s='This is sample'
I need to convert it like this:
s='"This is sample"'
output="This is sample"
How can I do this dynamically?
Thanks in advance
# Wrap the original text in literal double-quote characters.
orig = 'This is sample'
quote_template = '"%s"'
converted = quote_template % (orig,)
>>> s= 'This is a sample'
>>> s = '"' + s + '"' # or s = '"%s"' % s
>>> s
'"This is a sample"'
>>> print(s)
"This is a sample"
>>>
Related
During the analysis of tweets, I run into "words" that contain either \ or / (possibly more than once in one "word"). I would like to have such words removed completely, but I cannot quite nail this down.
This is what I tried:
# Sample sentences. NOTE: these are NOT raw strings, so in `sen` the sequence
# '\r' is a carriage return and '\\' is a single backslash; '\s' is not a
# recognised escape and is kept as backslash + 's'.
sen = 'this is \re\store and b\\fre'
sen1 = 'this i\s /re/store and b//fre/'
# One or more word characters, exactly one backslash, one or more word characters.
slash_back = r'(?:[\w_]+\\[\w_]+)'
# One or more word characters, one or more forward slashes, one or more word characters.
slash_fwd = r'(?:[\w_]+/+[\w_]+)'
# A whitespace-delimited run of lowercase letters/hyphens, optionally followed
# by one punctuation mark -- i.e. this matches the words WITHOUT any slash.
slash_all = r'(?<!\S)[a-z-]+(?=[,.!?:;]?(?!\S))'
# Delete every match of each pattern from the samples.
# NOTE(review): `re` is not imported in this snippet; it must be imported first.
strt = re.sub(slash_back,"",sen)
strt1 = re.sub(slash_fwd,"",sen1)
strt2 = re.sub(slash_all,"",sen1)
# Python 2 print statements.
print strt
print strt1
print strt2
I would like to get:
this is and
this i\s and
this and
However, I receive:
and
this i\s / and /
i\s /re/store b//fre/
To add: in this scenario the "word" is a string separated either by spaces or punctuation signs (like a regular text)
How's this? I added some punctuation examples:
import re

# Raw-string samples: two plain sentences and two with trailing punctuation.
sen = r'this is \re\store and b\\fre'
sen1 = r'this i\s /re/store and b//fre/'
sen2 = r'this is \re\store, and b\\fre!'
sen3 = r'this i\s /re/store, and b//fre/!'

# Each pattern consumes any leading whitespace plus one run of word characters
# that contains at least one slash of the given kind (back, forward, either).
slash_back = r'\s*(?:[\w_]*\\(?:[\w_]*\\)*[\w_]*)'
slash_fwd = r'\s*(?:[\w_]*/(?:[\w_]*/)*[\w_]*)'
slash_all = r'\s*(?:[\w_]*[/\\](?:[\w_]*[/\\])*[\w_]*)'


def _drop_matches(pattern, text):
    """Return *text* with every match of *pattern* deleted."""
    return re.compile(pattern).sub("", text)


strt = _drop_matches(slash_back, sen)
strt1 = _drop_matches(slash_fwd, sen1)
strt2 = _drop_matches(slash_all, sen1)
strt3 = _drop_matches(slash_back, sen2)
strt4 = _drop_matches(slash_fwd, sen3)
strt5 = _drop_matches(slash_all, sen3)

for cleaned in (strt, strt1, strt2, strt3, strt4, strt5):
    print(cleaned)
Output:
this is and
this i\s and
this and
this is, and!
this i\s, and!
this, and!
One way you could do it without re is with join and a comprehension.
# Raw strings keep every backslash literal; in a plain literal '\r' would be a
# carriage return and '\s' is an invalid escape sequence.
sen = r'this is \re\store and b\\fre'
sen1 = r'this i\s /re/store and b//fre/'


def remove_back(s):
    """Return *s* without the whitespace-separated words containing a backslash.

    The remaining words are re-joined with single spaces, so runs of
    whitespace in the input are collapsed.
    """
    return ' '.join(word for word in s.split() if '\\' not in word)


def remove_forward(s):
    """Return *s* without the whitespace-separated words containing a '/'.

    The remaining words are re-joined with single spaces, so runs of
    whitespace in the input are collapsed.
    """
    return ' '.join(word for word in s.split() if '/' not in word)
>>> print(remove_back(sen))
this is and
>>> print(remove_forward(sen1))
this i\s and
>>> print(remove_back(remove_forward(sen1)))
this and
I have some input data from ASCII files which uses double quote to encapsulate string as well as still use double quote inside those strings, for example:
"Reliable" "Africa" 567.87 "Bob" "" "" "" "S 05`56'21.844"" "No Shift"
Notice the double quote used in the coordinate.
So I have been using:
valList = shlex.split(line)
But shlex gets confused by the double quote used as the seconds mark in the coordinate.
I've been doing a find and replace on '\"\"' to '\\\"\"'. This of course turns empty strings into \"" as well, so I then do a find and replace on (this time with spaces) ' \\\"\" ' to ' \"\"" '. Not exactly the most efficient way of doing it!
Any suggestions on handling this double quote in the coordinate?
I would do it this way:
I would treat this line of text as a csv file. Then according to RFC 4180 :
If double-quotes are used to enclose fields, then a double-quote
appearing inside a field must be escaped by preceding it with
another double quote. For example:
"aaa","b""bb","ccc"
Then all you would need to do is to add another " to your coordinates. So it would look like this: "S 05`56'21.844""" (NOTE the extra quote). Then you can use the standard csv module to break it apart and extract the necessary information.
>>> from StringIO import StringIO
>>> import csv
>>>
>>> test = '''"Reliable" "Africa" 567.87 "Bob" "" "" "" "S 05`56'21.844""" "No Shift"'''
>>> test_obj = StringIO(test)
>>> reader = csv.reader(test_obj, delimiter=' ', quotechar='"', quoting=csv.QUOTE_ALL)
>>> for i in reader:
... print i
...
The output would be :
['Reliable', 'Africa', '567.87', 'Bob', '', '', '', 'S 05`56\'21.844"', 'No Shift']
I'm not good with regexes, but this non-regex suggestion might help ...
# Sample line: double-quoted fields separated by spaces, one of which
# (the coordinate) contains a bare double quote just before its closing quote.
INPUT = ('"Reliable" "Africa" 567.87 "Bob" "" "" "" "S 05`56'
         "'"
         '21.844"" "No Shift"')


def main(input):
    """Backslash-escape double quotes that sit inside quoted fields.

    A double quote counts as a field delimiter when it is the first or last
    character of the line or is adjacent to a space. Those are temporarily
    swapped for a marker, every remaining double quote is escaped as ``\\"``,
    and the markers are then turned back into plain double quotes.

    Args:
        input: one line of space-separated, double-quoted fields.

    Returns:
        The line with embedded double quotes escaped by a backslash.
    """
    output = input
    surrounding_quote_symbol = '<!>'
    # Mark the delimiting quotes at the very start and end of the line.
    if input.startswith('"'):
        output = '%s%s' % (surrounding_quote_symbol, output[1:])
    if input.endswith('"'):
        output = '%s%s' % (output[:-1], surrounding_quote_symbol)
    # Mark the delimiting quotes next to a separating space.
    output = output.replace('" ', '%s ' % surrounding_quote_symbol)
    output = output.replace(' "', ' %s' % surrounding_quote_symbol)
    print("Stage 1: %s" % output)
    # Whatever quote is left is embedded in a field: escape it. The
    # replacement must be spelled '\\"' -- '\"' is just a bare quote, which
    # would make this step a no-op.
    output = output.replace('"', '\\"')
    output = output.replace(surrounding_quote_symbol, '"')
    return output


if __name__ == "__main__":
    output = main(INPUT)
    print("End results: %s" % output)
Here is my situation, I have a string as follows
'a':1 'b':2 'c':3
I want to turn this to a dict, so I have two options:
Split the string by ' ' and then by ':' then put the pairs to a dict.
Replace ' ' with ',', append '{', and '}' to string and use eval() to get a dict.
So my question is which one is faster?
I would do it like this:
import ast
# Build a dict literal: wrap the text in braces and turn each separating space
# into ", ", then parse it with literal_eval.
# NOTE(review): `s` is not defined in this snippet; it must already hold the input string.
result = ast.literal_eval(''.join(["{", s.replace(" ", ", "), "}"]))
You can also do this (although the difference may be negligible):
import ast
# Same dict literal as a plain concatenation: braces around the text with each
# separating space turned into ", ", parsed by literal_eval.
# NOTE(review): `s` is not defined in this snippet; it must already hold the input string.
result = ast.literal_eval("{" + s.replace(" ", ", ") + "}")
It's better to use ast.literal_eval as it's safer for this purpose than using eval().
Ok, here are all of them so far:
import ast, re, json
s = "'a':1 'b':2 'c':3"
def a_eval(s):
    """Parse space-separated ``'key':value`` pairs by evaluating a dict literal.

    NOTE: eval() executes arbitrary expressions, so this is only acceptable
    for trusted input.
    """
    return eval('{%s}' % s.replace(' ', ','))
def a_ast(s):
    """Parse space-separated ``'key':value`` pairs with ast.literal_eval."""
    # Splitting on a single space and joining with commas is the same as
    # replacing every space with a comma.
    body = ','.join(s.split(' '))
    return ast.literal_eval('{%s}' % body)
def a_parse(s):
    """Parse space-separated ``'key':value`` pairs by hand.

    Each whitespace-separated item is split on ':'; the key loses its
    surrounding single quotes and the value is converted with int().
    """
    pairs = (item.split(':') for item in s.split())
    return dict((key.strip("'"), int(value)) for key, value in pairs)
def a_re(s):
    """Parse ``'key':value`` pairs with a regular expression.

    Every ``'word':word`` occurrence in *s* contributes one entry; the value
    is converted with int(). Text that does not match is ignored.
    """
    # findall returns the (key, value) group tuples directly, so there is no
    # need to run a substitution just to collect matches from its callback.
    return dict((key, int(value)) for key, value in re.findall(r"'(\w+)':(\w+)", s))
def a_json(s):
    """Parse space-separated ``'key':value`` pairs by rewriting them as JSON.

    Spaces become commas and single quotes become double quotes, then the
    braces-wrapped text is handed to json.loads.
    """
    json_body = s.replace(' ', ',').replace("'", '"')
    return json.loads('{%s}' % json_body)
#eof
import timeit

# The timeit setup code is this file's own text up to the '#eof' marker,
# i.e. the imports, the sample string and the a_* functions.
with open(__file__) as source_file:
    setup = source_file.read().split('#eof')[0]

# Time every module-level name starting with 'a_' on the sample string `s`.
results = []
for p in dir():
    if p.startswith('a_'):
        results.append((timeit.timeit('%s(s)' % p, setup, number=10000), p))

# Fastest first: each entry is (seconds, function name).
for p in sorted(results):
    print('%.4f %s' % p)
Results:
0.0753 a_parse
0.1068 a_json
0.1455 a_re
0.2211 a_eval
0.3297 a_ast
And on longer strings, json is the winner:
# 100 copies of the sample pairs, separated by single spaces.
long_s = ((s + ' ') * 100).strip()

# Start from an empty list so timings collected by an earlier run are not
# mixed into this report.
results = []
for p in dir():
    if p.startswith('a_'):
        # The long string is embedded in the timed statement as a
        # double-quoted literal (it only contains single quotes).
        results.append((timeit.timeit('%s("%s")' % (p, long_s), setup, number=100), p))

# Fastest first: each entry is (seconds, function name).
for p in sorted(results):
    print('%.4f %s' % p)
Results:
0.0166 a_json
0.0528 a_parse
0.0565 a_re
0.0927 a_eval
0.1519 a_ast
json.loads is faster than ast.literal_eval:
$ python -m timeit -s "s = '\'a\':1 \'b\':2';s = '{' + s.replace(' ', ', ') + '
}'; import json" "json.loads(s.replace('\'', '\"'))"
100000 loops, best of 3: 5.11 usec per loop
$ python -m timeit -s "s = '\'a\':1 \'b\':2';s = '{' + s.replace(' ', ', ') + '
}'; import ast" "ast.literal_eval(s)"
100000 loops, best of 3: 19.4 usec per loop
I have this code:
import re
# Test cases. Despite its name, match_dict is a list of input strings.
#TEST CASES
match_dict = ['hello(here)',
'Hello (Hi)',
"'dfsfds Hello (Hi) fdfd' Hello (Yes)",
"Hello ('hi)xx')",
"Hello ('Hi')"]
# NOTE(review): indentation was lost in this listing; presumably every
# statement below belongs to the body of this loop.
for s in match_dict:
print "INPUT: %s" % s
# Drop single-quoted spans whose opening quote is not directly preceded by '('.
m = re.sub(r"(?<!\()'[^']+'", '', s, flags=re.M)
# Collect the text of Hello('...') calls. The capture group sits INSIDE the
# quotes, so the quotes themselves are not part of the result.
paren_quotes = re.findall(r"Hello\s*\('([^']+)'\)", m, flags=re.M)
output = paren_quotes if paren_quotes else []
# Remove those quoted Hello('...') calls so the next search cannot see them.
m = re.sub(r"Hello\s*\('[^']+'\)", '', m, flags=re.M)
# Collect the contents of the remaining Hello(...) calls.
paren_matches = re.findall(r"Hello\s*\(([^)]+)\)", m, flags=re.M)
if paren_matches:
output.extend(paren_matches)
print 'OUTPUT: %s\n' % output
This code is made to output everything in the parentheses after the word 'Hello',
Hello (Hi) would give 'Hi'
My problem is that when I put in:
Hello('Hi')
...It still returns 'Hi' when I want it to return "'Hi'"
Does anyone know how could I fix this code?
Just use non-greedy matching:
matches = re.search(r'^Hello\s*\((.*?)\)', text)
>>> import re
>>> p = re.compile(r'Hello\s*\((.*?)\)', re.M)
>>> m = p.findall("Hello ('Hi')")
>>> print m
["'Hi'"]
>>> m = p.findall("'dfsfds Hello (Hi) fdfd' Hello (Yes)")
>>> print m
['Hi', 'Yes']
How to replace the pattern in the string with
decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
The first pattern "(++info++)" needs to be replaced with (++info a++)
The second pattern "(++info++)" needs to be replaced with (++info b++)
The third pattern "(++info++)" needs to be replaced with (++info c++)
If there are more occurrences, they should be replaced accordingly
This should be simple enough:
# Tag the placeholders left to right with 'a', 'b', 'c', ...
# range() excludes its stop value, hence ord('z') + 1 to include 'z'.
for character in range(ord('a'), ord('z') + 1):
    if "(++info++)" not in decoded_str:
        break
    # count=1: only the first remaining placeholder gets this letter.
    decoded_str = decoded_str.replace("(++info++)", "(++info {0}++)".format(chr(character)), 1)
print(decoded_str)
It has the added benefit of stopping at 'z'. If you want to wrap around:
import itertools

# Same left-to-right tagging, but the letters wrap around to 'a' after 'z'.
# range() excludes its stop value, hence ord('z') + 1 to include 'z'.
for character in itertools.cycle(range(ord('a'), ord('z') + 1)):
    # cycle() never ends by itself; stop once every placeholder is tagged.
    if "(++info++)" not in decoded_str:
        break
    decoded_str = decoded_str.replace("(++info++)", "(++info {0}++)".format(chr(character)), 1)
print(decoded_str)
And just for fun, a one-liner, and O(n):
# Single pass over the text: split on the placeholder, then put a tagged
# placeholder in front of every piece after the first. Placeholders beyond
# the 26th keep their original, untagged form, so no text is ever dropped.
_letters = "abcdefghijklmnopqrstuvwxyz"
_pieces = dstr.split("(++info++)")
dstr = _pieces[0] + "".join(
    ("(++info {0}++)".format(_letters[index]) if index < len(_letters) else "(++info++)") + piece
    for index, piece in enumerate(_pieces[1:])
)
import string

decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# Turn every placeholder into a %-format slot ...
s = decoded_str.replace('++info++', '++info %s++')
# ... and fill the slots with 'a', 'b', 'c', ... in order, keeping the result.
# NOTE: this relies on the text having no other '%' and at most 26 slots.
encoded_str = s % tuple(string.ascii_lowercase[:s.count('%s')])
Here is a rather ugly yet pragmatic solution:
import string

decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# ascii_lowercase exists on both Python 2 and 3 and is always exactly 'a'..'z'.
letters = list(string.ascii_lowercase)
token = "(++info++)"
rep_token = "(++info %s++)"
i = 0
# Tag one placeholder per pass; stop when the letters run out so that a 27th
# placeholder is left untouched instead of raising IndexError.
while token in decoded_str and i < len(letters):
    decoded_str = decoded_str.replace(token, rep_token % letters[i], 1)
    i += 1
print(decoded_str)
>>> import re
>>> rx = re.compile(r'\(\+\+info\+\+\)')
>>> s = "Name(++info++)Age(++info++)Adress of the emp(++info++)"
>>> atoz = iter("abcdefghijklmnopqrstuvwxyz")
>>> rx.sub(lambda m: '(++info ' + next(atoz) + '++)', s)
'Name(++info a++)Age(++info b++)Adress of the emp(++info c++)'
Here's a quick hack to do it:
string=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
def doit(s):
    """Tag each "++info++" in *s*, left to right, with 'a', 'b', 'c', ...

    Args:
        s: text containing zero or more "++info++" placeholders.

    Returns:
        The text with the n-th placeholder rewritten as "++info <letter>++".
        Only the first 26 placeholders are tagged; any further ones are
        returned unchanged.
    """
    # Local import: a module-level variable in this file is also called `string`.
    import string
    for letter in string.ascii_lowercase:
        if "++info++" not in s:
            break
        # count=1: only the first remaining placeholder gets this letter.
        s = s.replace("++info++", "++info " + letter + "++", 1)
    return s
Note that performance is probably not very great.
import re
import string

decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
# Lazily yields "(++info a++)", "(++info b++)", ... one per placeholder.
_tags = ('(++info %s++)' % c for c in string.ascii_lowercase)


def sub_func(match):
    """Return the replacement text for the next placeholder.

    re.sub passes the match object, which is ignored: the replacement only
    depends on how many placeholders have been handled so far.
    """
    return next(_tags)


# Raw string so the regex backslashes are not read as string escapes.
encoded_str = re.sub(r'\(\+\+info\+\+\)', sub_func, decoded_str)
print(encoded_str)
try:
    from itertools import izip  # Python 2: lazy pairwise iteration
except ImportError:
    izip = zip  # Python 3: the built-in zip is already lazy
import string

decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
# Split on the placeholder; the first piece has no placeholder before it.
parts = iter(decoded_str.split("(++info++)"))
first_part = next(parts)
tags = iter(string.ascii_lowercase)
# Put a tagged placeholder in front of each following piece.
# NOTE: izip stops at the shorter input, so pieces after the 26th
# placeholder are not included in the result.
encoded_str=first_part+"".join("(++info %s++)%s"%x for x in izip(tags, parts))
print(encoded_str)
import re

decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
placeholder = re.compile(r"\(\+\+info\+\+\)")
# One pass per placeholder found up front; each pass tags the first one that
# is still untagged. chr(97) is 'a'.
# NOTE: past the 26th placeholder chr(97 + i) moves on to '{', '|', ...
for i in range(len(placeholder.findall(decoded_str))):
    decoded_str = placeholder.sub("(++info %s++)" % chr(97 + i), decoded_str, count=1)
print(decoded_str)