Is eval() better than parsing the string myself? - python

Here is my situation: I have a string as follows:
'a':1 'b':2 'c':3
I want to turn this to a dict, so I have two options:
Split the string by ' ' and then by ':' then put the pairs to a dict.
Replace ' ' with ',', wrap the string in '{' and '}', and use eval() to get a dict.
So my question is which one is faster?

I would do it like this:
import ast
# Turn the space-separated pairs into a dict literal (pieces glued together
# with str.join) and let ast.literal_eval parse it.
result = ast.literal_eval(''.join(["{", s.replace(" ", ", "), "}"]))
You can also do this (although the difference may be negligible):
import ast
# Same conversion, but the dict literal is built with plain '+' concatenation.
result = ast.literal_eval("{" + s.replace(" ", ", ") + "}")
It's better to use ast.literal_eval as it's safer for this purpose than using eval().

Ok, here are all of them so far:
import ast, re, json
s = "'a':1 'b':2 'c':3"
def a_eval(s):
    """Parse ``'a':1 'b':2`` style text into a dict by evaluating it.

    The spaces between pairs are turned into commas, the text is wrapped in
    braces and the resulting dict literal is handed to ``eval``.

    :param s: space-separated ``'key':value`` pairs.
    :return: the dict produced by evaluating the generated literal.
    """
    # SECURITY: eval() runs arbitrary expressions.  It is only tolerable here
    # because the benchmark passes a trusted, hard-coded string; never use
    # this variant on external input.
    s = s.replace(' ', ',')
    return eval('{%s}' % s)
def a_ast(s):
    """Parse ``'a':1 'b':2`` style text into a dict with ``ast.literal_eval``.

    Works like the eval-based variant (spaces become commas, text is wrapped
    in braces) but only accepts Python literals.

    :param s: space-separated ``'key':value`` pairs.
    :return: the dict described by the generated literal.
    """
    s = s.replace(' ', ',')
    return ast.literal_eval('{%s}' % s)
def a_parse(s):
    """Parse ``'a':1 'b':2`` style text into a dict by splitting it by hand.

    Each whitespace-separated pair is split on ``:``; the key has its
    surrounding single quotes stripped and the value is converted with
    ``int``.

    :param s: space-separated ``'key':int`` pairs.
    :return: dict mapping unquoted keys to integer values.
    :raises ValueError: if a pair does not split into exactly two parts or
        the value is not an integer.
    """
    d = {}
    for p in s.split():
        k, v = p.split(':')
        d[k.strip("'")] = int(v)
    return d
def a_re(s):
    """Parse ``'a':1 'b':2`` style text into a dict with a regular expression.

    :param s: space-separated ``'key':int`` pairs.
    :return: dict mapping the captured keys to integer values.
    :raises ValueError: if a captured value is not an integer.
    """
    d = []
    # re.sub is used purely as a "call this for every match" driver: the
    # callback records the (key, value) groups and the substituted string is
    # thrown away.
    re.sub(r"'(\w+)':(\w+)", lambda m: d.append(m.groups()), s)
    return dict((a, int(b)) for a, b in d)
def a_json(s):
    """Parse ``'a':1 'b':2`` style text into a dict by rewriting it as JSON.

    Spaces become commas and single quotes become double quotes, then the
    text is wrapped in braces and decoded with ``json.loads``.

    :param s: space-separated ``'key':value`` pairs.
    :return: the decoded dict.
    """
    s = s.replace(' ', ',')
    s = s.replace("'", '"')
    return json.loads('{%s}' % s)
#eof
import timeit

# Everything in this file before the first end-of-file marker (imports, the
# sample string and the a_* functions) is reused verbatim as timeit setup code.
with open(__file__) as source:
    setup = source.read().split('#eof')[0]

results = []
# Time every module-level name that starts with 'a_' on the short string.
for p in dir():
    if p.startswith('a_'):
        results.append((timeit.timeit('%s(s)' % p, setup, number=10000), p))
# Tuples sort by elapsed time first, so the fastest variant is printed first.
for p in sorted(results):
    print('%.4f %s' % p)
Results:
0.0753 a_parse
0.1068 a_json
0.1455 a_re
0.2211 a_eval
0.3297 a_ast
And on longer strings, json is the winner:
long_s = ((s + ' ') * 100).strip()
# Start from an empty list so timings collected for the short string are not
# mixed into this report.
results = []
for p in dir():
    if p.startswith('a_'):
        # %r embeds the long string as a properly quoted literal in the
        # timed statement.
        stmt = '%s(%r)' % (p, long_s)
        results.append((timeit.timeit(stmt, setup, number=100), p))
for p in sorted(results):
    print('%.4f %s' % p)
Results:
0.0166 a_json
0.0528 a_parse
0.0565 a_re
0.0927 a_eval
0.1519 a_ast

json.loads is faster than ast.literal_eval:
$ python -m timeit -s "s = '\'a\':1 \'b\':2';s = '{' + s.replace(' ', ', ') + '
}'; import json" "json.loads(s.replace('\'', '\"'))"
100000 loops, best of 3: 5.11 usec per loop
$ python -m timeit -s "s = '\'a\':1 \'b\':2';s = '{' + s.replace(' ', ', ') + '
}'; import ast" "ast.literal_eval(s)"
100000 loops, best of 3: 19.4 usec per loop

Related

Concatenating multiple strings from a dictionary and saving them to a file using Python

I am able to write the hostname to /tmp/filter.log, but how can I write all three values [hostname, owner, seats] to the file?
def list_hosts(nc):
    """Return the hostnames of all hosts whose owner contains "team-prod".

    Sends a 'ListHosts' service request through ``nc``, prints owner,
    hostname and seats of every matching entry, and writes each matching
    hostname on its own line to /tmp/filter.log (the file is truncated on
    every call).

    :param nc: client object exposing ``send_service_request(name, payload)``.
    :return: list of matching hostnames, in response order.
    """
    resp = nc.send_service_request('ListHosts', json.dumps({}))
    result = resp['result']
    l = []
    # The context manager closes the file exactly once, after the whole loop,
    # even if an entry is malformed and raises.
    with open("/tmp/filter.log", "w+") as f:
        for r in result:
            if "team-prod" in r['owner']:
                print('%s %s %s' % (r['owner'], r['hostname'], r['seats']))
                f.write(r['hostname'] + "\n")
                l.append(r['hostname'])
    return l
# Driver: build a client (create_client is not shown in this snippet), collect
# the filtered hostnames and print the resulting list.
nc = create_client('zone', 'team_PROD_USERNAME', 'team_PROD_PASSWORD')
l = list_hosts(nc)
print l
The file should have entries as below:
team-prod\*, np-team-052, [u'123123123-18d1-483d-9af8-169ac66b26e4']
Current entry is:
np-team-052
# Write owner, hostname and seats as one comma-separated line; each value is
# passed through str() so non-string values (seats is shown as a list in the
# expected output) can be concatenated.
f.write(str(r['owner']) + ', ' + str(r['hostname']) + ', ' + str(r['seats']) + '\n')

Removing words with special characters "\" and "/"

During the analysis of tweets, I run into "words" that contain either \ or / (possibly more than once in a single "word"). I would like to have such words removed completely, but cannot quite nail it.
This is what I tried:
# NOTE: these are ordinary (non-raw) string literals, so the '\r' in sen is a
# carriage-return character, not a backslash followed by 'r', and '\\' is a
# single backslash.
sen = 'this is \re\store and b\\fre'
sen1 = 'this i\s /re/store and b//fre/'
# One or more word characters, exactly one backslash, one or more word characters.
slash_back = r'(?:[\w_]+\\[\w_]+)'
# One or more word characters, one or more forward slashes, one or more word characters.
slash_fwd = r'(?:[\w_]+/+[\w_]+)'
# A run of lowercase letters/hyphens that starts at a whitespace boundary and
# is followed by at most one punctuation mark and then whitespace or the end.
slash_all = r'(?<!\S)[a-z-]+(?=[,.!?:;]?(?!\S))'
strt = re.sub(slash_back,"",sen)
strt1 = re.sub(slash_fwd,"",sen1)
strt2 = re.sub(slash_all,"",sen1)
print strt
print strt1
print strt2
I would like to get:
this is and
this i\s and
this and
However, I receive:
and
this i\s / and /
i\s /re/store b//fre/
To add: in this scenario the "word" is a string separated either by spaces or punctuation signs (like a regular text)
How's this? I added some punctuation examples:
import re
# Raw literals keep every backslash in the sample sentences as a real backslash.
sen = r'this is \re\store and b\\fre'
sen1 = r'this i\s /re/store and b//fre/'
sen2 = r'this is \re\store, and b\\fre!'
sen3 = r'this i\s /re/store, and b//fre/!'
# Each pattern consumes optional leading whitespace followed by a token of
# word characters that contains at least one separator:
#   slash_back -> backslash, slash_fwd -> forward slash, slash_all -> either.
# Punctuation is not a word character, so it is left in place.
slash_back = r'\s*(?:[\w_]*\\(?:[\w_]*\\)*[\w_]*)'
slash_fwd = r'\s*(?:[\w_]*/(?:[\w_]*/)*[\w_]*)'
slash_all = r'\s*(?:[\w_]*[/\\](?:[\w_]*[/\\])*[\w_]*)'
strt = re.sub(slash_back,"",sen)
strt1 = re.sub(slash_fwd,"",sen1)
strt2 = re.sub(slash_all,"",sen1)
strt3 = re.sub(slash_back,"",sen2)
strt4 = re.sub(slash_fwd,"",sen3)
strt5 = re.sub(slash_all,"",sen3)
print(strt)
print(strt1)
print(strt2)
print(strt3)
print(strt4)
print(strt5)
Output:
this is and
this i\s and
this and
this is, and!
this i\s, and!
this, and!
One way you could do it without re is with join and a comprehension.
# Raw literals: without the r prefix '\r' would be a carriage return and '\s'
# an invalid escape sequence.
sen = r'this is \re\store and b\\fre'
sen1 = r'this i\s /re/store and b//fre/'


def remove_back(s):
    """Return ``s`` without the whitespace-separated words containing a backslash.

    The remaining words are re-joined with single spaces.
    """
    return ' '.join(i for i in s.split() if '\\' not in i)


def remove_forward(s):
    """Return ``s`` without the whitespace-separated words containing a forward slash.

    The remaining words are re-joined with single spaces.
    """
    return ' '.join(i for i in s.split() if '/' not in i)
>>> print(remove_back(sen))
this is and
>>> print(remove_forward(sen1))
this i\s and
>>> print(remove_back(remove_forward(sen1)))
this and

remove similar lines in text file

I am not a Python user, but I have a script written in Python:
part of script
elif line.find("CONECT") > -1:
con = line.split()
line_value = line_value + 1
#print line_value
#print con[2]
try:
line_j = "e" + ', ' + str(line_value) + ', ' + con[2] + "\n"
output_file.write(line_j)
print(line_j)
line_i = "e" + ', ' + str(line_value) + ', ' + con[3] + "\n"
output_file.write(line_i)
print(line_i)
line_k = "e"+ ', ' + str(line_value) + ', ' + con[4] + "\n"
print(line_k)
output_file.write(line_k)
except IndexError:
continue
which give .txt output in format
e, 1, 2
e, 1, 3
e, 1, 4
e, 2, 1
e, 2, 3
etc.
I need to remove similar lines that contain the same numbers, regardless of the order of those numbers,
i.e. line e, 2, 1..
Is it possible?
Of course, it is better to modify your code to remove those lines BEFORE you write them to the file. You can use a list to store the values already saved and, on each iteration, check whether the values you want to add already exist in that list. The code below isn't tested or optimized, but it illustrates the idea:
# 'added = []' should be placed somewhere before 'if'
added = []
# you part of code
elif line.find("CONECT") > -1:
con = line.split()
line_value = line_value + 1
try:
line_j = "e, %s, %s\n" % (str(line_value),con[2])
tmp = sorted((str(line_value),con[2]))
if tmp not in added:
added.append(tmp)
output_file.write(line_j)
print(line_j)
line_i = "e, %s, %s\n" % (str(line_value),con[3])
tmp = sorted((str(line_value),con[3]))
if tmp not in added:
added.append(tmp)
output_file.write(line_i)
print(line_i)
line_k = "e, %s, %s\n" % (str(line_value),con[4])
tmp = sorted((str(line_value),con[4]))
if tmp not in added:
added.append(tmp)
print(line_k)
output_file.write(line_k)
except IndexError:
continue
Here is a comparison method for two lines of your file:
def compare(line1, line2):
    """Return True when two comma-separated lines hold the same fields.

    Field order is ignored but the number of occurrences of each field is
    not, e.g. ``"e, 1, 2"`` equals ``"e, 2, 1"`` but not ``"e, 1, 1"``.

    :param line1: first line, e.g. ``"e, 1, 2\\n"``.
    :param line2: second line.
    :return: bool.
    """
    # Imported here so the snippet works on its own.
    from collections import Counter

    # Strip every field so spacing around the commas and the trailing newline
    # do not influence the comparison.
    els1 = [field.strip() for field in line1.split(',')]
    els2 = [field.strip() for field in line2.split(',')]
    return Counter(els1) == Counter(els2)
See the documentation for the Counter class.
If the number of occurrences of each element doesn't matter, you can replace the Counter class with set instead.
The following approach should work. First add the following line further up in your code:
seen = set()
Then replace everything inside the try with the following code:
# con[2:5] is a slice, so lines with fewer than three partners are handled
# without raising IndexError.
for con_value in con[2:5]:
    # line_value is a number while con_value is text produced by line.split();
    # normalise both to str, otherwise (1, '2') and (2, '1') would never be
    # recognised as the same unordered pair.
    entry = frozenset((str(line_value), con_value))
    if entry not in seen:
        seen.add(entry)  # sets provide add(), not append()
        line_j = "e" + ', ' + str(line_value) + ', ' + con_value + "\n"
        output_file.write(line_j)
        print(line_j)
Make sure this code is indented to the same level as the code it replaces.

Replace a pattern in python

How to replace the pattern in the string with
decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
The first pattern "(++info++)" needs to be replaced with (++info a++)
The second pattern "(++info++)" needs to be replaced with (++info b++)
The third pattern "(++info++)" needs to be replaced with (++info c++)
If there are more, they should be replaced accordingly
This should be simple enough:
# Label the placeholders 'a', 'b', ... one at a time.  range() excludes its
# end point, so '+ 1' is needed for 'z' itself to be used.
for character in range(ord('a'), ord('z') + 1):
    if "(++info++)" not in decoded_str:
        break
    # count=1: only the left-most remaining placeholder is replaced per pass.
    decoded_str = decoded_str.replace("(++info++)", "(++info {0}++)".format(chr(character)), 1)
print(decoded_str)
It has the added benefit of stopping at 'z'. If you want to wrap around:
import itertools

# Same labelling, but cycle() restarts at 'a' after 'z' so any number of
# placeholders is handled.  range() excludes its end point, hence '+ 1'.
for character in itertools.cycle(range(ord('a'), ord('z') + 1)):
    if "(++info++)" not in decoded_str:
        break
    decoded_str = decoded_str.replace("(++info++)", "(++info {0}++)".format(chr(character)), 1)
print(decoded_str)
And just for fun, a one-liner, and O(n):
# Split on the placeholder, append a labelled placeholder to every piece, then
# cut off the surplus one added after the last piece.  range() excludes its
# end point, so '+ 1' is needed for 'z' to be available.
# NOTE: zip() stops at the shorter input, so text beyond the 26th piece is dropped.
dstr = "".join(x + "(++info {0}++)".format(chr(y)) for x, y in zip(dstr.split("(++info++)"), range(ord('a'), ord('z') + 1)))[:-len("(++info a++)")]
import string
decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# Count the placeholders on the original text, before any '%' handling.
placeholder_count = decoded_str.count('++info++')
# Escape literal '%' characters first so they survive the %-formatting below,
# then turn every placeholder into a '%s' slot.
s = decoded_str.replace('%', '%%').replace('++info++', '++info %s++')
# Fill the slots with 'a', 'b', 'c', ... and keep the result instead of
# discarding it.  More than 26 placeholders raise TypeError (not enough
# arguments for the format string).
result = s % tuple(string.ascii_lowercase[:placeholder_count])
Here is a rather ugly yet pragmatic solution:
import string
decoded_str = " Name(++info++)Age(++info++)Adress of the emp(++info++)"
# ascii_lowercase exists on Python 2 and 3 and does not depend on the locale.
letters = list(string.ascii_lowercase)
token = "(++info++)"
rep_token = "(++info %s++)"
i = 0
# Replace the left-most placeholder on each pass.  The bound on i stops the
# loop once the letters run out instead of raising IndexError; placeholders
# beyond the 26th are left unchanged.
while token in decoded_str and i < len(letters):
    decoded_str = decoded_str.replace(token, rep_token % letters[i], 1)
    i += 1
print(decoded_str)
>>> import re
>>> rx = re.compile(r'\(\+\+info\+\+\)')
>>> s = "Name(++info++)Age(++info++)Adress of the emp(++info++)"
>>> atoz = iter("abcdefghijklmnopqrstuvwxyz")
>>> rx.sub(lambda m: '(++info ' + next(atoz) + '++)', s)
'Name(++info a++)Age(++info b++)Adress of the emp(++info c++)'
Here's a quick hack to do it:
string=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
def doit(s):
    """Label successive ``++info++`` placeholders in ``s`` with a, b, c, ...

    The first placeholder becomes ``++info a++``, the second ``++info b++``
    and so on.  At most 26 placeholders are labelled; any further ones are
    left untouched.

    :param s: text containing zero or more ``++info++`` placeholders.
    :return: the text with the placeholders labelled.
    """
    # Local import: the surrounding snippet binds the name ``string`` to a
    # str at module level, which would otherwise hide the module.
    import string

    token = "++info++"
    # Iterating the alphabet directly cannot run past 'z', so 26 or more
    # placeholders no longer raise IndexError.
    for letter in string.ascii_lowercase:
        if token not in s:
            break
        s = s.replace(token, "++info " + letter + "++", 1)
    return s
Note that performance is probably not very great.
import re
import string

decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
# The bound send() method of a generator is used as the re.sub callback:
# every call (with the match object as argument) advances the generator and
# returns the next labelled placeholder.
sub_func=('(++info %s++)'%c for c in '.'+string.ascii_lowercase).send
# A fresh generator only accepts send(None); this priming call consumes the
# dummy '.' entry so the first real substitution yields 'a'.
sub_func(None)
# Raw string: '\(' and '\+' are regex escapes, not string escapes.
result = re.sub(r'\(\+\+info\+\+\)', sub_func, decoded_str)
print(result)
try:
    from itertools import izip
except ImportError:
    # Python 3 has no izip; the built-in zip is already lazy there.
    izip = zip
import string

decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
# Split on the placeholder: the first piece is emitted as-is, every following
# piece is prefixed with the next labelled placeholder.
parts = iter(decoded_str.split("(++info++)"))
first_part = next(parts)
tags = iter(string.ascii_lowercase)
encoded_str=first_part+"".join("(++info %s++)%s"%x for x in izip(tags, parts))
print(encoded_str)
import re

decoded_str=" Name(++info++)Age(++info++)Adress of the emp(++info++)"
_labels = iter("abcdefghijklmnopqrstuvwxyz")


def _label_placeholder(match):
    """Return the next labelled placeholder, or the match unchanged after 'z'."""
    letter = next(_labels, None)
    if letter is None:
        return match.group(0)
    return "(++info %s++)" % letter


# A single pass labels every placeholder in order, instead of re-scanning the
# whole string once per placeholder and running past 'z' into punctuation.
decoded_str = re.sub(r"\(\+\+info\+\+\)", _label_placeholder, decoded_str)
print(decoded_str)

String Conversion

s='This is sample'
I need to convert it like this:
s='"This is sample"'
output="This is sample"
How can I do this dynamically?
Thanks in advance
# Wrap the original text in literal double quotes via a %-format template.
orig = 'This is sample'
quote_template = '"%s"'
converted = quote_template % (orig,)
>>> s= 'This is a sample'
>>> s = '"' + s + '"' # or s = '"%s"' % s
>>> s
'"This is a sample"'
>>> print(s)
"This is a sample"
>>>

Categories