I am trying to create a function that returns the similarity of words, but the loop stops after processing only the first argument! For example, if I execute example.py hello there the program returns this:
hello is close to:
held, heel, helpt, hele, Hallo, het, helaas, half, helden, heb, veel, Meld, zelf, heeft, beeld, alle, wel, Rel, Geld, cel, geld, Alle, hoezo,
there is close to:
Here is my code:
def create_data():
    """Collect every term found on standard input.

    Each input line must consist of exactly four tab-separated fields
    (ident, user, text, terms); only the last field is used and it is split
    on whitespace.

    Returns:
        A ``defaultdict(int)`` mapping every term to ``0``.

    Raises:
        ValueError: if a line does not split into exactly four fields.

    NOTE: this consumes ``sys.stdin``. A second call therefore sees an
    already exhausted stream and returns an empty mapping.
    """
    data = defaultdict(int)
    for line in sys.stdin:
        _ident, _user, _text, terms = line.rstrip().split('\t')
        for word in terms.split():
            data[word] = 0
    return data
def find_closest(word):
    """Return all known words ordered by their distance to *word*.

    Args:
        word: the word to compare against.

    Returns:
        A dict mapping each known word to ``lev_dist(word, known_word)``,
        with entries inserted in ascending order of distance.

    NOTE: the vocabulary is rebuilt through ``create_data()`` on every call.
    """
    distances = {key: lev_dist(word, key) for key in create_data()}
    ranked = sorted(distances.items(), key=lambda item: item[1])
    return dict(ranked)
def main():
    """Print, for each command-line word, the known words closer than 4.

    Writes ``no argument`` to stderr when no word was given.
    """
    words = sys.argv[1:]
    if not words:
        sys.stderr.write("no argument\n")
        return

    for w in words:
        print("\n", w, "is close to:\n")
        closest = find_closest(w)
        for close, distance in closest.items():
            if distance < 4:
                print(close, end=", ")


if __name__ == '__main__':
    main()
You need to cache the results of create_data if you want to reuse it:
def find_closest(word, data):  # take data as param here
    """Return the words in *data* ordered by their distance to *word*.

    Args:
        word: the word to compare against.
        data: an iterable of known words (iterating a dict yields its keys).

    Returns:
        A dict mapping each known word to ``lev_dist(word, known_word)``,
        with entries inserted in ascending order of distance.
    """
    distances = {key: lev_dist(word, key) for key in data}
    ranked = sorted(distances.items(), key=lambda item: item[1])
    return dict(ranked)
def main():
    """Print, for each command-line word, the known words closer than 4.

    The vocabulary is read from stdin a single time and shared by all
    lookups; stdin is left untouched when there is nothing to look up.
    """
    words = sys.argv[1:]
    if not words:
        return

    data = create_data()  # load data from stdin ONCE
    for w in words:
        print("\n", w, "is close to:\n")
        closest = find_closest(w, data)  # pass data as param here
        for close, distance in closest.items():
            if distance < 4:
                print(close, end=", ")
Another option would be to stick a caching decorator on create_data:
from functools import cache


@cache
def create_data():
    """Collect every term found on standard input, once.

    Each input line must consist of exactly four tab-separated fields
    (ident, user, text, terms); only the last field is used and it is split
    on whitespace.

    Because of ``@cache`` the body runs on the first call only; every later
    call returns that same mapping object instead of reading the (by then
    exhausted) stream again.

    Returns:
        A ``defaultdict(int)`` mapping every term to ``0``.

    Raises:
        ValueError: if a line does not split into exactly four fields.
    """
    data = defaultdict(int)
    for line in sys.stdin:
        _ident, _user, _text, terms = line.rstrip().split('\t')
        for word in terms.split():
            data[word] = 0
    return data
This "fixes" the function by making it cache the result from the first time you run it, and return the same result on subsequent calls instead of actually executing the function.
In a function that takes parameters, the caching would happen based on the parameters; since this function takes no parameters, it'll just cache a single return value. If the function had a desirable side effect you would not want to cache it like this, but in this case the side effect (consuming stdin, so that a second call finds nothing left to read) is undesirable, so sticking a @cache on it is a very easy solution.
Related
I have a dictionary that is in the main function and I want to use it in another function to present the data in a tabular format.
I had created the dictionary in a function as follows:
def file_reader():
    """Load ``config.ini`` and return ``(rev_dict, newDict)``.

    Returns:
        rev_dict: maps every value of ``[SectionOne]`` back to its option
            name.
        newDict: holds at most one entry, see the note in the body.

    Raises:
        configparser.NoSectionError: if ``[SectionOne]`` is missing.

    Unlike the earlier version, ``[SectionTwo]`` is no longer required: its
    contents were read into locals that were never used.
    """
    configParser = configparser.ConfigParser()
    configParser.read('config.ini')

    # Name of the last option seen across all sections.
    last_option = None
    for section in configParser.sections():
        for option, _value in configParser.items(section):
            last_option = option

    rev_dict = {value: option for option, value in configParser.items('SectionOne')}

    newDict = {}
    for command in rev_dict:
        # NOTE(review): the key is the leftover variable of the loop above,
        # not anything taken from rev_dict, so every iteration overwrites
        # the same entry. Kept as-is because the intended key is unclear;
        # confirm what newDict is supposed to be keyed on.
        newDict[last_option] = rev_dict[command]
    return rev_dict, newDict
I then used the dictionary created in the above function in main function as follows:
def main():
    """Run every configured test case when ``--start`` is given.

    Each command from ``rev_dict`` is executed; the test case is recorded in
    ``newDict`` as ``'Pass'`` (exit status 0) or ``'Fail'`` (anything else)
    and the running result table is printed as JSON after each case.
    """
    rev_dict, newDict = file_reader()
    parser = ap.ArgumentParser()
    parser.add_argument('-s', '--start', help='start script', action='store_true')
    args = parser.parse_args()

    # The excerpt started this chain with ``elif``; the branches that came
    # before it are not part of the snippet.
    if args.start:
        for k, v in rev_dict.items():
            print("\nTestcase:" + v + "\n")
            print(v, "=", k)
            print("\n")
            time.sleep(5)
            # NOTE(review): shell=True hands the configured string to the
            # shell; make sure config.ini is trusted.
            proc = sp.call([k], shell=True)
            time.sleep(5)
            print('Process ID is:', os.getpid())
            if proc != 0:
                if proc < 0:
                    # A negative status is the number of the killing signal.
                    print("\nKilled by signal!\n", -proc)
                else:
                    print("\nFailed with return code: ", proc)
                newDict[v] = 'Fail'
            else:
                print("\nOK\n")
                newDict[v] = 'Pass'
            # Was json.dumps(dic, ...); no ``dic`` exists in this function.
            print(json.dumps(newDict, indent=4, sort_keys=True))

    # NOTE(review): no ``--ip`` option is registered on the parser shown
    # here; presumably it was defined in the part that was cut - confirm.
    sipResponse(args.ip)
I had then created a function called read_file() where I want to generate a report and use the updated dictionary named newDict from main function.
def read_file():
    """Copy ``logfile.log`` to ``file.txt``, write a report table, and exit.

    The table lists the sorted ``rev_dict`` items under the headers
    ``Testcase`` and ``Path``. The process exits with status 0 afterwards.
    """
    # file_reader() returns (rev_dict, newDict); only the first is used here.
    rev_dict, _new_dict = file_reader()
    shutil.copy("logfile.log", "file.txt")
    headers = ['Testcase', 'Path']
    data = sorted(rev_dict.items())
    # "r+" starts writing at offset 0, i.e. over the beginning of the copy.
    with open("file.txt", "r+") as f:
        f.write(tabulate(data, headers=headers, tablefmt="grid"))
    sys.exit(0)
Can someone please guide?
You need to introduce an argument to the read_file function:
def read_file(new_dict):
Then in main when you want to call the new function, call read_file(newDict). Notice how the argument name in the function is new_dict, thus showing that the two names are actually different.
Thus main might look something like this, with other code removed for simplicity: just add a call to the other method.
def main():
    """Load the configuration and hand the result table to the report."""
    rev_dict, newDict = file_reader()
    read_file(newDict)
I have the following JSON file:
{
"add1":true,
"add2":false,
"add3":true
}
And the following Python program:
def add1(n):
    """Return *n* increased by one."""
    return n + 1


def add2(n):
    """Return *n* increased by two."""
    return n + 2


def add3(n):
    """Return *n* increased by three."""
    return n + 3
def score(n):
    """Return ``add1(n) + add3(n)`` if any entry of ``file.json`` is true.

    Returns ``None`` when no entry compares equal to ``True``.

    NOTE: which functions are called is hard-coded here; the configuration
    only decides whether anything is returned at all.
    """
    with open('file.json', 'r') as f:
        conf = json.load(f)
    for key, value in conf.items():
        if value == True:  # noqa: E712
            # add1 and add3 calls only because they're set to True
            return (add1(n) + add3(n))
As written in the code, I would like to call the functions only if they are set to True in the JSON configuration file. Is this the right way to proceed? Are there existing tools that make this approach easier, or do I have to write each case manually?
You can replace your for loop with a call to eval
def score(n):
    """Sum ``name(n)`` over every name enabled in ``file.json``.

    Each key with a truthy value is treated as the name of a callable and
    invoked with *n*; disabled keys contribute 0.
    """
    with open('file.json', 'r') as fh:
        conf = json.load(fh)
    # SECURITY: eval() runs whatever the key spells out. Only use this with a
    # configuration file you fully control.
    return sum(eval(f"{k}({n})") if v else 0 for k, v in conf.items())
This assumes that all entries in your json map to an existing function
You could do something like this:
import json


def add1(n):
    """Return *n* increased by one."""
    return n + 1


def add2(n):
    """Return *n* increased by two."""
    return n + 2


def add3(n):
    """Return *n* increased by three."""
    return n + 3


# The only functions the configuration file is allowed to switch on.
defs = {
    'add1': add1,
    'add2': add2,
    'add3': add3,
}


def score(n):
    """Sum ``function(n)`` over the functions enabled in ``file.json``.

    A function is enabled when its name is present in the file with a truthy
    value; names missing from the file count as disabled. Keys of the file
    that are not in ``defs`` are ignored.
    """
    with open('file.json', 'r') as f:
        conf = json.load(f)
    return sum(function(n) for name, function in defs.items() if conf.get(name))
If you are ok with lambdas, you could even use:
# Name -> function table; the names are the keys expected in the JSON file.
defs = dict(
    add1=lambda n: n + 1,
    add2=lambda n: n + 2,
    add3=lambda n: n + 3,
)
You should use eval: https://docs.python.org/3/library/functions.html#eval
def score(n):
    """Sum ``name(n)`` over every name set to true in ``file.json``."""
    with open('file.json', 'r') as f:
        conf = json.load(f)
    total = 0  # not named ``sum``: that would hide the builtin
    for key, value in conf.items():
        if value == True:  # noqa: E712
            # SECURITY: eval() runs whatever the key spells out. Only use
            # this with a configuration file you fully control.
            total += eval('{}({})'.format(key, n))
    return total
@Sayse had a good method, although I'd change the sum line so that we don't need to add 0 when the value is false.
return sum(eval(f"{key}({n})") for key, value in conf.items() if value)
Or the other way to call a function named by a string is
return sum(globals()[k](n) for k,v in conf.items() if v)
I'm not certain if best practices in Python favour one version or the other. The results are identical.
Note that you are trusting the data to specify the functions called by your program. Incorrect or malicious data could cause indeterminate and harmful behaviour, so you might want to check the keys to make sure they are all specifying one of the functions that you intend to use in this way.
I've spent many hours searching through all the "my function returns none" and "nested dict search" messages and none apply specifically nor do any resolve my issue.
I made a function to search a nested dictionary and return the path. This works great! I can print the results from within the function but the return immediately below the print returns None. Maybe I've been looking at it too long and it probably is right in front of my face but I'm just not seeing what's wrong here. Here's my complete code:
def search(v, searchterm, vid, path=(),):
    """Look for *searchterm* in a nested dict and print the hit.

    Args:
        v: a (possibly nested) dict whose leaves support ``in`` and indexing.
        searchterm: the value to look for inside a leaf.
        vid: index of the leaf element to report.
        path: keys (as strings) walked so far; used by the recursion.

    Returns:
        ``{0: path, 1: leaf[vid]}`` when *v* itself is a matching leaf.

    NOTE: when *v* is a dict the results of the recursive calls are
    discarded, so the hit is printed but ``None`` is returned.
    """
    if isinstance(v, dict):
        for k, v2 in v.items():
            p2 = path + ('{}'.format(k),)
            search(v2, searchterm, vid, p2)
    else:
        if searchterm in v:
            a = {}
            a[0] = path
            a[1] = v[vid]
            print(a)
            return(a)
def main():
    """Build the two-level sample data set and print one search result."""
    # mydata[group][item] == ('data-G-I', 'reason-G-I', 'notes-G-I')
    # for groups 1-2 and items 1-10.
    mydata = {
        group: {
            item: tuple(
                '{}-{}-{}'.format(kind, group, item)
                for kind in ('data', 'reason', 'notes')
            )
            for item in range(1, 11)
        }
        for group in (1, 2)
    }
    b = search(mydata, 'reason-2-4', 2)
    print(b)


if __name__ == '__main__':
    main()
The results:
{0: ('2', '4'), 1: 'notes-2-4'}
None
You can see the print works great from within the function but the return and print from main returns None. I've been programming in Python for a few years now with many functions, classes and methods with returns written but this one has me stuck.
You're making a recursive call and the print statement is issued in the nested call. However, the return value of search is not used, that's why it never reaches the main function.
Below, I have added a nested variable that is checked, and if anything was found that is actually returned.
def search(v, searchterm, vid, path=(),):
    """Find the first leaf of a nested dict that contains *searchterm*.

    Args:
        v: a (possibly nested) dict whose leaves support ``in`` and indexing.
        searchterm: the value to look for inside a leaf.
        vid: index of the leaf element to report.
        path: keys (as strings) walked so far; used by the recursion.

    Returns:
        ``{0: path, 1: leaf[vid]}`` for the first match in iteration order,
        or ``None`` when nothing matches. The match is also printed.
    """
    if isinstance(v, dict):
        for k, v2 in v.items():
            p2 = path + ('{}'.format(k),)
            nested = search(v2, searchterm, vid, p2)
            if nested is not None:
                # before, nothing was ever returned here
                return nested
        return None

    if searchterm in v:
        a = {0: path, 1: v[vid]}
        print(a)
        return a
    return None
Not related, but in here you could make great use of the powerful dict literals of python
# Nested literal: group -> item -> (data, reason, notes)
mydata = {
    1: {
        1: ('data-1-1', 'reason-1-1', 'notes-1-1'),
        2: ('data-1-2', 'reason-1-2', 'notes-1-2'),
    },
    2: {
        1: ('data-2-1', 'reason-2-1', 'notes-2-1'),
        2: ('data-2-2', 'reason-2-2', 'notes-2-2'),
    },
}
Also, if all your dictionary keys are int, you might as well use a list.
Out of the 3 paths in search(), only 1 returns something:
def search(v, searchterm, vid, path=(),):
    """Annotated copy of the question's function marking its three paths.

    Only path 2 returns a value; paths 1 and 3 fall off the end and thus
    yield ``None``.
    """
    if isinstance(v, dict):
        # Path 1
        for k, v2 in v.items():
            p2 = path + ('{}'.format(k),)
            search(v2, searchterm, vid, p2)
        # Nothing returned here
    else:
        if searchterm in v:
            # Path 2
            a = {}
            a[0] = path
            a[1] = v[vid]
            print(a)
            return(a)
        else:
            # Path 3
            # Nothing returned here
            pass
Path 1 calls Path 2 which explains why there is something printed but not returned to main().
Here is the code I wrote to compare the list of values associated with each key against those of every other key in the dictionary. It takes a very long time once the CSV file has some 10,000 records. Can anybody help me optimize the code so that it runs in minimal time? Don't worry about the external function call; it works fine.
import csv
import sys
# Usage: script.py <csv file>
# Groups the rows by GROUP_KEY, then scores every record of one group
# against every record of each other group and prints pairs scoring > 0.90.
file = sys.argv[1]
with open(file, 'rU') as inf:
    csvreader = csv.DictReader(inf, delimiter=',')
    result = {}
    # Creating Dict
    for r in csvreader:
        name = [r['FIRST_NAME'], r['LAST_NAME'], r['ID']]
        result.setdefault(r['GROUP_KEY'], []).append(name)

# Processing the Dict
# Keys already used as key1; a set keeps the membership test O(1). The old
# test read an undefined name ``ex`` instead of the list it had filled.
seen = set()
for key1 in result:
    seen.add(key1)
    for key2 in result:
        if key2 in seen:
            # Covers key1 == key2 as well as pairs that were already compared.
            continue
        for v1 in result[key1]:
            for v2 in result[key2]:
                score = name_match_score(v1, '', v2, '')[0]  ####calling external function
                if score > 0.90:
                    # Formatted first so the line is identical under the
                    # print statement and the print function.
                    print('{} {} {}'.format(v1[2], v2[2], score))
Something like this will help. The goal is to reduce the number of raw calculations done in name_match_score by skipping redundant calculations and by caching calculations performed.
First, make your dictionary store a defaultdict of lists of tuples. Tuples are immutable so they can be used as keys in sets and dicts below.
from collections import defaultdict
import csv
import sys
# Group the CSV rows by GROUP_KEY. Each record is stored as an immutable
# (first name, last name, id) tuple so it can serve as a cache key later.
file = sys.argv[1]
with open(file, 'rU') as inf:
    csvreader = csv.DictReader(inf, delimiter=',')
    result = defaultdict(list)
    for r in csvreader:
        name = (r['FIRST_NAME'], r['LAST_NAME'], r['ID'])
        result[r['GROUP_KEY']].append(name)
Then, sort your keys to ensure you evaluate a pair of keys only once.
keys = sorted(result)
for i, key1 in enumerate(keys):
for key2 in keys[i+1:]:
And order v1 and v2 so that they form a unique key. This will help with caching.
for v1 in result[key1]:
    for v2 in result[key2]:
        # Put the pair into a canonical order under fresh names. Rebinding
        # v1 here would replace the left-hand record for all remaining
        # iterations of the inner loop.
        first, second = (v1, v2) if v1 <= v2 else (v2, v1)
        score = name_match_score(first, second)[0]  ####calling external function
        if score > 0.90:
            # Formatted first so the line is identical under the print
            # statement and the print function.
            print('{} {} {}'.format(first[2], second[2], score))
Then use a memoizing decorator to cache calculations:
class memoized(object):
    '''Decorator. Caches a function's return value each time it is called.
    If called later with the same arguments, the cached value is returned
    (not reevaluated).

    Only positional arguments are supported. Calls whose arguments cannot be
    hashed (a list, for instance) are passed straight through, uncached.
    '''

    def __init__(self, func):
        self.func = func
        self.cache = {}  # maps the args tuple to the computed value

    def __call__(self, *args):
        try:
            return self.cache[args]
        except KeyError:
            # First call with these arguments: compute and remember.
            value = self.cache[args] = self.func(*args)
            return value
        except TypeError:
            # uncacheable. a list, for instance.
            # better to not cache than blow up.
            # (An isinstance check against Hashable cannot detect this: the
            # args tuple is always formally hashable, whatever it contains.)
            return self.func(*args)

    def __repr__(self):
        '''Return the function's docstring, or its repr if it has none.'''
        return self.func.__doc__ or repr(self.func)

    def __get__(self, obj, objtype):
        '''Support instance methods.'''
        return functools.partial(self.__call__, obj)
And change name_match_score to use the decorator:
@memoized
def name_match_score(v1, v2):
    """Placeholder for the real scoring; returns a 1-tuple holding the score."""
    # Whatever this does
    return (0.75, )
This should minimize the number of raw calculations inside name_match_score that you do.
I have the input file :
sun vehicle
one number
two number
reduce command
one speed
five speed
zero speed
speed command
kmh command
I used the following code:
from collections import OrderedDict


def _emit(groups):
    """Print one ``<tag> words </tag>`` line per tag, in first-seen order."""
    for k, v in groups.items():
        print('<{}> {} </{}>'.format(k, ' '.join(v), k))


# Reads "word tag" lines from the file 'final' and groups the words by tag.
# NOTE(review): the nesting below is reconstructed - a line without a
# "word tag" pair is taken to end the current group; confirm.
output = OrderedDict()
with open('final') as in_file:
    for line in in_file:
        columns = line.split(' ')
        if len(columns) >= 2:
            word, tag = line.strip().split()
            output.setdefault(tag, []).append(word)
        else:
            print("")
            _emit(output)
            output = OrderedDict()

# Flush what is left when the file does not end with a separator line.
_emit(output)
I am getting the output as:
<vehicle> sun </vehicle>
<number> one two </number>
<command> reduce speed kmh </command>
<speed> one five zero </speed>
But my expected output should be:
<vehicle> sun </vehicle>
<number> one two </number>
<command> reduce
<speed> one five zero </speed>
speed kmh </command>
Can someone help me in solving this?
It looks like the output you want to achieve is underspecified!
You presumably want the code to "know in advance" that speed is a part of command, before you get to the line speed command.
To do what you want, you will need a recursive function.
How about
# One expanded element per top-level tag.
for k, v in output.items():
    print(expandElements(k, v, output))
and somewhere you define
def expandElements(k, v, dic, _open=()):
    """Render tag *k* with its words, expanding words that are tags too.

    Args:
        k: the tag name.
        v: the words that belong to *k*.
        dic: mapping of every tag name to its list of words.
        _open: tags currently being expanded; used by the recursion only.

    Returns:
        ``'<k> ... </k>'`` where every word that is itself a key of *dic* is
        replaced by its own expanded element.
    """
    # A tag that is already open is written as a plain word, so tags that
    # refer to each other cannot recurse forever.
    _open = _open + (k,)
    parts = ['<' + k + '>']
    for i in v:
        # check each item of v for matches in dic.
        if i in dic and i not in _open:
            parts.append(expandElements(i, dic[i], dic, _open))
        else:
            parts.append(i)
    parts.append('</' + k + '>')
    return ' '.join(parts)
It looks like you want some kind of tree structure for your output?
You are printing out with print '<{}> {} </{}>'.format(k, ' '.join(v), k) so all of your output is going to have the form of '<{}> {} </{}>'.
If you want to nest things you are going to need a nested structure to represent them.
For recursively parsing the input file I would make a class representing the tag. Each tag can have children. Every child starts out as a string, added either manually with tag.children.append("value") or by calling tag.add_tag(tag.name, "value").
class Tag:
    """A named node whose children are plain strings or further tags."""

    def __init__(self, name, parent=None):
        self.name = name
        self.children = []
        # False marks a tag that add_tag() created at the level where it was
        # first seen; such a tag may still be moved below another tag.
        self.has_root = True
        self.parent = parent

    def __str__(self):
        """ compose string for this tag (recursively) """
        if not self.children:
            return self.name
        children_str = ' '.join([str(child) for child in self.children])
        if not self.parent:
            # The parentless root contributes no markup of its own.
            return children_str
        return '<%s>%s</%s>' % (self.name, children_str, self.name)

    @classmethod
    def from_file(cls, file):
        """ create root tag from file """
        obj = cls('root')
        with open(file) as in_file:
            for line in in_file:
                value, tag = line.strip().split(' ')
                obj.add_tag(tag, value)
        return obj

    def search_tag(self, tag):
        """ search for a tag in the children

        Returns the matching Tag, or None. A string child equal to *tag* is
        replaced by a new Tag object, which is then returned.
        """
        if self.name == tag:
            return self
        for i, c in enumerate(self.children):
            if isinstance(c, Tag) and c.name == tag:
                return c
            elif isinstance(c, str):
                if c.strip() == tag.strip():
                    self.children[i] = Tag(tag, self)
                    return self.children[i]
            else:
                # A tag with a different name: look further down.
                result = c.search_tag(tag)
                if result:
                    return result

    def add_tag(self, tag, value):
        """
        add a value, tag pair to the children

        Firstly this searches if the value is a child. If this is the
        case it moves the child to the new location.
        Afterwards it searches the tag in the children. When found
        the value is added to this tag. If not, a new tag object
        is created and added to this Tag. The flag has_root
        is set to False so the element can be moved later.
        """
        value_tag = self.search_tag(value)
        if value_tag and not value_tag.has_root:
            print("Found value: %s" % value)
            if value_tag.parent:
                # Detach the tag; from here on ``value`` is the Tag object.
                i = value_tag.parent.children.index(value_tag)
                value = value_tag.parent.children.pop(i)
                value.has_root = True
        else:
            print("not %s" % value)

        found = self.search_tag(tag)
        if found:
            found.children.append(value)
        else:
            # no root
            tag_obj = Tag(tag, self)
            self.children.append(tag_obj)
            tag_obj.add_tag(tag, value)
            tag_obj.has_root = False
tags = Tag.from_file('final')
print(tags)
I know in this example the speed-Tag is not added twice. I hope that's ok.
Sorry for the long code.