is it possible to use a lambda as a dictionary default? - python

I'm trying to keep a dictionary of open files for splitting data into individual files. When I request a file from the dictionary I would like it to be opened if the key isn't there. However, it doesn't look like I can use a lambda as a default.
e.g.
files = {}
for row in data:
f = files.get(row.field1, lambda: open(row.field1, 'w'))
f.write('stuff...')
This doesn't work because f is set to the function, rather than its result. setdefault using the syntax above doesn't work either. Is there anything I can do besides this:
f = files.get(row.field1)
if not f:
f = files[row.field1] = open(row.field1, 'w')

This use case is too complex for a defaultdict, which is why I don't believe that something like this exists in the Python stdlib. You can however easily write a generic "extended" defaultdict yourself, which passes the missing key to the callback:
from collections import defaultdict
class BetterDefaultDict(defaultdict):
    """defaultdict variant whose factory receives the missing key.

    The stock defaultdict calls default_factory() with no arguments; this
    subclass passes the key through, so factories like
    ``lambda key: open(key, 'w')`` work.
    """

    def __missing__(self, key):
        # Build the value from the key itself, store it, and return it.
        value = self.default_factory(key)
        return self.setdefault(key, value)
Usage:
>>> files = BetterDefaultDict(lambda key: open(key, 'w'))
>>> files['/tmp/test.py']
<open file '/tmp/test.py', mode 'w' at 0x7ff552ad6db0>
This works in Python 2.7+, don't know about older versions :) Also, don't forget to close those files again:
finally:
for f in files.values(): f.close()

You could wrap the get-and-open in a class object's __getitem__() pretty easily - something like:
class FileCache(object):
    """Lazily opens files for writing and caches the handles by path."""

    def __init__(self):
        # Maps file path -> open writable file handle.
        self.map = {}

    def __getitem__(self, key):
        """Return the cached handle for *key*, opening the file on first access."""
        if key not in self.map:
            self.map[key] = open(key, 'w')
        # BUG FIX: the original returned ``self.map.key`` (an AttributeError);
        # the cached handle lives at self.map[key].
        return self.map[key]

Another option for a subclass that should do what you need:
class LambdaDefaultDict(dict):
    """dict whose get()/setdefault() take a zero-argument callable default.

    The callable is only invoked on a miss, so expensive defaults (opening a
    file, etc.) are computed lazily.
    """

    def get(self, key, default=None):
        """Return self[key]; on a miss, return default() (or None if no factory)."""
        try:
            return self[key]
        except KeyError:
            # BUG FIX: the original unconditionally called default(), so the
            # plain d.get(key) form raised TypeError ("NoneType not callable").
            return default() if default is not None else None

    def setdefault(self, key, default=None):
        """Insert key with default() (or None) if absent, then return the value."""
        # BUG FIX: dict.has_key() was removed in Python 3; use ``in``.
        if key not in self:
            self[key] = default() if default else None
        return self[key]
Or, perhaps more general - to allow defaults that are values or callable expressions:
class CallableDefaultDict(dict):
    """dict whose get()/setdefault() accept a plain value OR a zero-argument
    callable as the default; callables are invoked lazily on a miss."""

    def get(self, key, default=None):
        """Return self[key]; on a miss, call default() if callable, else return it."""
        try:
            return self[key]
        except KeyError:
            return default() if callable(default) else default

    def setdefault(self, key, default=None):
        """Insert key (evaluating a callable default lazily) and return the value."""
        # BUG FIX: dict.has_key() was removed in Python 3; use ``in``.
        if key not in self:
            self[key] = default() if callable(default) else default
        return self[key]

You can use defaultdict from the collections module
class FileCache(collections.defaultdict):
    """defaultdict whose missing keys are opened as writable files."""

    def __missing__(self, key):
        # Open once, remember the handle under the key, and hand it back.
        handle = open(key, 'w')
        self[key] = handle
        return handle
Though it might be better to just do
files = {}

def get_file(f):
    """Return the writable handle cached for path *f*, opening it on first use."""
    handle = files.get(f)
    if handle is None:
        # First request for this path: open it and remember the handle.
        handle = files[f] = open(f, 'w')
    return handle
for row in data:
f = get_file(row.field1)
f.write('stuff...')

This is the exact reason why dict[key] syntax raises KeyError:
files = {}
for row in data:
f = files.get(row.field1, lambda: open(row.field1, 'w'))
f.write('stuff...')
should become:
files = {}
for row in data:
try: f = files[row.field1]
except KeyError: f = open(row.field1, 'w')
f.write('stuff...')

Related

print key into txt file

So I am working on a script to generate serial numbers for a product. I want to make a txt file where the script prints the generated key. Somehow it can't print in there, but I don't know what I need to change about it.
key = Key('aaaa-bbbb-cccc-dddd-1111')
fh = open('key.txt')
fh.write(Key)
Ok, based on your response, I've mocked up the Key class as follows. Without more information, it's not possible to give you a definitive answer, but hopefully this helps!
class Key:
    """Mock-up of the asker's Key class: wraps a raw serial string."""

    def __init__(self, serial):
        # Raw serial string, e.g. 'aaaa-bbbb-cccc-dddd-1111'.
        self.serial = serial

    def process_serial(self):
        # Your processing here
        ...
        # NOTE(review): 'processed_serial' is a placeholder name -- it must be
        # assigned by the processing code above before this return can work.
        return processed_serial # This should be a string
Then to write to file, you can do:
key = Key('aaaa-bbbb-cccc-dddd-1111')
with open('key.txt', 'w') as f:
f.write(key.process_serial())
Alternatively, you can add a __str__ method to your class, which will specify what happens when you call the Python builtin str on your object.
class Key:
    """Key variant defining __str__ so str(key) yields the text to write."""

    def __init__(self, serial):
        # Raw serial string supplied by the caller.
        self.serial = serial

    def __str__(self):
        # Invoked by the str() builtin (and implicitly by print/format).
        out = ... # construct what you want to write to file
        return out
Giving:
key = Key('aaaa-bbbb-cccc-dddd-1111')
with open('key.txt', 'w') as f:
f.write(str(key))
You might also consider adding this as a method to your Key class
class Key:
    """Serial-number key that can process itself and write the result to a file."""

    # BUG FIX: the original read ``__init__(self, serial):`` -- the ``def``
    # keyword was missing, which is a SyntaxError.
    def __init__(self, serial):
        self.serial = serial

    def process_serial(self):
        # Your processing here
        ...
        # NOTE(review): placeholder -- assign 'processed_serial' above.
        return processed_serial # This should be a string

    def write(self, file_name):
        """Write the processed serial string to *file_name*."""
        with open(file_name, 'w') as f:
            # BUG FIX: must CALL the method; the original wrote the bound
            # method object itself, which raises TypeError in f.write().
            f.write(self.process_serial())
Try:
key = "Key('aaaa-bbbb-cccc-dddd-1111')"
fh = open('key.txt', "w")
fh.write(key)
To generate a text file that doesn't already exist you need to use "w" .
Try doing:
key = Key('aaaa-bbbb-cccc-dddd-1111')
with open('key.txt', 'w') as fh:
fh.write(key)
Hope that Helps!
Note: the write must happen inside the with ... block; once control leaves the block the file is considered closed.

How to add parameters to an object using variables

I am trying to put some variables into a container (object) using a loop.
I created the following class
class Box:
    """Empty namespace object; attributes are attached to instances dynamically."""
Now I want to initialize the class and add variables from my text file to it. My text file looks like:
a = 1
b = 2
c = 3
d = 4
I tried following code
vars = Box()
filename = ('inputfile.txt')
with open (filename) as f:
for line in f:
parts = line.split()
var_name = str(parts[0])
var_value = parts[2]
vars.var_name = var_value
I can't find out why this approach doesn't work.
I think what you are trying to do is use a class as a storage medium for data. As asongtoruin already said, you could use a dictionary.
Otherwise you need to make the class actually something:
class Box:
    """Attribute bag that also supports dict-style subscript access."""

    def __init__(self):
        pass

    def __getitem__(self, attr):
        # Delegate subscript reads to attribute lookup.
        return getattr(self, attr)

    def __setitem__(self, attr, value):
        # Delegate subscript writes to attribute assignment.
        return setattr(self, attr, value)
Then you can use almost all of your code as it was:
vars = Box()
filename = ('inputfile.txt')
with open (filename) as f:
for line in f:
parts = line.split()
var_name = str(parts[0])
var_value = parts[2]
vars[var_name] = var_value
print(vars.a)
Why what you had didn't work was already explained by barak manos: vars.var_name does not use the value of the variable var_name, because the syntax calling a class method does not support this notation (for good reason).
When you do
vars.var_name = var_value
it's equal to adding the key 'var_name' to the dict vars.__dict__.
I think what you want can be achieved using setattr.
instead of vars.var_name = var_value, use:
setattr(vars, var_name, var_value)
Maybe try something like
vars[var_name + ''] = var_value
(No python code)
Why don't you use setattr() ?
setattr(Box, "var_name", var_value)

How to load into Globals in Python from a function

I have a function which loads data into a dictionary.
But how can I load the dictionary into globals() inside a function?
Doing it inside a function is the important part, since it is easy to do at script level.
def load237(filename):
    """Extract a .tar session archive and unpickle the data it contains.

    Returns a (data, error_message) tuple; error_message is None on success.
    NOTE(review): Python 2 code -- os.getcwdu(), cPickle, file(), iteritems(),
    the ``except (..), error`` syntax and unicode() do not exist in Python 3.
    """
    filename = osp.abspath(filename)
    # Work from the archive's directory so relative member paths resolve.
    old_cwd = os.getcwdu()
    os.chdir(osp.dirname(filename))
    error_message = None
    try:
        tar = tarfile.open(filename, "r")
        tar.extractall()
        # The pickle inside the archive shares the archive's base name.
        pickle_filename = osp.splitext(filename)[0]+'.pickle'
        data = cPickle.load(file(pickle_filename))
        saved_arrays = {}
        # NOTE(review): 'load_array' is not defined in this snippet --
        # presumably a module-level flag/import; confirm against the caller.
        if load_array is not None:
            try:
                # Arrays were stored beside the pickle; re-attach each one to
                # `data` at its recorded (name, index) slot.
                saved_arrays = data.pop('__saved_arrays__')
                for (name, index), fname in saved_arrays.iteritems():
                    arr = np.load( osp.join(osp.dirname(filename), fname) )
                    if index is None:
                        data[name] = arr
                    elif isinstance(data[name], dict):
                        data[name][index] = arr
                    else:
                        data[name].insert(index, arr)
            except KeyError:
                # No '__saved_arrays__' entry: nothing to re-attach.
                pass
        # Remove the files that extractall() dropped into the directory.
        for fname in [pickle_filename]+[fn for fn in saved_arrays.itervalues()]:
            os.remove(fname)
    except (EOFError, ValueError), error:
        # NOTE(review): if the failure happens before `data` is assigned, the
        # return below raises NameError -- confirm this is intended.
        error_message = unicode(error)
    os.chdir(old_cwd)
    return data, error_message
This one does not work (globals is local to the module/function...)
def load_inmemory(fpath):
globals().update(load237(fpath)[0])
You should really be storing those names on an object stored in a global and not as global variables. But you asked how to do it and so here is how:
Using Getting corresponding module from function with a for loop and setattr as modules do not support dictionary operations and it is possible to write the function as:
import sys
def load_inmemory(fpath):
    """Load the dict produced by load237(fpath) into this module's globals.

    BUG FIX: the original signature took no arguments but still referenced
    ``fpath``, raising NameError on every call; it is now a parameter.
    """
    # globals() inside a function only covers the function's own module, so
    # write the names onto the module object itself via setattr.
    module = sys.modules[load_inmemory.__module__]
    for k, v in load237(fpath)[0].items():
        setattr(module, k, v)
load_inmemory()
print x
I tested the following:
import sys
def func():
    """Inject the global x = 4 into this function's own module namespace."""
    this_module = sys.modules[func.__module__]
    for name, value in {'x': 4}.items():
        # setattr on the module object creates a module-level global.
        setattr(this_module, name, value)
func()
print x
Prints 4. Tested in Python 2.7.3.

Comparing list of values associated with a key in a dictionary to the values associated all other keys

Here is the code I wrote to compare the list of values associated with each key to those of all other keys in the dictionary... But it is taking a very long time for some 10000 records in the csv file. Can anybody help optimize the code to execute in minimal time? Don't worry about the external function call; it works fine.
import csv
import sys

# NOTE(review): 'file' shadows the Python builtin of the same name.
file = sys.argv[1]
with open(file, 'rU') as inf:
    csvreader=csv.DictReader(inf,delimiter=',')
    result={}
    temp = []
    #Creating Dict
    # Group [first, last, id] triples by their GROUP_KEY column.
    for r in csvreader:
        name=[]
        name.append(r['FIRST_NAME'])
        name.append(r['LAST_NAME'])
        name.append(r['ID'])
        result.setdefault(r['GROUP_KEY'],[]).append(name)
    #Processing the Dict
    # Score every name in each group against every name in every OTHER
    # group -- quadratic in both groups and names, hence the slowness.
    for key1 in result.keys():
        temp.append(key1)
        for key2 in result.keys():
            # NOTE(review): 'ex' is never defined in this snippet; it was
            # probably meant to be 'temp' (the already-processed keys) so
            # each pair is scored only once -- confirm before relying on it.
            if key1 != key2 and key2 not in ex:
                for v1 in result[key1]:
                    for v2 in result[key2]:
                        score=name_match_score(v1,'',v2,'')[0] ####calling external function
                        if score > 0.90:
                            print v1[2],v2[2],score
Something like this will help. The goal is to reduce the number of raw calculations done in name_match_score by skipping redundant calculations and by caching calculations performed.
First, make your dictionary store a defaultdict of lists of tuples. Tuples are immutable so they can be used as keys in sets and dicts below.
from collections import defaultdict
import csv
import sys

file = sys.argv[1]
with open(file, 'rU') as inf:
    csvreader=csv.DictReader(inf, delimiter=',')
    # defaultdict(list) removes the setdefault boilerplate; tuples (not
    # lists) are used for names so they are hashable and can later serve
    # as dict/set keys for the memoization step.
    result = defaultdict(list)
    for r in csvreader:
        name = (r['FIRST_NAME'], r['LAST_NAME'], r['ID'])
        result[r['GROUP_KEY']].append(name)
Then, sort your keys to ensure you evaluate a pair of keys only once.
keys = sorted(result)
for i, key1 in enumerate(keys):
for key2 in keys[i+1:]:
And order v1 and v2 so that they form a unique key. This will help with caching.
for v1 in result[key1]:
for v2 in result[key2]:
v1, v2 = (min(v1, v2), max(v1, v2))
score=name_match_score(v1, v2)[0] ####calling external function
if score > 0.90:
print v1[2],v2[2],score
Then use a memoizing decorator to cache calculations:
class memoized(object):
    '''Decorator. Caches a function's return value each time it is called.
    If called later with the same arguments, the cached value is returned
    (not reevaluated). Calls with unhashable arguments (e.g. lists) are
    passed straight through without caching.
    '''

    def __init__(self, func):
        self.func = func
        self.cache = {}

    def __call__(self, *args):
        # BUG FIX: the original tested isinstance(args, collections.Hashable),
        # but args is a tuple and tuples are *always* instances of Hashable,
        # even when their elements are not -- so unhashable arguments still
        # blew up. (collections.Hashable is also gone in Python 3.10+.)
        # EAFP: attempt the lookup and fall back on TypeError instead.
        try:
            return self.cache[args]
        except KeyError:
            value = self.func(*args)
            self.cache[args] = value
            return value
        except TypeError:
            # Unhashable arguments: better to not cache than blow up.
            return self.func(*args)

    def __repr__(self):
        '''Return the function's docstring.'''
        return self.func.__doc__

    def __get__(self, obj, objtype):
        '''Support instance methods by binding obj as the first argument.'''
        return functools.partial(self.__call__, obj)
And change name_match_score to use the decorator:
# BUG FIX: the decorator line read '#memoized', which is an inert comment;
# decorator syntax uses '@', otherwise no caching happens at all.
@memoized
def name_match_score(v1, v2):
    """Score two (first, last, id) name tuples; returns a 1-tuple of floats."""
    # Whatever this does
    return (0.75, )
This should minimize the number of raw calculations inside name_match_score that you do.

Generic arguments in recursive functions: terrible habit?

I catch myself doing this a lot. The example is simple, but, in practice, there are a lot of complex assignments to update data structures and conditions under which the second recursion is not called.
I'm working with mesh data. Points, Edges, and Faces are stored in separate dictionaries and "pointers" (dict keys) are heavily used.
import itertools
class Demo(object):
    """Two keyed stores (a, b) sharing one monotonically increasing key source."""

    def __init__(self):
        self.a = {}
        self.b = {}
        # Single counter so keys are unique across both stores.
        self.keygen = itertools.count()

    def add_to_b(self, val):
        """Store val in b under a fresh key and return that key."""
        key = next(self.keygen)
        self.b[key] = val
        return key

    def recur_method(self, arg, argisval=True):
        """Record arg in a; on the first (value) pass, move it into b first."""
        a_key = next(self.keygen)
        if argisval is True:
            # arg is a raw value: park it in b, point a at it, then recurse
            # once more treating the b-key as the argument.
            b_key = self.add_to_b(arg)
            self.a[a_key] = b_key
            self.recur_method(b_key, argisval=False)
        else:
            # arg is already a key into b: just record the pointer.
            self.a[a_key] = arg
demo = Demo()
demo.recur_method(2.2)
Is there a better way? short of cutting up all of my assignment code into seven different methods? Should I be worried about this anyway?
Try
def recur_method(self, key=None, val=None):
    """Guard clause: require at least one of key/val to be supplied.

    BUG FIX: the original wrote ``raise exception(...)`` -- lowercase
    'exception' is not a builtin and would itself raise NameError.
    """
    if key is None and val is None:
        raise Exception("You fail it")
If None is a valid input, then use a guard value:
# Module-level sentinel: a unique object that no caller can pass by accident,
# so None remains a legal argument value.
sentinel = object()

def recur_method(self, key=sentinel, val=sentinel):
    """Guard clause using a sentinel so that None is a valid input.

    BUG FIX: the original wrote ``raise exception(...)`` -- lowercase
    'exception' is not a builtin and would itself raise NameError.
    """
    if key is sentinel and val is sentinel:
        raise Exception("You fail it")

Categories