I have a small problem with repeating myself by creating nested related objects.
I get JSON from an API call, which I convert to a nested dictionary.
This nested dict has a lot of related objects which I have to check if their fields have a related_model in order to create the related object before creating the actual object...
I have 3 functions that are the same, but with different name:
get_or_create_object
get_or_create_related_object
get_or_create_subrelated_object
Here is the code:
def get_or_create_object(object_dict:dict, Klass):
    """Get or create a ``Klass`` instance from a (possibly nested) dict.

    Every key whose model field exposes a ``related_model`` is resolved first
    through ``get_or_create_related_object``; the resulting instance replaces
    the raw nested dict. All other keys are forwarded unchanged to
    ``Klass.objects.get_or_create``.

    Args:
        object_dict: Mapping of field name to value. It is NOT modified.
        Klass: Model class exposing ``objects.get_or_create`` and per-field
            descriptors with ``.field.related_model``.

    Returns:
        The object returned by ``Klass.objects.get_or_create``.
    """
    # Main object
    # Work on a shallow copy so the caller's dict is never mutated.
    plain_fields = dict(object_dict)
    related_objects = {}
    for raw_key in object_dict:
        key = _validate_field_name(raw_key)
        related_model = getattr(Klass, key).field.related_model
        if not related_model:
            continue
        if isinstance(object_dict[key], list):
            # NOTE(review): list-valued relations are not created yet, only
            # reported. They are dropped from the lookup kwargs below;
            # confirm this matches the intended (lost) indentation.
            print(object_dict[key])
        else:
            value = _clean_object_dict(object_dict[key])
            related_objects[key] = get_or_create_related_object(
                object_dict=value, Klass=related_model
            )
        plain_fields.pop(key, None)
    composed_object_dict = {**plain_fields, **related_objects}
    obj, _ = Klass.objects.get_or_create(**composed_object_dict)
    return obj
def get_or_create_related_object(Klass, object_dict):
    """Get or create a ``Klass`` instance that is related to the main object.

    Keys whose model field exposes a ``related_model`` are resolved first via
    ``get_or_create_subrelated_object``; every other key is forwarded as-is to
    ``Klass.objects.get_or_create``.

    Args:
        Klass: Model class exposing ``objects.get_or_create`` and per-field
            descriptors with ``.field.related_model``.
        object_dict: Mapping of field name to value. It is NOT modified.

    Returns:
        The object returned by ``Klass.objects.get_or_create``.
    """
    # Related object to main object
    # Shallow copy: the caller's dict must stay untouched.
    plain_fields = dict(object_dict)
    related_objects = {}
    for key, value in object_dict.items():
        related_model = getattr(Klass, key).field.related_model
        if related_model:
            # Use a separate name for the nested dict; rebinding
            # ``object_dict`` here would make later keys index the nested
            # dict instead of this object's own dict.
            nested_dict = _clean_object_dict(value)
            related_objects[key] = get_or_create_subrelated_object(
                Klass=related_model,
                object_dict=nested_dict,
            )
            plain_fields.pop(key, None)
    composed_object_dict = {**plain_fields, **related_objects}
    obj, _ = Klass.objects.get_or_create(**composed_object_dict)
    return obj
def get_or_create_subrelated_object(Klass, object_dict):
    """Get or create a ``Klass`` instance nested below a related object.

    Keys whose model field exposes a ``related_model`` are resolved first via
    ``get_or_create_related_object``; every other key is forwarded as-is to
    ``Klass.objects.get_or_create``.

    Args:
        Klass: Model class exposing ``objects.get_or_create`` and per-field
            descriptors with ``.field.related_model``.
        object_dict: Mapping of field name to value. It is NOT modified.

    Returns:
        The object returned by ``Klass.objects.get_or_create``.
    """
    # Related object to "Related object to main object"
    # In other words: subrelated object
    plain_fields = dict(object_dict)
    related_objects = {}
    for key, value in object_dict.items():
        related_model = getattr(Klass, key).field.related_model
        if related_model:
            nested_dict = _clean_object_dict(value)
            # Create the nested object BEFORE storing it; the previous order
            # read ``obj`` before it had been assigned.
            related_objects[key] = get_or_create_related_object(
                Klass=related_model, object_dict=nested_dict
            )
            plain_fields.pop(key, None)
    composed_object_dict = {**plain_fields, **related_objects}
    obj, _ = Klass.objects.get_or_create(**composed_object_dict)
    return obj
Does anyone have a better approach?
Thank you very much!
I checked the documentation, but there are just easy examples.
Related
I have an array that stores objects. I am trying to see if there are duplicate values in this object array, but only on one of the objects' attributes (hexdigest).
How can I check for duplicates and record the entire object of duplicates I find?
# class to store hashes
class customclass:
# Each instance simply stores the two constructor arguments as attributes.
def __init__(self, value, hexdigest):
self.value = value
self.hexdigest = hexdigest
# array to store class data
hash_array = []
hash_array.append(customclass(value=299, hexdigest='927'))
hash_array.append(customclass(value=207, hexdigest='92b'))
hash_array.append(customclass(value=113, hexdigest='951'))
hash_array.append(customclass(value=187, hexdigest='951'))
hash_array.append(customclass(value=205, hexdigest='998'))
# sort array
# NOTE(review): attrgetter is not imported in this snippet; it comes from the
# standard-library operator module.
sorted_array = sorted(hash_array, key=attrgetter('hexdigest'))
# check for duplicate hexdigest's
# NOTE(review): newlist starts empty and is only appended to from inside the
# loop over newlist itself, so the inner loop body never runs and both
# newlist and duplist stay empty.
newlist = []
duplist = []
for obj in sorted_array:
for jbo in newlist:
if obj.hexdigest not in jbo:
newlist.append(obj)
else:
duplist.append(obj)
# Collect duplicates in one pass: hex_list remembers every hexdigest seen so
# far, and any object whose hexdigest is already known goes into duplist.
duplist = []
hex_list = []
for obj in sorted_array:
    if obj.hexdigest not in hex_list:
        # First occurrence of this hexdigest: just remember it.
        hex_list.append(obj.hexdigest)
        continue
    duplist.append(obj)
Use the block of code above instead of the one below (the one you implemented) to find the list of duplicate objects:
# Original (non-working) attempt, quoted for reference.
# NOTE(review): newlist is empty when the outer loop starts and is only
# appended to inside the inner loop, so the inner loop body never executes.
newlist = []
duplist = []
for obj in sorted_array:
for jbo in newlist:
if obj.hexdigest not in jbo:
newlist.append(obj)
else:
duplist.append(obj)
Well, newlist is empty, so the inner for loop never runs, so nothing gets appended to newlist or duplist.
You may wish to group by the hexdigest attribute using itertools.groupby and a dictionary comprehension.
from itertools import groupby
from operator import attrgetter


class customclass:
    """Simple record pairing a value with its hexdigest string."""

    def __init__(self, value, hexdigest):
        self.value = value
        self.hexdigest = hexdigest


hash_array = [
    customclass(value=299, hexdigest='927'),
    customclass(value=207, hexdigest='92b'),
    customclass(value=113, hexdigest='951'),
    customclass(value=187, hexdigest='951'),
    customclass(value=205, hexdigest='998'),
]

# groupby only merges *adjacent* items with equal keys, so the data has to be
# sorted by the same key first.
sorted_array = sorted(hash_array, key=attrgetter('hexdigest'))
# [<__main__.customclass object at 0x7f488d1a2a20>,
# <__main__.customclass object at 0x7f488d1a29b0>,
# <__main__.customclass object at 0x7f488d1a2b00>,
# <__main__.customclass object at 0x7f488d1a2b70>,
# <__main__.customclass object at 0x7f488d1a2c18>]

groups = groupby(sorted_array, key=attrgetter('hexdigest'))
# ``groups`` is a one-shot iterator, so materialise it into a dict and keep
# the result in a name instead of discarding it.
grouped = {k: list(v) for k, v in groups}
# {'927': [<__main__.customclass object at 0x7f488d1a2a20>],
# '92b': [<__main__.customclass object at 0x7f488d1a29b0>],
# '951': [<__main__.customclass object at 0x7f488d1a2b00>,
# <__main__.customclass object at 0x7f488d1a2b70>],
# '998': [<__main__.customclass object at 0x7f488d1a2c18>]}

# Every object whose hexdigest occurs more than once (all occurrences kept).
duplicates = [obj for objs in grouped.values() if len(objs) > 1 for obj in objs]
# Every object whose hexdigest occurs exactly once.
uniques = [objs[0] for objs in grouped.values() if len(objs) == 1]
From there it's relatively easy to retrieve the unique and duplicate values.
It may be easier to visualize what's going on if you provide a more useful definition for __repr__.
class customclass:
    """Value/hexdigest record with a human-readable ``repr``."""

    def __init__(self, value, hexdigest):
        self.hexdigest = hexdigest
        self.value = value

    def __repr__(self):
        # Show both attributes so lists of instances are readable when printed.
        return f"<customclass value: {self.value}, hexdigest: {self.hexdigest}>"
Doing so, hash_array prints in the interactive interpreter as follows, with the exception of the newlines I added for sanity's sake.
[<customclass value: 299, hexdigest: 927>,
<customclass value: 207, hexdigest: 92b>,
<customclass value: 113, hexdigest: 951>,
<customclass value: 187, hexdigest: 951>,
<customclass value: 205, hexdigest: 998>]
I am getting the error below when trying to set a list of objects. This wasn't happening earlier with my data but now it is.
Is there a better method to writing the obj_list_dictize method to handle these kinds of problems?
Thanks!
Module ckan.lib.dictization:79 in obj_list_dictize
>> obj, capacity = obj
ValueError: too many values to unpack
def obj_list_dictize(obj_list, context, sort_key=lambda x:x):
    '''Dictize every object in ``obj_list`` and return the sorted results.

    When ``context['with_capacity']`` is truthy, each entry of ``obj_list``
    is unpacked as an ``(object, capacity)`` pair and the capacity is passed
    on to ``table_dictize``. An entry that is not exactly a two-item
    sequence therefore fails at the unpacking step.

    Unless ``context['active']`` is falsy (it defaults to True), objects whose
    ``state`` is not ``'active'`` are left out of the result.
    '''
    only_active = context.get('active', True)
    dictized_objs = []
    for entry in obj_list:
        if context.get('with_capacity'):
            model_obj, capacity = entry
            as_dict = table_dictize(model_obj, context, capacity=capacity)
        else:
            model_obj = entry
            as_dict = table_dictize(model_obj, context)
        # Keep the dict when no filtering is requested or the object is active.
        if not only_active or model_obj.state == 'active':
            dictized_objs.append(as_dict)
    return sorted(dictized_objs, key=sort_key)
I'm trying to get returned value from pipeline. I'm using yield generator to generate item.
And this is my code.
def get_or_create(model):
    """Return ``(obj, created)`` for the given unsaved model instance.

    Looks up an existing row of the same model class with the same
    ``product_company`` and ``barcode``. If there is none, ``model`` itself
    is saved and returned with ``created`` set to True.
    """
    model_class = type(model)
    try:
        existing = model_class.objects.get(
            product_company=model.product_company,
            barcode=model.barcode,
        )
    except model_class.DoesNotExist:
        # DjangoItem created a model for us.
        model.save()
        return (model, True)
    return (existing, False)
def update_model(destination, source, commit=True):
    """Copy the field values of ``source`` onto ``destination``.

    Every key/value pair produced by ``model_to_dict(source)`` is set as an
    attribute on ``destination``. The primary key of ``destination`` is
    restored afterwards, so the copy never changes which row it refers to.

    ``destination`` is saved when ``commit`` is true, and is returned in
    either case.
    """
    original_pk = destination.pk
    for field_name, field_value in model_to_dict(source).items():
        setattr(destination, field_name, field_value)
    # The loop above may have overwritten the pk; put the original back.
    destination.pk = original_pk
    if commit:
        destination.save()
    return destination
class ProductItemPipeline(object):
    """Pipeline that persists ``ProductItem`` objects with their images and comments."""

    def process_item(self, item, spider):
        """Store a scraped product and return its model instance.

        Items that are not ``ProductItem`` instances are passed through
        unchanged so later pipeline stages still receive them.

        For a ``ProductItem`` the model is looked up (or created) via
        ``get_or_create``. Only when it was newly created are the item, its
        images and its comments saved.

        NOTE(review): Scrapy pipelines are expected to return an item; the
        model is returned here to keep the existing behaviour.
        """
        if not isinstance(item, ProductItem):
            return item

        model, created = get_or_create(item.instance)

        files = item['files']
        # Guard against products without any downloaded file instead of
        # failing with IndexError on files[0].
        if files:
            item['cover_photo'] = files[0]['path']

        if created:
            item.save()
            for image in files:
                ProductImageItem(image=image['path'], product=model).save()
            for comment in item['comments']:
                CommentItem(comment=comment.comment, product=model).save()
        return model
Also this is my spider.
item = ProductItem(name=name, price=price, barcode=barcode, file_urls=objectImages, product_url=response.url,product_company=company, comments = comments)
# NOTE(review): the value of a `yield` expression is whatever the consumer
# sends back into the generator; when nothing is sent it is None, which is
# consistent with the NoneType reported for `product`.
product = yield item
print type(product)
print "yield product"
And the type of product comes back as NoneType.
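This is a misunderstanding of how Python's yield works: it does not hand a value from the pipeline back to the spider. You should use yield the way you would use return, to hand a result back to the caller. You can find more information about generators in this article.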
So your code will look like
item = ProductItem(name=name, price=price, barcode=barcode, file_urls=objectImages, product_url=response.url,product_company=company, comments = comments)
self.logger.debug(type(product))
self.logger.debug('yield product')
yield item
I have created some Python classes to use as multivariate data structures, which are then used for various tasks. In some instances, I like to populate the classes with various value sets. The default parameter filename "ho2.defaults" would look something like this:
name = 'ho2'
mass_option = 'h1o16'
permutation = 'odd'
parity = 'odd'
j_total = 10
lr = 40
br = 60
jmax = 60
mass_lr = 14578.471659
mass_br = 1781.041591
length_lr = ( 1.0, 11.0, 2.65 )
length_br = ( 0.0, 11.0, 2.46 )
use_spline = True
energy_units = 'au'
pes_zpe = -7.407998138300982E-2
pes_cutoff = 0.293994
Currently, I create a dictionary from reading the desired key,value pairs from file, and now I'd like a "pythonic" way of making those dictionary keys be class instance variable names, i.e.
# Instantiate Molecule Class
molecule = Molecule()
# Create Dictionary of default values
default_dict = read_dict_from_file(filename)
# Set populate class instance variables with dictionary values
# NOTE(review): iterating a dict directly yields only its keys; key/value
# pairs require default_dict.items().
for key,value in default_dict:
# NOTE(review): this assigns an attribute literally named "key" on every
# iteration rather than one named after the dictionary key.
molecule.key = value
So the class's instance variable "molecule.name" could be set from the dictionary key/value pair. I could do this by hand, but I'm sure there is a better way to loop through it. In practice the dictionary could be large, and I'd rather allow the user to choose which values they want to populate, so the dictionary could change. What am I missing here?
You would use setattr: setattr(molecule, key, value)
The simple way is:
vars(molecule).update(default_dict)
This will clobber any pre-existing attributes though. For a more delicate approach try:
for name, value in default_dict.items():
    # Only fill in attributes the instance does not define yet, so values
    # that are already set are never overwritten.
    if not hasattr(molecule, name):
        setattr(molecule, name, value)
I'd invert the logic so that the object dynamically answers questions:
class Settings(object):
    """Attribute-style view over a dict of values.

    Reading an attribute succeeds only when its name is listed in ``ATTRS``
    and a value for it is present in the stored data; anything else raises
    ``AttributeError``. Assigning an attribute stores the value in the data
    dict under that name.
    """

    ATTRS = {'foo', 'bar'}

    def __init__(self, defaults):
        # Go through __dict__ directly: a plain ``self.data = ...`` would be
        # routed to __setattr__, which itself relies on ``self.data``.
        self.__dict__['data'] = defaults.copy()

    def __getattr__(self, key):
        # Only called when normal attribute lookup has already failed.
        if key in self.ATTRS and key in self.data:
            return self.data[key]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            self.__class__.__name__, key))

    def __setattr__(self, key, value):
        self.data[key] = value
# Demo: only names listed in Settings.ATTRS *and* present in the data resolve.
s = Settings({'a': 'b', 'foo': 'foo!', 'spam': 'eggs'})
# print(...) with a single argument behaves the same on Python 2 and 3.
print(s.foo)

try:
    print(s.spam)
except AttributeError:
    pass
else:
    raise AssertionError("That should have failed because 'spam' isn't in Settings.ATTRS")

try:
    print(s.bar)
except AttributeError:
    pass
else:
    raise AssertionError("That should have failed because 'bar' wasn't passed in")
class Molecule(Settings):
    """Settings subclass that whitelists the molecule attribute names."""

    # `...` is a placeholder for the remaining attribute names; replace it
    # with the full list of names that should be readable.
    ATTRS = {'name', 'mass_option', ...}


molecule = Molecule(default_dict)
I am using redis to try to save a request's session object. Based on how to store a complex object in redis (using redis-py), I have:
def get_object_redis(key,r):
# Fetch the stored value for `key` from the client `r` and unpickle it.
# NOTE(review): there is no guard for a missing key here; when `saved` is
# None, pickle.loads raises the TypeError quoted below.
saved = r.get(key)
obj = pickle.loads(saved)
return obj
redis = Redis()
s = get_object_redis('saved',redis)
I have situations where there is no saved session and 'saved' evaluates to None. In this case I get:
TypeError: must be string or buffer, not None
Whats the best way to deal with this?
There are several ways to deal with it. This is what they would have in common:
def get_object_redis(key,r):
    """Template: unpickle the value stored under ``key``, or handle its absence.

    The branch taken when ``r.get(key)`` yields None is a placeholder to be
    filled in with whatever the caller expects for a missing key.
    """
    saved = r.get(key)
    if saved is not None:
        # NOTE(review): pickle.loads is only safe on data you trust.
        return pickle.loads(saved)
    # maybe add code here
    return ... # return something you expect
You need to make it clear what you expect if a key is not found.
Version 1
An example would be you just return None:
def get_object_redis(key,r):
    """Return the unpickled object stored under ``key``, or None if absent."""
    saved = r.get(key)
    # NOTE(review): pickle.loads is only safe on data you trust.
    return None if saved is None else pickle.loads(saved)
redis = Redis()
# With the function above, s is None whenever no value is stored under 'saved'.
s = get_object_redis('saved',redis)
s is then None. This may be bad because you need to handle that somewhere and you do not know whether it was not found or it was found and really None.
Version 2
You create an object, maybe based on the key, that you can construct because you know what lies behind a key.
class KeyWasNotFound(object):
    """Placeholder object returned when nothing is stored for a key."""

    # just an example class
    # maybe you have something useful in mind
    def __init__(self, key):
        self.key = key


def get_object_redis(key,r):
    """Return the unpickled object for ``key``, or ``KeyWasNotFound(key)`` if absent."""
    saved = r.get(key)
    if saved is not None:
        # NOTE(review): pickle.loads is only safe on data you trust.
        return pickle.loads(saved)
    return KeyWasNotFound(key)
Usually, if identity is important, you would store the object after you created it, to return the same object for the key.
Version 3
TypeError is a very generic error. You can create your own error class. This would be the preferred way for me, because I do not like version 1 and do not know which object would be useful to return.
class NoRedisObjectFoundForKey(KeyError):
    """Raised when nothing is stored under the requested key."""


def get_object_redis(key,r):
    """Return the unpickled object for ``key``.

    Raises:
        NoRedisObjectFoundForKey: if ``r.get(key)`` yields None.
    """
    saved = r.get(key)
    if saved is not None:
        # NOTE(review): pickle.loads is only safe on data you trust.
        return pickle.loads(saved)
    raise NoRedisObjectFoundForKey(key)