Object oriented programming with abstract class - python

I want to achieve the below:
def do_something(request):
company_name = request.get("company_name", DEFAULT_COMPANY)
data = request.get("data")
response = transform_data_according_to(data, company_name)
return response
I did the following for it:
class Transform(ABC):
def __init__(self, data):
self.data = data
#abstractmethod
def transform(self):
pass
class CompanyA(Transform):
def transform(self):
# do_transformation
return transformed_data
def do_something(request):
company_name = request.get("company_name", DEFAULT_COMPANY)
data = request.get("data")
if company_name == CompanyA:
response = CompanyA.transform(data)
return response
Instead i would like to do something like this using correct object oriented principles:
def do_something(request):
company_name = request.get("company_name", DEFAULT_COMPANY)
data = request.get("data")
response = Transform(data, company_name)
return response
I want to know where I might be thinking wrong in terms of the desired approach versus the implemented approach. Is the implemented approach correct, the if else checks can grow quite big in that case.

Thanks to teraflop
The simple, idiomatic way to do this in Python would be to look up the Transform subclass in a dictionary:
transform_classes = {
"CompanyA": CompanyA,
# ...
}
def do_something(request):
company_name = request.get("company_name", DEFAULT_COMPANY)
data = request.get("data")
transformer = transform_classes[company_name](data)
return transformer.transform()
If you prefer to be more rigorously object-oriented, you could wrap the dictionary in an object (e.g. TransformLookupByName) instead of accessing it directly.
There are also various kinds of metaprogramming magic you can use to build the dictionary automatically without having to name each subclass explicitly. For example, this will collect all of the Transform subclasses in the current source file:
transform_classes = {
k:v for k,v in globals().items()
if isinstance(v, type) and issubclass(v, Transform) and v != Transform
}

Related

Python - list comprehension as a decorator (including self)

I have two functions:
job_status is getting a response from boto3 api.
jobs_detailsis a list comprehension that performs job_status on each element of the input list.
I want to change jobs_details into a decorator of jobs_status but below solutions throws inner() takes 1 positional argument but 2 were given error.
Appreciate any comment/alternative approach to my issue. Thanks!
import boto3
class GlueClient:
def __init__(self):
self.glue_client = boto3.client('glue')
#self.envs = envs
def jobs_list(self):
response = self.glue_client.list_jobs()
result = response["JobNames"]
while "NextToken" in response:
response = self.glue_client.list_jobs(NextToken=response["NextToken"])
result.extend(response["JobNames"])
return [e for e in result if "jobs_xyz" in e]
#WHAT IS CURRENTLY
def job_status(self, job_name):
paginator = self.glue_client.get_paginator('get_job_runs')
response = paginator.paginate(JobName=job_name)
return response
def jobs_details(self, jobs):
return [self.job_status(e) for e in jobs]
#WHAT IS EXPECTED
def pass_by_list_comprehension(func):
def inner(list_of_val):
return [func(value) for value in list_of_val ]
return inner
#pass_by_list_comprehension
def job_status(self, job_name):
paginator = self.glue_client.get_paginator('get_job_runs')
response = paginator.paginate(JobName=job_name)
return response
glue_client = GlueClient()
jobs = glue_client.jobs_list()
jobs_status = glue_client.job_status(jobs)
print(jobs)
You want something like:
import boto3
from typing import Callable
def handle_iterable_input(func):
def inner(self, list_of_val):
return [func(self, value) for value in list_of_val]
return inner
class GlueClient:
def __init__(self):
self.glue_client = boto3.client('glue')
#self.envs = envs
def jobs_list(self):
response = self.glue_client.list_jobs()
result = response["JobNames"]
while "NextToken" in response:
response = self.glue_client.list_jobs(NextToken=response["NextToken"])
result.extend(response["JobNames"])
return [e for e in result if "jobs_xyz" in e]
#handle_iterable_input
def job_status(self, job_name):
paginator = self.glue_client.get_paginator('get_job_runs')
response = paginator.paginate(JobName=job_name)
return response
glue_client = GlueClient()
jobs = glue_client.jobs_list()
jobs_status = glue_client.job_status(jobs)
print(jobs)
This is the most basic way to make your decorator handle methods properly, by explicitly handling the passing of self. Note, it assumes the function being decorated will only take a single argument.
If all you want to do is make job_status iterate through a list of job names instead of operating on just one, something like this should work:
def jobs_status(self, job_names):
paginator = self.glue_client.get_paginator('get_job_runs')
return [paginator.paginate(JobName=job_name) for job_name in job_names]
Using a decorator to change what parameters a method expects seems like a bad idea.
Also, naming your class GlueClient would imply that it is a glue client. The fact that it has an attribute named glue_client makes me suspect you could probably choose a clearer name for one or both of them. (However, I'm not familiar with the package you're using.)

Pydantic get a fields type hint

I want to store metadata for my ML models in pydantic. Is there a proper way to access a fields type? I know you can do BaseModel.__fields__['my_field'].type_ but I assume there's a better way.
I want to make it so that if a BaseModel fails to instantiate it is very clear what data is required to create this missing fields and which methods to use. Something like this :
from pydantic import BaseModel
import pandas as pd
# basic model
class Metadata(BaseModel):
peaks_per_day: float
class PeaksPerDayType(float):
data_required = pd.Timedelta("180D")
data_type = "foo"
#classmethod
def determine(cls, data):
return cls(data)
# use our custom float
class Metadata(BaseModel):
peaks_per_day: PeaksPerDayType
def get_data(data_type, required_data):
# get enough of the appropriate data type
return [1]
# Initial data we have
metadata_json = {}
try:
metadata = Metadata(**metadata_json)
# peaks per day is missing
except Exception as e:
error_msg = e
missing_fields = error_msg.errors()
missing_fields = [missing_field['loc'][0] for missing_field in missing_fields]
# For each missing field use its type hint to find what data is required to
# determine it and access the method to determine the value
new_data = {}
for missing_field in missing_fields:
req_data = Metadata[missing_field].data_required
data_type = Metadata[missing_field].data_type
data = get_data(data_type=data_type, required_data=req_data)
new_data[missing_field] = Metadata[missing_field].determine(data)
metadata = Metadata(**metadata_json, **new_data)
In the case you dont need to handle nested classes, this should work
from pydantic import BaseModel, ValidationError
import typing
class PeaksPerDayType(float):
data_required = 123.22
data_type = "foo"
#classmethod
def determine(cls, data):
return cls(data)
# use our custom float
class Metadata(BaseModel):
peaks_per_day: PeaksPerDayType
def get_data(data_type, required_data):
# get enough of the appropriate data type
return required_data
metadata_json = {}
try:
Metadata(**metadata_json)
except ValidationError as e:
field_to_type = typing.get_type_hints(Metadata)
missing_fields = []
for error in e.errors():
if error['type']=='value_error.missing':
missing_fields.append(error['loc'][0])
else:
raise
new_data = {}
for field in missing_fields:
type_ = field_to_type[field]
new_data[field] = get_data(type_.data_type, type_.data_required)
print(Metadata(**metadata_json, **new_data))
peaks_per_day=123.22
Im not really sure whats the point of data_type or get_data, but I assume its some internal logic that you want to add

Most efficient way to handle big number of constants

I am writing a program that, depending on a certain values from an Excel table, makes an API call. There are 2 conditions from the table that will be checked:
Language
Provider
Depending on those two values a different set of constants is needed for the API call:
def run_workflow(provider, language, workflow):
if provider == 'xxxx' and language == 0:
wf_ready = provider_ready
wf_unverified = provider_unverified
wf_active = provider_active
wf_another = provider_another
wf_closed = provider_closed
wf_wrongid = provider_wrongid
elif provider == 'yyyy' and language == 0:
wf_ready = provider_ready
wf_unverified = provider_unverified
wf_active = provider_active
wf_another = provider_another
wf_closed = provider_closed
wf_wrongid = provider_wrongid
elif ...
if workflow == 'ready':
response = requests.post(API + wf_ready),headers=header, data=json.dumps(conversation))
elif workflow == 'unverified':
response = requests.post(API + wf_unverified),headers=header, data=json.dumps(conversation))
elif ...
There are 2 provider and 7 different languages and I am trying to figure out the most efficient (and Pythonic way) to handle this scenario and came up with creating a class for each language:
class Workflow_Language():
def english(self):
self.provider_unverified = 1112
self.provider_ready = 1113
self.provider_active = 1114
self.provider_vip = 1115
def russian(self):
self.provider_unverified = 1116
self.provider_ready = 1117
self.provider_active = 1118
self.provider_vip = 1119
def ...
...
Is there maybe a better way to handle this?
One way is to map constants to appropriate handlers:
class LanguageData:
def __init__(self, unverified, ready, active, vip):
self.unverified = unverified
self.ready = ready
self.active = active
self.vip = vip
def english():
return LanguageData(1,2,3,4)
def russian():
return LanguageData(5,6,7,8)
LANGUAGE_MAP = {'en': english, 'ru': russian}
I've made up 'en', 'ru' values for clarity. It seems that 0 is in your case? Also note that english and russian are standalone functions. Finally the LanguageData class is not mandatory, you can simply return a dictionary from those functions. But workin with attributes instead of string keys seems easier to maintain.
And then in the code:
def run_workflow(provider, language, workflow):
lang_data = LANGUAGE_MAP[language]()
if workflow == 'ready':
url = API + data.ready
elif workflow == 'unverified':
url = API + data.unverified
response = requests.post(url, headers=header, data=json.dumps(conversation))
Of course workflow can be wrapped in a similar way if there are more than 2 possible values.
Analogously for provider. Unless the action depends on both provider and language at the same time in which case you need a double map:
LANG_PROV_MAP = {
('en', 'xxxx'): first,
('ru', 'yyyy'): second,
}
def run_workflow(provider, language, workflow):
data = LANG_PROV_MAP[(provider, language)]()
...
The original code can be simplified with a tricky decorator:
LANGUAGE_MAP = {}
def language_handler(lang):
def wrapper(fn):
LANGUAGE_MAP[lang] = fn
return fn
return wrapper
#language_handler('en')
def handler():
return LanguageData(1,2,3,4)
#language_handler('ru')
def handler():
return LanguageData(5,6,7,8)
Also note that if the data is "constant" (i.e. doesn't depend on the context) then you can completely omit callables to make everything even simplier:
LANGUAGE_MAP = {
'en': LanguageData(1,2,3,4),
'ru': LanguageData(5,6,7,8),
}
def run_workflow(provider, language, workflow):
data = LANGUAGE_MAP[language]
...
The combination of the language and provider can compose the method name and the call will be invoked dynamically.
Example:
import sys
def provider1_lang2():
pass
def provider2_lang4():
pass
# get the provider / lang and call the method dynamically
provider = 'provider2'
lang = 'lang4'
method_name = '{}_{}'.format(provider,lang)
method = getattr(sys.modules[__name__], method_name)
method()

Understanding reflection in django for refactoring

I am working on my first Django project and I need to understand the way reflection is used in django.
I have the method category_autocomplete which I use with jQuery to get autocomplete for a category field.
I need autocomplete in some more places but on different things. I think it might be a good idea to make into a class for reuse.
I have started making the class but I am not sure how to proceed.
The problem is the way django uses the filter function. It has a parameter which goes like <param-name>_icontains. I can easily reproduce the lambda by using getattr and passing parameter name as a string but I cannot figure out how to use reflection to get the parameter name for the filter function.
Any idea how this can be done?
class Autocomplete():
#staticmethod
def get_json_autocomplete(self, cur_objects, func):
results = []
for cur_object in cur_objects:
results.append(func(cur_object))
return json.dumps(results)
#staticmethod
def autocomplete(self, request, class_name, attr_name):
term = request.GET.get('term', '')
data = Autocomplete.get_json_autocomplete(
#Problem here
class_name.objects.filter(attr_name=term),
lambda x: getattr(x, attr_name)
)
return HttpResponse(data, 'application/json')
def _get_json_autocomplete(cur_objects, func):
results = []
for cur_object in cur_objects:
results.append(func(cur_object))
return json.dumps(results)
def category_autocomplete(request):
term = request.GET.get('term', '')
data = _get_json_autocomplete(
Category.objects.filter(name__icontains=term),
lambda x: x.name
)
return HttpResponse(data, 'application/json')
What I believe you're looking for is **, take a look here and here.
So that part of your code could be:
def autocomplete(self, request, class_name, attr_name):
term = request.GET.get('term', '')
data = Autocomplete.get_json_autocomplete(
class_name.objects.filter(**{attr_name + '__icontains': term}),
lambda x: getattr(x, attr_name)
)
return HttpResponse(data, 'application/json')

How to modify ndb.Query object?

Let's assume that we the following ndb model:
class MyModel(ndb.Model):
x = ndb.StringProperty()
y = ndb.StringProperty()
z = ndb.StringProperty(repeated=True)
We have a method that creates a query for the above model, executes it and fetch the results. However, we want this query to be modified my other functions. Specifically, we have the following:
def method_a():
qry = MyModel.query()
values = {'query':qry}
method_b(**values)
entities = qry.fetch()
def method_b(**kwargs):
k = ['a', 'b', 'c']
qry = kwargs['query']
qry.filter(MyModel.z.IN(k))
The problem is that the Query object is immutable, and thus it cannot be modified by method_b. Also, based on the specific architecture of the code, we cannot have method_b to return the new Query to method_a.
Any ideas on how to achieve the aforementioned functionality in another way??
Update: Please check the architecture of my code as presented below:
First, in a configuration file we specify a list of modules and if they are enabled or not. These modules affect the filters of the query we want to execute.
testparams = {
'Test1': True,
'Test2': True,
'Test3': False,
'Test4': True
}
Then, we have a method somewhere in the code that makes a query after the appropriate modules have been executed. Thus, it seems like this:
def my_func():
qry = MyEntity.query()
# modules
query_wrapper = [qry]
values = {'param':'x', 'query_wrapper':query_wrapper} #other values also
execute_modules(**values)
# get query and add some more things, like ordering
entities = query_wrapper[0].fetch()
The execute_modules function is the following:
def execute_modules(**kwargs):
for k in config.testparams:
if config.testparams[k]:
if kwargs['param'] == 'x':
(globals()[k]).x(**kwargs)
elif kwargs['param'] == 'y':
(globals()[k]).y(**kwargs)
Finally, an indicative module is similar to the following:
class Test1():
#classmethod
def x(cls, *args, **kwargs):
qry = kwargs['query_wrapper'][0]
# do some stuff like adding filters
kwargs['query_wrapper'][0] = qry
Any proposals to modify this architecture to a better approach?
I'm not aware of a way to do this without having method_b either return or change a referenced parameter. You should use a technique to pass a variable by reference, like passing a class with parameters.
You can pass in the args in a refrence object such as a dict/list:
def modify_query(kwargs):
kwargs['qry'] = kwargs['qry'].filter(MyModel.z.IN(k))
qry = MyModel.query()
kwargs = {'qry': qry}
modify_query(kwargs)
result = kwargs['qry'].fetch()
It should be noted that this is an extremly dirty way to accomplish what you want to accomplish. Similarly, if you pass in a list with say one object, then you can modify the contents of said list (through assignment) to modify the object:
def modify_query(list_object):
list_object[0] = list_object[0].filter(...)
You can do some hack for replace it object by other. For example:
def f(args):
qry = args[0]
qry_new = qry.filter(Model.a == 2)
args[0] = qry_new
qry = Model.query()
args = [qry]
f(args)
qry = args[0]

Categories