Given this example code where we have a series of log processors, I can't help feeling there ought to be a more pythonic/efficient way of deciding which log processor to use to process some data:
class Component1ErrorLogProcessor:
    """Handles error-log lines that belong to component 1."""

    def process(self, logToProcess):
        """Process one log entry.

        Args:
            logToProcess: The log data to handle.
        """
        # Do something with the logs
        pass


class Component2ErrorLogProcessor:
    """Handles error-log lines that belong to component 2."""

    def process(self, logToProcess):
        """Process one log entry.

        Args:
            logToProcess: The log data to handle.
        """
        # Do something with the logs
        pass


class LogProcessor:
    """Routes each log line to the processor of the component it came from."""

    def __init__(self):
        self.component1 = Component1ErrorLogProcessor()
        self.component2 = Component2ErrorLogProcessor()

    def process_line(self, line, component):
        """Hand ``line`` to the processor selected by ``component``.

        Args:
            line: The log line to process.
            component: Component tag; each component is known under two tags.

        A tag that matches neither component is ignored.
        """
        # The processors expose ``process``; there is no ``process_errors``.
        if component in ("Component1Log-", "[Component1]"):
            self.component1.process(line)
        elif component in ("Component2Log-", "[Component2]"):
            self.component2.process(line)
I'd personally use the idea of a registry, so that you map each class to its component names.
There are a bunch of different ways to go about this, here's a quick example by using a base class:
class ComponentLogProcessor(object):
    """Base class that keeps a registry of component name -> processor class."""

    # One dict shared by all subclasses: component name -> processor class.
    _Mapping = {}

    @classmethod
    def register(cls, *component_names):
        """Register ``cls`` as the processor for every given component name."""
        for name in component_names:
            cls._Mapping[name] = cls

    @classmethod
    def cls_from_component(cls, component):
        """Return the processor class registered under ``component``.

        Raises:
            KeyError: If nothing was registered under ``component``.
        """
        return cls._Mapping[component]


class Component1ErrorLogProcessor(ComponentLogProcessor):
    """Processor for component 1 logs."""

    def process(self, logToProcess):
        """Process one log entry."""
        # Do something with the logs
        pass


Component1ErrorLogProcessor.register('Component1Log-', '[Component1]')


class Component2ErrorLogProcessor(ComponentLogProcessor):
    """Processor for component 2 logs."""

    def process(self, logToProcess):
        """Process one log entry."""
        # Do something with the logs
        pass


Component2ErrorLogProcessor.register('Component2Log-', '[Component2]')


class LogProcessor:
    """Dispatches log lines through the ComponentLogProcessor registry."""

    def process_line(self, line, component):
        """Look up the processor class for ``component`` and process ``line``.

        Raises:
            KeyError: If ``component`` has no registered processor.
        """
        processor_cls = ComponentLogProcessor.cls_from_component(component)
        # The registry stores classes, so build an instance before calling
        # ``process`` (the method the processors actually define).
        processor_cls().process(line)
Related
I'm trying to create threads to run a class method. However, when I try to pass one class to another, it tries to initialize the class and never gets threaded.
I'm taking a list of tuples and trying to pass that list to the cfThread class, along with the class method that I want to use. From there, I'd like to create a separate thread to run the class's method and act on one of the tuples from the list. REPLACEME is a placeholder because the class expects a tuple but I don't have one to pass to it yet. My end goal is to be able to pass a target (class / function) to a thread class that can create its own queue and manage the threads, without my having to do it manually.
Below is a simple example to hopefully do a better job of explaining what I'm trying to do.
#!/bin/python3.10
import concurrent.futures
class math:
    """Arithmetic on the first two items of an indexable ``num``."""

    def __init__(self, num) -> None:
        self.num = num

    def add(self):
        """Return ``num[0] + num[1]``."""
        first = self.num[0]
        second = self.num[1]
        return first + second

    def sub(self):
        """Return ``num[0] - num[1]``."""
        first = self.num[0]
        second = self.num[1]
        return first - second

    def mult(self):
        """Return ``num[0] * num[1]``."""
        first = self.num[0]
        second = self.num[1]
        return first * second
class cfThread:
    """Fans ``target`` out over the items of ``args`` on a thread pool."""

    def __init__(self, target, args):
        self.target = target
        self.args = args

    def run(self):
        """Submit ``target(item)`` for every item in ``args``.

        Returns:
            list: The ``Future`` objects in completion order; call
            ``.result()`` on each to get the value ``target`` returned.
        """
        with concurrent.futures.ThreadPoolExecutor(10) as pool:
            pending = [pool.submit(self.target, item) for item in self.args]
            finished = list(concurrent.futures.as_completed(pending))
        return finished
# Script entry point: builds the sample tuples and prints what cfThread.run() returns.
if __name__ == '__main__':
numbers = [(1,2),(3,4),(5,6)]
# NOTE: math(REPLACEME).add() is called right here, so `target` receives the
# value returned by add() rather than a callable. REPLACEME is a placeholder
# name that is not defined anywhere in this snippet.
results = cfThread(target=math(REPLACEME).add(), args=numbers).run()
print(results)
target has to be a callable; you want to wrap your call to add in a lambda expression.
# The lambda builds a fresh ``math`` for each tuple; run() must still be
# called to submit the work and get the futures back.
results = cfThread(target=lambda x: math(x).add(), args=numbers).run()
I recently started to work with Python's classes, since I need to work with it through the use of OTree, a Python framework used for online experiment.
In one file, I define the pages that I want to be created, using classes. So essentially, in the OTree system, each class corresponds to a new page. The thing is, all pages (and so all classes) are basically the same, with the exception of two parameters, as shown in the following code:
class Task1(Page):
    """Page for the ``Toy`` word list."""

    form_model = 'player'
    form_fields = ['Envie_WordsList_Toy']

    def is_displayed(self):
        """Return True only in the round stored at ``task_rounds[1]``."""
        return self.round_number == self.participant.vars['task_rounds'][1]

    def vars_for_template(player):
        """Return a shuffled copy of ``Constants.WordsList_Toy`` for the template."""
        # Shuffle a copy so the list stored on Constants keeps its order.
        WordsList_Toy = Constants.WordsList_Toy.copy()
        random.shuffle(WordsList_Toy)
        return dict(
            WordsList_Toy=WordsList_Toy
        )

    @staticmethod
    def live_method(player, data):
        """Store ``int(data)`` on ``player.WTP_WordsList_Toy``."""
        player.WTP_WordsList_Toy = int(data)

    def before_next_page(self):
        """Copy the player's two ``Toy`` answers into ``participant.vars``."""
        self.participant.vars['Envie_WordsList_Toy'] = self.player.Envie_WordsList_Toy
        self.participant.vars['WTP_WordsList_Toy'] = self.player.WTP_WordsList_Toy
So here, the only thing that would change would be the name of the class, as well as the suffix of the variable WordsList_ used throughout this code, which is Toy.
Naively, what I tried to do is to define a function that would take those two parameters, such as this:
# NOTE: this attempt is not valid Python. Identifiers (class names, assignment
# targets, attribute names, keyword-argument names) cannot be assembled with
# `+ str(...)`; they have to be literal names in the source.
def page_creation(Task_Number,name_type):
class Task+str(Task_Number)(Page):
form_model = 'player'
form_fields = ['Envie_WordsList_'+str(name_type)]
def is_displayed(self):
return self.round_number == self.participant.vars['task_rounds'][1]
def vars_for_template(player):
WordsList_+str(name_type) = Constants.WordsList+str(name_type).copy()
random.shuffle(WordsList_+str(name_type))
return dict(
WordsList_+str(name_type)=WordsList_+str(name_type)
)
#staticmethod
def live_method(player, data):
player.WTP_WordsList_+str(name_type) = int(data)
def before_next_page(self):
# NOTE: in the two keys below, `+str(name_type)` sits inside the quotes, so it
# is literal key text rather than a concatenation.
self.participant.vars['Envie_WordsList_+str(name_type)'] = self.player.Envie_WordsList_+str(name_type)
self.participant.vars['WTP_WordsList_+str(name_type)'] = self.player.WTP_WordsList_+str(name_type)
Obviously, it does not work since I have the feeling that it is not possible to construct variables (or classes identifier) this way. I just started to really work on Python some weeks ago, so some of its aspects might escape me still. Could you help me on this issue? Thank you.
You can generate dynamic classes using the type constructor:
MyClass = type("MyClass", (BaseClass1, BaseClass2), {"attr1": "value1", ...})
Thus, according to your case, that would be:
cls = type(f"Task{TaskNumber}", (Page, ), {"form_fields": [f"Envive_WordList_{name_type}"], ...})
Note that you still have to construct your common methods like __init__, is_displayed and so on, as inner functions of the class factory:
def class_factory(*args, **kwargs):
...
def is_displayed(self):
return self.round_number == self.participant.vars['task_rounds']
def vars_for_template(player):
...
# Classmethod wrapping is done below
def live_method(player, data):
...
cls = type(..., {
"is_displayed": is_displayed,
"vars_for_template": vars_for_template,
"live_method": classmethod(live_method),
...,
}
@classmethod can also be applied as a plain function call: {"live_method": classmethod(my_method)}
I have this main class
# Picks a pipeline strategy, then walks the tables returned by
# fetch_table_information_by_region and calls strategy.split for those whose
# "split_required" entry is truthy.
# NOTE(review): `type`, `train_pipeline_type`, `region`, `spark`, `table_name`,
# `filtered_data_location`, `partition_column` and `split_output_dir` are not
# defined in this excerpt; presumably they are derived from `args`. Confirm.
def main(args):
if type == train_pipeline_type:
strategy = TrainPipelineStrategy()
else:
strategy = TestPipelineStrategy()
for table in fetch_table_information_by_region(region):
split_required = DataUtils.load_from_dict(table, "split_required")
if split_required:
strategy.split(spark=spark, table_name=table_name,
data_loc=filtered_data_location, partition_column=partition_column,
split_output_dir= split_output_dir)
logger.info("Data Split for table : {} completed".format(table_name))
My TrainPipelineStrategy, and TestPipelineStrategy looks like this -
class PipelineTypeStrategy(object):
    """Base strategy declaring the data-splitting steps."""

    def partition_data(self, x):
        # Something
        pass

    def prepare_split_data(self, y):
        # Something
        pass

    def write_split_data(self, z):
        # Something
        pass

    def split(self, p):
        # Something
        pass


class TrainPipelineStrategy(PipelineTypeStrategy):
    """Training strategy; inherits every step unchanged."""


class TestPipelineStrategy(PipelineTypeStrategy):
    """Test strategy; overrides only ``write_split_data``."""

    def write_split_data(self, y):
        # Something else
        pass
My test case -
I need to test how many times split is called by mocking split functionality in main method.
Here is what i have tried -
# NOTE(review): this target path addresses TrainPipelineStrategy as an
# attribute of PipelineTypeStrategy.
@patch('module.PipelineTypeStrategy.TrainPipelineStrategy')
def test_split_data_main_split_data_call_count(self, fake_train):
    """Check how many times ``SplitData.main`` calls ``split``."""
    # Stand-in strategy instance whose split() returns None and records calls.
    fake_train_functions = mock.Mock()
    fake_train_functions.split.return_value = None
    # Instantiating the patched class yields the stand-in above.
    fake_train.return_value = fake_train_functions
    test_args = ["", "--x=6"]
    SplitData.main(args=test_args)
    assert fake_train_functions.split.call_count == 10
When I try to run my test, it creates the mock but ultimately ends up calling the actual split function. What am I doing wrong?
The main issue with this code is that the patch target you set up would only be correct if TrainPipelineStrategy were a nested class of PipelineTypeStrategy; in fact, TrainPipelineStrategy is a subclass of PipelineTypeStrategy.
Since TrainPipelineStrategy inherits from PipelineTypeStrategy it has access to split directly, so you can patch split without any reference to PipelineTypeStrategy (unless you specifically want to patch the version of split defined in PipelineTypeStrategy).
However, if you just want to mock the split method of the PipelineTypeStrategy class, you should use the patch.object decorator to mock just split instead of mocking the whole class as it's a bit more clean. Here's an example:
class TestClass(unittest.TestCase):
    """Counts calls to ``TrainPipelineStrategy.split`` made by ``SplitData.main``."""

    # patch.object swaps only the ``split`` attribute; the rest of the class
    # stays real.
    @patch.object(TrainPipelineStrategy, 'split', return_value=None)
    def test_split_data_main_split_data_call_count(self, mock_split):
        """``main`` is expected to call ``split`` ten times."""
        test_args = ["", "--x=6"]
        SplitData.main(args=test_args)
        self.assertEqual(mock_split.call_count, 10)
If you have seen my other question you will know I am having a very hard time at the moment with unit tests in Python. Two days of trying and I've made no progress.
In my method, which is part of a class, there are several calls to a DAL.
car_registration = self.dal.cars.get_by_registration('121D121')
This DAL is configured in the base class. I want to completely override/mock these calls when running my unit tests and instead return predefined responses so I can continue with the method and ensure everything works as expected.
The method starts off with:
def change_registration(self):
body = json.loads(self.request.body)
registration = body['registration']
car = self.dal.cars.get_by_registration(registration)
My Python test file at the moment is:
class CarTestCase(unittest.TestCase):
    """Calls ``CarController.change_registration`` with the DAL lookup stubbed."""

    def setUp(self):
        self.car_controller = CarController()

    def test_change_registrations(self):
        """Swap the DAL lookup for a mock returning 3, then call the method."""
        stub_lookup = MagicMock(return_value=3)
        self.car_controller.dal.cars.get_by_registration = stub_lookup
        response = self.car_controller.change_registration()
I am expecting to get the response 3. However, an error is being thrown.
AttributeError: 'CarController' object has no attribute '_py_object'
It appears the mocking isn't working and it is still trying to use the main DAL, which isn't fully set up when running the unit tests. How do I prevent it from looking for the actual DAL and make it use the mocks instead?
I think you are not showing us the code that triggers the error because there is nothing wrong with your strategy. Using some imagination to mimic code we don't have I can write this and it runs with no problem:
import unittest
from unittest.mock import MagicMock
class CarList():
    """Stand-in car collection used by the example."""

    def get_by_registration(self, registration):
        """Placeholder lookup: does nothing and returns ``None``."""
        return None
class Dal:
    """Minimal data-access layer exposing a ``cars`` collection."""

    def __init__(self):
        car_list = CarList()
        self.cars = car_list
class CarController:
    """Controller that owns a ``Dal`` and looks cars up through it."""

    def __init__(self):
        self.dal = Dal()

    def change_registration(self):
        """Return whatever the DAL lookup yields for a ``None`` registration."""
        return self.dal.cars.get_by_registration(None)
class CarTestCase(unittest.TestCase):
    """Shows that replacing the DAL lookup on the instance is enough."""

    def setUp(self):
        self.car_controller = CarController()

    def test_change_registrations(self):
        """The controller returns the value supplied by the mocked lookup."""
        stub_lookup = MagicMock(return_value=3)
        self.car_controller.dal.cars.get_by_registration = stub_lookup
        result = self.car_controller.change_registration()
        self.assertEqual(result, 3)
unittest.main()
Here my example:
# test_tools_file.py
import unittest
from unittest.mock import patch, Mock, call
import test_tools_file
class MyObject():
    """Wraps a sized ``data`` value."""

    def __init__(self, data):
        self.data = data

    def get_data(self):
        """Return the wrapped data."""
        return self.data

    def count(self):
        """Return the length of whatever ``get_data()`` returns."""
        # Goes through get_data() rather than self.data, so replacing
        # get_data changes the count.
        items = self.get_data()
        return len(items)
class TestFile(unittest.TestCase):
    """Test cases."""

    @patch("test_tools_file.MyObject.get_data")
    def test_1(self, mock_get):
        """``count()`` reports the length of what the patched ``get_data`` returns."""
        mock_get.return_value = [1,2,3,4,5,6]
        obj = MyObject(["12", "13"])
        result = obj.count()
        # 6 comes from the mocked list, not from the two items passed in.
        self.assertEqual(result, 6)
Personal opinion: I would suggest you to start simple. Use anything 'magic' once you understand what it provides you over the non-magic way. Using a non-magic solution tends to be easier to understand.
I think you have multiple simple solutions at hand. For what you tried to achieve, instead of:
self.car_controller.dal.cars.get_by_registration = MagicMock(return_value=3)
You could try:
# The stub must accept the ``registration`` argument the controller passes;
# a zero-argument lambda would raise TypeError when called.
self.car_controller.dal.cars.get_by_registration = lambda registration: 3
But you mentioned you want to replace all methods. In that case I would consider a 'simple' dependency injection. If you find the code difficult to test, it might be a sign that another design would be better (that's the idea behind TDD — Test-Driven Development). A simple dependency injection is one where, for example, you just pass dal to the constructor of CarController.
Here is a complete example with some variations of the test:
from unittest import TestCase
from unittest.mock import Mock
class SomeDal(object):
    """Example data-access object."""

    def get_something(self):
        """Return the fixed string ``'something'``."""
        value = 'something'
        return value
class SomeController(object):
    """Controller that receives its DAL through the constructor."""

    def __init__(self, dal):
        self.dal = dal

    def upper_something(self):
        """Return the DAL's ``get_something()`` value upper-cased."""
        raw = self.dal.get_something()
        return raw.upper()
class TestSomeController(TestCase):
    """Four ways of giving ``SomeController`` a stand-in DAL."""

    def test_using_simple_patch(self):
        """Overwrite the method on the DAL the controller already holds."""
        subject = SomeController(SomeDal())
        subject.dal.get_something = lambda: 'anything'
        assert subject.upper_something() == 'ANYTHING'

    def test_using_simple_patch_and_injection(self):
        """Overwrite the method first, then inject the DAL."""
        patched_dal = SomeDal()
        patched_dal.get_something = lambda: 'anything'
        subject = SomeController(patched_dal)
        assert subject.upper_something() == 'ANYTHING'

    def test_using_simple_mock_class(self):
        """Inject a hand-written replacement class."""
        class MockDal(object):
            def get_something(self):
                return 'anything'

        subject = SomeController(MockDal())
        assert subject.upper_something() == 'ANYTHING'

    def test_using_semi_magic_mock(self):
        """Inject a ``Mock`` created with ``spec=SomeDal``."""
        fake_dal = Mock(spec=SomeDal)
        fake_dal.get_something.return_value = 'anything'
        subject = SomeController(fake_dal)
        assert subject.upper_something() == 'ANYTHING'
I have a script that takes a list of metrics as an input, and then fetches those metrics from the database to perform various operations with them.
My problem is that different clients get different subsets of the metrics, but I don't want to write a new IF block every time we add a new client. So right now, I have a large IF block that calls different functions based on whether the corresponding metric is in the list. What is the most elegant or Pythonic way of handling this?
Setup and function definitions:
clientOne = ['churn','penetration','bounce']
clientTwo = ['engagement','bounce']
# Computes the churn metric for one client and returns it.
# NOTE: pseudo-code; `sql to get churn` and `[...]` stand for elided parts, and
# `cursor` is not defined in this excerpt.
def calcChurn(clientId):
churn = cursor.execute(sql to get churn)
[...]
return churn
# Computes the engagement metric for one client and returns it.
# NOTE: pseudo-code; `sql to get engagement` and `[...]` stand for elided
# parts, and `cursor` is not defined in this excerpt.
def calcEngagement(clientId):
engagement = cursor.execute(sql to get engagement)
[...]
return engagement
Imagine three other functions in a similar format, so there is one function that corresponds to each unique metric. Now here is the block of code in the script that takes the list of metrics:
def scriptName(client, clientId):
    """Compute each metric that is listed in ``client``.

    Args:
        client: Collection of metric-name strings enabled for this client.
        clientId: Identifier passed on to each metric function.
    """
    # Metric names are strings, so membership is tested against string
    # literals; the bare names churn/engagement/penetration are undefined here.
    if 'churn' in client:
        churn = calcChurn(clientId)
    if 'engagement' in client:
        engagement = calcEngagement(clientId)
    if 'penetration' in client:
        [...]
Generally, you'd create a mapping of names to functions and use that to calculate the stuff you want:
# Dispatch table: metric name -> function that computes it for a client id.
client_action_map = {
    'churn': calcChurn,
    'engagement': calcEngagement,
    # ... one entry for each remaining metric
}
def scriptName(actions, clientId):
    """Run every requested metric for one client.

    Args:
        actions: Iterable of metric names; each must be a key of
            ``client_action_map``.
        clientId: Identifier passed to each metric function.

    Returns:
        dict: Metric name -> value returned by its function.

    Raises:
        KeyError: If a name in ``actions`` is not in ``client_action_map``.
    """
    return {name: client_action_map[name](clientId) for name in actions}
You can create a class with static methods and use getattr to get the correct method. It's similar to what mgilson suggests but you essentially get the dict creation for free:
class Calculators:
    """Namespace of metric calculators, looked up by name with ``getattr``."""

    @staticmethod
    def calcChurn():
        """Print a marker showing that the churn calculator ran."""
        print("called calcChurn")

    @staticmethod
    def calcEngagement():
        """Print a marker showing that the engagement calculator ran."""
        print("called calcEngagement")

    @staticmethod
    def calcPenetration():
        """Print a marker showing that the penetration calculator ran."""
        print("called calcPenetration")
stats = ["churn", "engagement", "penetration", "churn", "churn", "engagement", "undefined"]
def capitalise(str):
    """Return ``str`` with its first character upper-cased.

    The rest of the string is left untouched (unlike ``str.capitalize``,
    which lower-cases it). An empty string is returned unchanged.
    """
    # Slicing instead of indexing avoids an IndexError on the empty string.
    return str[:1].upper() + str[1:]
# Call the calculator matching each statistic name, reporting unknown names.
for stat in stats:
    # Look the calculator up without a try block, so that an AttributeError
    # raised inside a calculator is not misreported as an unknown statistic.
    calculator = getattr(Calculators, "calc" + capitalise(stat), None)
    if calculator is None:
        print("Unknown statistic: " + stat)
    else:
        calculator()
called calcChurn
called calcEngagement
called calcPenetration
called calcChurn
called calcChurn
called calcEngagement
Unknown statistic: undefined
Perhaps it might make sense to encapsulate the required calls inside an object.
If it makes sense for your clients to be object and especially if many clients call the same set of functions to obtain metrics, then you could create a set of Client sub classes, which call a predefined set of the functions to obtain metrics.
It's a bit heavier than the mapping dict.
''' Stand alone functions for sql commands.
These definitions however dont really do anything.
'''
def calc_churn(clientId):
    """Return a placeholder churn result; ``clientId`` is not used."""
    outcome = 'result for calc_churn'
    return outcome
def calc_engagement(clientId):
    """Return a placeholder engagement result; ``clientId`` is not used."""
    outcome = 'result for calc_engagement'
    return outcome
''' Client base object '''
class Client(object):
    """Base client that stores metric callables and the arguments for them."""

    def __init__(self, id):
        self.id = id
        self.metrics = []
        self.args = []

    def add_metrics(self, metrics, *args):
        """Append ``metrics`` and replace the stored call arguments with ``args``."""
        self.metrics += metrics
        self.args = args

    def execute_metrics(self):
        """Call every stored metric with the stored arguments.

        Returns:
            dict: Maps each metric's ``__name__`` to the value it returned.
        """
        outcome = {}
        for metric in self.metrics:
            label = metric.__name__
            outcome[label] = metric(*self.args)
        return outcome
''' Specific sub classes '''
class Client1(Client):
    """Client whose metric set is ``calc_churn`` only."""

    def __init__(self, id):
        # define which methods are called for this class
        super(Client1, self).__init__(id)
        enabled = [calc_churn]
        self.add_metrics(enabled, id)
class Client2(Client):
    """Client whose metric set is ``calc_churn`` and ``calc_engagement``."""

    def __init__(self, id):
        # define which methods are called for this class
        super(Client2, self).__init__(id)
        enabled = [calc_churn, calc_engagement]
        self.add_metrics(enabled, id)
''' create client objects and '''
c1 = Client1(1)
c2 = Client2(2)
for client in [c1, c2]:
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(client.execute_metrics())
The result you will get from execute_metrics is a dict mapping the function name to its results for that client.