pytest: Run general test with dynamic input from files - python

I'm currently trying to write a Python test module with pytest which reads several different pieces of information from a bunch of files. This information shall be used as inputs (fixtures) for 2 general tests. These 2 tests shall be run for each fixture (so for each file).
What I want are dynamic fixtures generated from the content of an undefined amount of text files (jsons).
Then I have two pytest "test_*" functions which are fed with this undefined amount of fixtures.
Is such a thing possible with pytest? I already tried the lib pytest-cases, but I was unable to achieve my desired solution.
That is my current code base, where I want the yield to return a "for each *.json" fixture:
from pytest_cases import parametrize_with_cases, get_case_id
class Foo:
    """Container for the case generators consumed by the rule tests.

    Each generator yields ``(rule_name, rule_definition, events)`` tuples.
    Both currently yield one hard-coded placeholder tuple.
    """

    def matching_events(self):
        """Yield the cases for events that should match a rule."""
        # - glob for *.json files
        # - read the rule files and events
        # generate the fixture objects and return them for each *.json file
        placeholder_case = ("test_rule_name", {}, {})
        yield placeholder_case

    def mismatching_events(self):
        """Yield the cases for events that should not match a rule."""
        # - glob for *.json files
        # - read the rule files and events
        # generate the fixture objects and return them for each *.json file
        placeholder_case = ("test_rule_name", {}, {})
        yield placeholder_case
def case_id_generator(case_fun):
    """Build a custom test case id by wrapping ``case_fun`` in ``#`` marks.

    Args:
        case_fun: The object to render into the id; it is converted with
            ``str()``.

    Returns:
        The string ``"#<case_fun>#"``.
    """
    # An f-string formats the argument as a single value. The ``%`` operator
    # would unpack a tuple argument, so a tuple could raise TypeError or lose
    # its tuple form.
    return f"#{case_fun}#"
@parametrize_with_cases(
    "rule_name, rule_definition, events",
    cases=Foo,
    prefix="matching_",
    ids=case_id_generator,
)
def test_rule_match(rule_name, rule_definition, events):
    """Check the types of one case produced by ``Foo.matching_*``.

    Args:
        rule_name: Expected to be a ``str``.
        rule_definition: Expected to be a ``dict``.
        events: Expected to be a ``dict``.
    """
    assert isinstance(rule_name, str)
    assert isinstance(rule_definition, dict)
    assert isinstance(events, dict)
    # do some more things with the rule definition and events
@parametrize_with_cases(
    "rule_name, rule_definition, events",
    cases=Foo,
    prefix="mismatching_",
    ids=case_id_generator,
)
def test_rule_mismatch(rule_name, rule_definition, events):
    """Check the types of one case produced by ``Foo.mismatching_*``.

    Args:
        rule_name: Expected to be a ``str``.
        rule_definition: Expected to be a ``dict``.
        events: Expected to be a ``dict``.
    """
    assert isinstance(rule_name, str)
    assert isinstance(rule_definition, dict)
    assert isinstance(events, dict)
    # do some more things with the rule definition and events

Related

Mocking os.path.exists and os.makedirs returning AssertionError

I have a function like below.
# in retrieve_data.py
import logging
import os
def create_output_csv_file_path_and_name(output_folder='outputs') -> str:
    """Return the path of the output CSV file, creating its folder if needed.

    Args:
        output_folder: Folder that will hold the CSV file. It is created
            when ``os.path.exists`` reports that it is missing.

    Returns:
        ``output_folder`` joined with ``'results.csv'``.
    """
    # The explicit exists()/makedirs() pair is kept on purpose: the unit test
    # for this function asserts that each of the two calls happens once.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        # Lazy %-formatting renders the same message as the former f-string.
        logging.info("New folder created for output file: %s", output_folder)
    return os.path.join(output_folder, 'results.csv')
I also created a unit test file like below.
# in test_retrieve_data.py
class OutputCSVFilePathAndNameCreationTest(unittest.TestCase):
    """Test for ``create_output_csv_file_path_and_name`` as originally posted."""

    # NOTE: stacked ``patch`` decorators are applied bottom to top, so the
    # ``os.makedirs`` mock is injected first and the ``os.path.exists`` mock
    # second. The parameter order below is left exactly as posted, because it
    # is the subject of the answer that follows.
    @patch('path.to.retrieve_data.os.path.exists')
    @patch('path.to.retrieve_data.os.makedirs')
    def test_create_output_csv_file_path_and_name_calls_exists_and_makedirs_once_when_output_folder_is_not_created_yet(
        self,
        os_path_exists_mock,
        os_makedirs_mock
    ):
        os_path_exists_mock.return_value = False
        retrieve_cradle_profile_details.create_output_csv_file_path_and_name()
        os_path_exists_mock.assert_called_once()
        os_makedirs_mock.assert_called_once()
But when I run the above unit test, I get the following error.
def assert_called_once(self):
"""assert that the mock was called only once.
"""
if not self.call_count == 1:
msg = ("Expected '%s' to have been called once. Called %s times.%s"
% (self._mock_name or 'mock',
self.call_count,
self._calls_repr()))
raise AssertionError(msg)
AssertionError: Expected 'makedirs' to have been called once. Called 0 times.
I tried poking around with pdb.set_trace() in the create_output_csv_file_path_and_name method and I'm sure it is receiving a mocked object for os.path.exists(), but the code never gets past that os.path.exists(output_folder) check (output_folder was already created in the program folder, but I do not use it for unit testing purposes and want to leave it alone). What could I possibly be doing wrong here to mock os.path.exists() and os.makedirs()? Thank you in advance for your answers!
You have the arguments to your test function reversed. When you have stacked decorators, like:
@patch("retrieve_data.os.path.exists")
@patch("retrieve_data.os.makedirs")
def test_create_output_csv_file_path_...():
They apply bottom to top, so you need to write:
@patch("retrieve_data.os.path.exists")
@patch("retrieve_data.os.makedirs")
def test_create_output_csv_file_path_and_name_calls_exists_and_makedirs_once_when_output_folder_is_not_created_yet(
self, os_makedirs_mock, os_path_exists_mock
):
With this change, if I have this in retrieve_data.py:
import os
import logging
def create_output_csv_file_path_and_name(output_folder='outputs') -> str:
    """Return where the results CSV should be written.

    The folder named by ``output_folder`` is created first when
    ``os.path.exists`` reports it as missing, and that creation is logged.
    The returned value is ``output_folder`` joined with ``'results.csv'``.
    """
    folder_is_missing = not os.path.exists(output_folder)
    if folder_is_missing:
        os.makedirs(output_folder)
        logging.info(f"New folder created for output file: {output_folder}")
    csv_path = os.path.join(output_folder, 'results.csv')
    return csv_path
And this is test_retrieve_data.py:
import unittest
from unittest.mock import patch
import retrieve_data
class OutputCSVFilePathAndNameCreationTest(unittest.TestCase):
    """Checks that a missing output folder is probed and created exactly once."""

    # Stacked ``patch`` decorators are applied bottom to top, so the mock for
    # ``os.makedirs`` is the first injected argument and the mock for
    # ``os.path.exists`` is the second.
    @patch("retrieve_data.os.path.exists")
    @patch("retrieve_data.os.makedirs")
    def test_create_output_csv_file_path_and_name_calls_exists_and_makedirs_once_when_output_folder_is_not_created_yet(
        self, os_makedirs_mock, os_path_exists_mock
    ):
        # Pretend the folder does not exist so the creation branch runs.
        os_path_exists_mock.return_value = False
        retrieve_data.create_output_csv_file_path_and_name()
        os_path_exists_mock.assert_called_once()
        os_makedirs_mock.assert_called_once()
Then the tests run successfully:
$ python -m unittest -v
test_create_output_csv_file_path_and_name_calls_exists_and_makedirs_once_when_output_folder_is_not_created_yet (test_retrieve_data.OutputCSVFilePathAndNameCreationTest.test_create_output_csv_file_path_and_name_calls_exists_and_makedirs_once_when_output_folder_is_not_created_yet) ... ok
----------------------------------------------------------------------
Ran 1 test in 0.001s
OK
Update: I wanted to leave a comment on the diagnostics I performed here, because I didn't initially spot the reversed arguments, either, but the problem became immediately apparent when I added a breakpoint() at the beginning of the test and printed out the values of the mocks:
(Pdb) p os_path_exists_mock
<MagicMock name='makedirs' id='140113966613456'>
(Pdb) p os_makedirs_mock
<MagicMock name='exists' id='140113966621072'>
The fact that the names were swapped made the underlying problem easy to spot.

Unittest simulate reading a yaml file with a mock

I am trying to test a function which reads a file and returns the content of the file, or returns None if the file is not found.
def read_yaml_from_cwd(file: str) -> Dict:
    """Read a YAML file located in the current working directory.

    Args:
        file: Name of the ``.yaml`` or ``.yml`` file, resolved against the
            current working directory.

    Returns:
        The content parsed by ``yaml.load`` with ``SafeLoader``, or ``None``
        when ``os.path.isfile`` reports that the file does not exist.
        NOTE(review): the ``Dict`` annotation does not cover the ``None``
        result; widening it to ``Optional[Dict]`` would need an extra import.
    """
    path = os.path.join(Path.cwd().resolve(), file)
    # Guard clause: a missing file is reported as None, not raised.
    if not os.path.isfile(path):
        return None
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, which open() would otherwise use.
    with open(path, encoding="utf-8") as f:
        return yaml.load(f, Loader=SafeLoader)
This is my test:
from unittest import mock, TestCase
from project import fs
class TextExamples(TestCase):
    """First attempt at testing ``fs.read_yaml_from_cwd`` with a mock."""

    def test_read_yaml_from_cwd(self):
        # NOTE(review): only ``os.listdir`` is patched here. Both calls below
        # use the same file name while the two assertions expect opposite
        # results, so the test cannot pass as written.
        with mock.patch('os.listdir') as mocked_listdir:
            mocked_listdir.return_value = ['test-config.yml']
            val = fs.read_yaml_from_cwd("false-config.yml")
            self.assertIsNone(val)
            val2 = fs.read_yaml_from_cwd("false-config.yml")
            self.assertIsNotNone(val2)
I guess I am fundamentally doing something wrong with these tests and what these mocks do. Can somebody help me with this?
One possibility to test this is to patch both os.path.isfile and open. To patch open, there is already a special mock function, mock_open, which gives you the possibility to set the contents of the mocked file. This means that you don't have to mock yaml.load, as this will return the mocked file content. This could look something like:
from unittest import mock, TestCase
from unittest.mock import mock_open
class YamlTest(TestCase):
    """Tests ``read_yaml_from_cwd`` with ``open`` and ``os.path.isfile`` patched."""

    # ``open`` is replaced by an explicit ``mock_open`` object, so that patch
    # injects no argument; only the ``os.path.isfile`` mock is passed in.
    @mock.patch("builtins.open", mock_open(read_data="data"))
    @mock.patch("os.path.isfile")
    def test_read_yaml_from_cwd(self, patched_isfile):
        # valid file case
        patched_isfile.return_value = True
        result = read_yaml_from_cwd("some_file.yaml")
        self.assertEqual("data", result)
        # invalid file case
        patched_isfile.return_value = False
        result = read_yaml_from_cwd("some_file.yaml")
        self.assertIsNone(result)
In this case you test that the function returns the file content if you pass a valid file name, and None for an invalid file name, which is probably all you want to test here.
For completeness, and because I mentioned it in the comments: using pyfakefs instead would replace the file system with a fake file system, which you can handle like a real filesystem, in this case it could look like:
from pyfakefs import fake_filesystem_unittest
class YamlTest(fake_filesystem_unittest.TestCase):
    """Tests ``read_yaml_from_cwd`` against a pyfakefs fake filesystem."""

    def setUp(self) -> None:
        """Activate pyfakefs and create the one file the test reads."""
        self.setUpPyfakefs()
        self.fs.create_file("some_file.yaml", contents="data")

    def test_read_yaml_from_cwd(self):
        """An existing file yields its content; a missing one yields None."""
        # valid file case
        existing_content = read_yaml_from_cwd("some_file.yaml")
        self.assertEqual("data", existing_content)
        # invalid file case
        missing_content = read_yaml_from_cwd("non_existing.yaml")
        self.assertEqual(None, missing_content)
This makes sense if you have many filesystem related tests, though in your case this would probably be overkill.
Disclaimer: I'm a contributor to pyfakefs.

Run pytest for each file in directory

I'm trying to build a routine that calls a Pytest class for each PDF document in the current directory... Let me explain
Lets say i have this test file
import pytest
class TestHeader:
#asserts...
class TestBody:
#asserts...
This script needs to test each pdf document in my cwd
Here is my best attempt:
import glob
import pytest
class TestHeader:
#asserts...
class TestBody:
#asserts...
filelist = glob.glob('*.pdf')
for file in filelist:
#magically call pytest for each file
How would i approach this?
EDIT: Complementing my question.
I have a huge function that extracts each document's data, lets call it extract_pdf
this function returns a tuple (header, body).
Current attempt looks like this:
import glob
import pytest
class TestHeader:
#asserts...
class TestBody:
#asserts...
filelist = glob.glob('*.pdf')
for file in filelist:
header, body = extract_pdf(file)
pytest.main(<pass header and body as args for pytest>)
I need to parse each document prior to testing. Can it be done this way?
The best way to do this is through dynamic parameterization of the test cases.
This can be achieved using the pytest_generate_tests hook..
def pytest_generate_tests(metafunc):
    """Parametrize tests that take ``fileName`` with the PDFs in the cwd.

    Args:
        metafunc: The object pytest passes to this hook; its ``fixturenames``
            and ``parametrize`` members are used.
    """
    # Only touch tests that actually request the argument, so other tests
    # are not parametrized with a name they do not accept.
    if "fileName" not in metafunc.fixturenames:
        return
    # Sort so the generated test order does not depend on directory order.
    filelist = sorted(glob.glob('*.pdf'))
    metafunc.parametrize("fileName", filelist)
NOTE: fileName should be one of the argument to your test function.
This will result in executing the testcase for each of the file in the directory and the testcase will be like
TestFunc[File1]
TestFunc[File2]
TestFunc[File3]
.
.
and so on..
This is expanding on the existing answer by @ArunKalirajaBaskaran.
The problem is that you have different test classes that want to use the same data, but you want to parse the data only once. If it is ok for you to read all data at once, you could read them into global variables and use these for parametrizing your tests:
def extract_data():
    """Parse every ``*.pdf`` in the current directory.

    Returns:
        A tuple ``(filenames, headers, bodies)`` of three parallel lists: the
        matched file names and the two values ``extract_pdf`` returned for
        each file.
    """
    names, parsed_headers, parsed_bodies = [], [], []
    for pdf_path in glob.glob('*.pdf'):
        pdf_header, pdf_body = extract_pdf(pdf_path)
        names.append(pdf_path)
        parsed_headers.append(pdf_header)
        parsed_bodies.append(pdf_body)
    return names, parsed_headers, parsed_bodies
filenames, headers, bodies = extract_data()
def pytest_generate_tests(metafunc):
    """Parametrize tests requesting ``header`` or ``body`` with the parsed data.

    A test that requests ``header`` gets the header values only; otherwise a
    test that requests ``body`` gets the body values. The file names are used
    as ids in both cases.
    """
    requested = metafunc.fixturenames
    if "header" in requested:
        # use the filename as ID for better test names
        metafunc.parametrize("header", headers, ids=filenames)
        return
    if "body" in requested:
        metafunc.parametrize("body", bodies, ids=filenames)
class TestHeader:
    """Tests that receive one parsed ``header`` per file."""

    # ``self`` is required on methods; without it the instance itself would
    # be bound to ``header``.
    def test_1(self, header):
        ...

    def test_2(self, header):
        ...
class TestBody:
    """Tests that receive one parsed ``body`` per file."""

    # ``self`` is required on methods; without it the instance itself would
    # be bound to ``body``.
    def test_1(self, body):
        ...
This is the same as using
class TestHeader:
    """Same tests, each one carrying its own parametrize decorator."""

    @pytest.mark.parametrize("header", headers, ids=filenames)
    def test_1(self, header):
        ...

    @pytest.mark.parametrize("header", headers, ids=filenames)
    def test_2(self, header):
        ...
pytest_generate_tests just adds a bit of convenience so you don't have to repeat the parametrize decorator for each test.
The downside of this is of course that you will read in all of the data at once, which may cause a problem with memory usage if there are a lot of files. Your approach with pytest.main will not work, because that is the same as calling pytest on the command line with the given parameters. Parametrization can be done at the fixture level or on the test level (like here), but both need the parameters already evaluated at load time, so I don't see a possibility to do this lazily (apart from putting it all into one test). Maybe someone else has a better idea...

How to pass a value to a Pytest fixture

I am using Pytest to test an executable. This .exe file reads a configuration file on startup.
I have written a fixture to spawn this .exe file at the start of each test and closes it down at the end of the test. However, I cannot work out how to tell the fixture which configuration file to use. I want the fixture to copy a specified config file to a directory before spawning the .exe file.
@pytest.fixture
def session(request):
    """Spawn the executable for one test and close it down afterwards.

    Args:
        request: The pytest ``request`` object, used to register the
            finalizer.

    Returns:
        The value returned by ``spawn_exe()``.
    """
    copy_config_file(specific_file)  # how do I specify the file to use?
    link = spawn_exe()

    def fin():
        close_down_exe()

    # Register the finalizer; merely defining ``fin`` never runs it.
    request.addfinalizer(fin)
    return link
# needs to use config file foo.xml
def test_1(session):
    """Talk to the executable provided by the ``session`` fixture."""
    session.talk_to_exe()
# needs to use config file bar.xml
def test_2(session):
    """Talk to the executable provided by the ``session`` fixture."""
    session.talk_to_exe()
How do I tell the fixture to use foo.xml for test_1 function and bar.xml for test_2 function?
Thanks
John
One solution is to use pytest.mark for that:
import pytest
@pytest.fixture
def session(request):
    """Spawn the executable with the config file named by a test marker.

    The requesting test must carry a ``session_config`` marker whose first
    positional argument is the config file; the test is failed otherwise.

    Yields:
        The value returned by ``spawn_exe()``; it is passed to
        ``close_down_exe`` once the test has finished.
    """
    m = request.node.get_closest_marker('session_config')
    if m is None:
        pytest.fail('please use "session_config" marker')
    specific_file = m.args[0]
    copy_config_file(specific_file)
    link = spawn_exe()
    yield link
    # Teardown: runs after the requesting test has finished.
    close_down_exe(link)
@pytest.mark.session_config("foo.xml")
def test_1(session):
    """Talk to the executable started with ``foo.xml``."""
    session.talk_to_exe()
@pytest.mark.session_config("bar.xml")
def test_2(session):
    """Talk to the executable started with ``bar.xml``."""
    session.talk_to_exe()
Another approach would be to just change your session fixture slightly to delegate the creation of the link to the test function:
import pytest
@pytest.fixture
def session_factory(request):
    """Provide a factory that spawns the executable for a given config file.

    Yields:
        ``make_link(specific_file)``, which copies the config file, spawns
        the executable and returns the resulting link. Every link created
        through the factory is closed down after the test.
    """
    links = []

    def make_link(specific_file):
        copy_config_file(specific_file)
        link = spawn_exe()
        # Remember the link so teardown can close it.
        links.append(link)
        return link

    yield make_link
    # Teardown: close everything the test spawned, in creation order.
    for link in links:
        close_down_exe(link)
def test_1(session_factory):
    """Spawn the executable with ``foo.xml`` and talk to it."""
    foo_session = session_factory('foo.xml')
    foo_session.talk_to_exe()
def test_2(session_factory):
    """Spawn the executable with ``bar.xml`` and talk to it."""
    # The fixture to request is ``session_factory``; the body calls it, so
    # asking for ``session`` would leave ``session_factory`` undefined.
    session = session_factory('bar.xml')
    session.talk_to_exe()
I prefer the latter as it's simpler to understand and allows for more improvements later, for example, if you need to use @parametrize in a test based on the config value. Also notice the latter allows spawning more than one executable in the same test.

Proper way to organize testcases that involve a data file for each testcase?

I'm writing a module that involves parsing html for data and creating an object from it. Basically, I want to create a set of testcases where each case is an html file paired with a golden/expected pickled object file.
As I make changes to the parser, I would like to run this test suite to ensure that each html page is parsed to equal the 'golden' file (essentially a regression suite)
I can see how to code this as a single test case, where I would load all file pairs from some directory and then iterate through them. But I believe this would end up being reported as a single test case, pass or fail. But I want a report that says, for example, 45/47 pages parsed successfully.
How do I arrange this?
I've done similar things with the unittest framework by writing a function which creates and returns a test class. This function can then take in whatever parameters you want and customise the test class accordingly. You can also customise the __doc__ attribute of the test function(s) to get customised messages when running the tests.
I quickly knocked up the following example code to illustrate this. Instead of doing any actual testing, it uses the random module to fail some tests for demonstration purposes. When created, the classes are inserted into the global namespace so that a call to unittest.main() will pick them up. Depending on how you run your tests, you may wish to do something different with the generated classes.
import os
import unittest
# Generate a test class for an individual file.
def make_test(filename):
    """Create a ``unittest.TestCase`` subclass dedicated to one file.

    Args:
        filename: Name of the file the generated test is about. It is
            embedded in the docstring of the generated test method.

    Returns:
        The new test class, which has a single ``test_file`` method.
    """
    class TestClass(unittest.TestCase):
        def test_file(self):
            # Do the actual testing here.
            # parsed = do_my_parsing(filename)
            # golden = load_golden(filename)
            # self.assertEqual(parsed, golden, 'Parsing failed.')
            # Randomly fail some tests.
            import random
            if not random.randint(0, 10):
                # assertEqual replaces the deprecated assertEquals alias,
                # which was removed in Python 3.12.
                self.assertEqual(0, 1, 'Parsing failed.')
        # Set the docstring so we get nice test messages.
        test_file.__doc__ = 'Test parsing of %s' % filename
    return TestClass
# One test class for a single, explicitly named file.
Test1 = make_test('file1.html')

# Several test classes generated from a numeric range.
for i in range(2, 5):
    globals()[f'Test{i}'] = make_test(f'file{i}.html')

# One test class per file found below the 'tests' directory.
for dirname, subdirs, filenames in os.walk('tests'):
    for f in filenames:
        globals()[f'Test{f}'] = make_test(f'{dirname}/{f}')

# Run every generated test when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
A sample run:
$ python tests.py -v
Test parsing of file1.html ... ok
Test parsing of file2.html ... ok
Test parsing of file3.html ... ok
Test parsing of file4.html ... ok
Test parsing of tests/file5.html ... ok
Test parsing of tests/file6.html ... FAIL
Test parsing of tests/file7.html ... ok
Test parsing of tests/file8.html ... ok
======================================================================
FAIL: Test parsing of tests/file6.html
----------------------------------------------------------------------
Traceback (most recent call last):
File "generic.py", line 16, in test_file
self.assertEquals(0, 1, 'Parsing failed.')
AssertionError: Parsing failed.
----------------------------------------------------------------------
Ran 8 tests in 0.004s
FAILED (failures=1)
The nose testing framework supports this. http://www.somethingaboutorange.com/mrl/projects/nose/
Also see here: How to generate dynamic (parametrized) unit tests in python?
Here's what I would do (untested):
files = os.listdir("/path/to/dir")
class SomeTests(unittest.TestCase):
    """Test case that per-file test methods are attached to."""

    def _compare_files(self, file_name):
        """Fail unless the golden and trial files for ``file_name`` match.

        Args:
            file_name: Base name; ``-golden`` and ``-trial`` are appended to
                it under ``/path/to/dir``.
        """
        golden_path = f'/path/to/dir/{file_name}-golden'
        trial_path = f'/path/to/dir/{file_name}-trial'
        with open(golden_path, 'r') as golden, open(trial_path, 'r') as trial:
            # assertEqual is not stripped under ``python -O`` and reports the
            # difference on failure, unlike a bare assert.
            self.assertEqual(golden.read(), trial.read())
def test_generator(file_name):
    """Build a test method that compares the files for ``file_name``.

    Args:
        file_name: Name passed on to ``self._compare_files``.

    Returns:
        A function taking ``self``, suitable for attaching to a test class
        that provides ``_compare_files``.
    """
    def test(self):
        # The stray trailing colon on this call was a syntax error.
        self._compare_files(file_name)
    return test
# When run directly: attach one generated test method per listed file to
# SomeTests, then hand control to unittest.
if __name__ == '__main__':
    for file_name in files:
        test_name = f'test_{file_name}'
        test = test_generator(file_name)
        setattr(SomeTests, test_name, test)
    unittest.main()

Categories