How to mock a zip file - python

I want to mock a ZipFile. In particular, I need a mock
Which passes a zipfile.is_zipfile() test,
Returns a list of strings for zipfile.ZipFile().namelist(), and
Uses only the standard library.
The code I am testing looks for potential Python modules1 within a given zip archive (i.e. .py, .zip, and .whl files):
# utils.py
import zipfile
from pathlib import Path
def find_modules(archive=None):
"""Find modules within a given zip archive.
Inputs:
archive (str/Path): Zip archive
Returns:
list (str): List of module names as strings
"""
possible_ext = ['.py'. '.zip', '.whl']
modules = []
if zipfile.is_zipfile(archive):
paths = [Path(p) for p in zipfile.ZipFile(archive).namelist()]
modules = [p.stem for p in paths if p.stem != '__init__' and p.suffix in possible_ext]
return modules
Voodoo solution
I have cobbled together the following test:
# test_utils.py
from mypackage import utils
from unittest import mock
class TestFunctions():
MOCK_LISTING = ['single_file_module.py', 'dummy.txt',
'package_namespace.zip', 'wheel_namespace-0.1-py3-none-any.whl']
#mock.patch('zipfile.ZipFile')
#mock.patch('zipfile.is_zipfile')
def test_find_modules_return_value(self, mock_is_zipfile, mock_zipfile):
mock_is_zipfile.return_value = True
mock_zipfile.return_value.namelist.return_value = self.MOCK_LISTING
modules = utils.find_modules('dummy_archive.zip')
assert len(modules) == 3
def main():
"""Main function used to run tests manually.
Use PyTest to run tests in bulk.
"""
tc = TestFunctions()
tc.test_find_modules_return_value()
if __name__ == '__main__':
import time
start_time = time.time()
main()
print("\nThe chosen tests have all passed.")
print("--- %s seconds ---" % (time.time() - start_time))
Questions
I found that a #mock.path('zipfile.ZipFile') alone wouldn't meet my needs; it failed a zipfile.is_zipfile() test.
If I'm mocking a ZipFile object, shouldn't it automatically pass a zipfile.is_zipfile() test?
I found that I couldn't use the same approach to overriding is_zipfile as I did namelist. That is, an additional #mock.patch('zipfile.is_zipfile') was needed. My understanding is that because a ZipFile defines a context, the first return_value overrides the __enter__ of the context, and then the next namespace is the ZipFile method level. Why doesn't the same approach work for both is_zipfile and namelist?
# Test doesn't work
# Fails on: assert 0 == 3
# + where 0 = len([])
#mock.patch('zipfile.ZipFile')
def test_find_modules_return_value(self, mock_zipfile):
mock_zipfile.return_value.is_zipfile.return_value = True
mock_zipfile.return_value.namelist.return_value = self.MOCK_LISTING
modules = utils.find_modules('dummy_archive.zip')
assert len(modules) == 3
Maybe I'm getting too far off-base and there's a simpler way to mock a .zip archive?
EDIT
Based on #Don Kirby's answer, the pattern I found most intuitive was:
def test_find_modules_return_value(self):
# Create mock zipfile and override the is_zipfile function
with mock.patch('mypackage.utils.zipfile') as mock_zipfile:
mock_zipfile.is_zipfile.return_value = True
mock_zipfile.namelist.return_value = self.MOCK_LISTING
# Since a ZipFile is a separate object, which returns a zipfile (note
# that that's lowercase), we need to mock the ZipFile and have it return
# the zipfile mock previously created.
with mock_patch('mypackage.utils.zipfile.ZipFile') as mock_ZipFile:
mock_ZipFile.return_value = mock_zipfile
modules = utils.find_modules("/dummy/path/to/check.zip")
assert len(modules) == 3
1 It's assumed that .zip files may contain modules and that .zip and .whl will be handled in a different process. The file names are all we care about in this step.

You have to patch is_zipfile() separately from ZipFile, because is_zipfile() is a function, not a method of the ZipFile class. I suppose you might be able to patch the whole zipfile module by patching mypackage.utils.zipfile, but that seems way more confusing.
The zipfile source code might be useful.

Related

Python: always import the last revision in the directory

Imagine that we have the following Data Base structure with the data stored in python files ready to be imported:
data_base/
foo_data/
rev_1.py
rev_2.py
bar_data/
rev_1.py
rev_2.py
rev_3.py
In my main script, I would like to import the last revision of the data available in the folder. For example, instead of doing this:
from data_base.foo_data.rev_2 import foofoo
from data_base.bar_data.rev_3 import barbar
I want to call a method:
import_from_db(path='data_base.foo_data', attr='foofoo', rev='last')
import_from_db(path='data_base.bar_data', attr='barbar', rev='last')
I could take a relative path to the Data Base and use glob.glob to search the last revision, but for this, I should know the path to the data_base folder, which complicates things (imagine that the parent folder of the data_base is in sys.path so the from data_base.*** import will work)
Is there an efficient way to maybe retrieve a full path knowing only part of it (data_base.foo_data)? Other ideas?
I think it's better to install the last version.
but going on with your flow, you may use getattr on the module:
from data_base import foo_data
i = 0
while True:
try:
your_module = getattr(foo_data, f'rev_{i}')
except AttributeError:
break
i += 1
# Now your_module is the latest rev
#JohnDoriaN 's idea led me to a quite simple solution:
import os, glob
def import_from_db(import_path, attr, rev_id=None):
"""
"""
# Get all the modules/folders names
dir_list = import_path.split('.')
# Import the last module
exec(f"from {'.'.join(dir_list[:-1])} import {dir_list[-1]}")
db_parent = locals()[dir_list[-1]]
# Get an absolute path to corresponding to the db_parent folder
abs_path = db_parent.__path__._path[0]
rev_path = os.path.join(abs_path, 'rev_*.py')
rev_names = [os.path.basename(x) for x in glob.glob(rev_path)]
if rev_id is None:
revision = rev_names[-1]
else:
revision = rev_names[rev_id]
revision = revision.split('.')[0]
# import attribute
exec(f'from {import_path}.{revision} import {attr}', globals())
Some explanations:
Apparently (I didn't know this), we can import a folder as a module; this module has a __path__ attribute (found out using the built-in dir method).
glob.glob allows us to use regex expressions to search for a required pattern for files in the directory.
using exec without parameters will import only in the local namespace (namespace of the method) so without polluting the global namespace.
using exec with globals() allows us to import in the global namespace.

How do I use tmpdir with my pytest.fixture?

I have a unit tests class that is testing what is inside a txt file. I am using the tmpdir fixture with pytest. This is my current class:
from objects.TicketCounter import TicketCounter
from objects.ConfigReader import ConfigReader
import os
import pytest
class TestTicketCounter():
# #pytest.fixture(scope="module") #<---Could I use this instead of passing tmpdir each time?
# def my_filepath(self, tmpdir):
# return tmpdir.mkdir("sub").join("testCurrentTicketCount.txt")
def test_createNewTicketCountFile(self, tmpdir):
x = tmpdir.mkdir("sub").join("testCurrentTicketCount.txt") #<----Repeated
ticketCounter = TicketCounter(x)
assert os.path.getsize(x) > 0
def test_addOneTicketCounter(self, tmpdir):
x = tmpdir.mkdir("sub").join("testCurrentTicketCount.txt") #<----Repeated
ticketCounter = TicketCounter(x)
beforeCount = int(ticketCounter.readTicketCountFromFile())
ticketCounter.addOneTicketCounter()
afterCount = int(ticketCounter.readTicketCountFromFile())
assert beforeCount + 1 == afterCount
def test_readTicketCountFromFile(self, tmpdir):
x = tmpdir.mkdir("sub").join("testCurrentTicketCount.txt") #<----Repeated
ticketCounter = TicketCounter(x)
print(ticketCounter.readTicketCountFromFile())
assert int(ticketCounter.readTicketCountFromFile()) >= 0
I would like to get rid of the repeated code and pass in the same path each time with the fixture that I commented out, my_filepath. When I try to use the my_parser pytest fixture, I am getting an error, saying:
ScopeMismatch: You tried to access the 'function' scoped fixture 'tmpdir' with a 'module' scoped request object, involved factories
unit_tests\test_TicketCounter.py:12:
So you are not able to use tmpdir with a pytest fixture? Is it because tmpdir is a fixture?
Any thoughts on how I could cut out the repeated code and use a function or fixture to pass the path?
As the error message says, tmpdir is a function based fixture, e.g. it creates a new temp dir for each test, and deletes it after the test. Therefore you cannot use it in a module scoped fixture, that is instantiated only once after module load. If you could do that, your temp dir would be removed after the first test, and you would not be able to access it in the next test.
In your current code, the tmpdir fixture is used as a function scoped fixture, so a new directory is created for each test - what is usually wanted. You can use your fixture without problems if you remove the module scope:
#pytest.fixture
def my_filepath(self, tmpdir):
return tmpdir.mkdir("sub").join("testCurrentTicketCount.txt")
If you want to use the same temp dir in each test for some reason, you cannot use the tmpdir fixture. In this case you just can create your own tmp dir, for example:
import os
import tempfile
import shutil
#pytest.fixture(scope="module")
def my_filepath(self):
tmpdir = tempfile.mkdtemp()
subdir = os.path.join(tmpdir, "sub")
os.mkdir(subdir)
yield os.path.join(subdir, "testCurrentTicketCount.txt")
shutil.rmtree(tmpdir)

Patching a function in a file where it is defined

I am trying to learn unittest patching. I have a single file that both defines a function, then later uses that function. When I try to patch this function, its return value is giving me the real return value, not the patched return value.
How do I patch a function that is both defined and used in the same file? Note: I did try to follow the advice given here, but it didn't seem to solve my problem.
walk_dir.py
from os.path import dirname, join
from os import walk
from json import load
def get_config():
current_path =dirname(__file__)
with open(join(current_path, 'config', 'json', 'folder.json')) as json_file:
json_data = load(json_file)
return json_data['parent_dir']
def get_all_folders():
dir_to_walk = get_config()
for root, dir, _ in walk(dir_to_walk):
return [join(root, name) for name in dir]
test_walk_dir.py
from hello_world.walk_dir import get_all_folders
from unittest.mock import patch
#patch('walk_dir.get_config')
def test_get_all_folders(mock_get_config):
mock_get_config.return_value = 'C:\\temp\\test\\'
result = get_all_folders()
assert set(result) == set('C:\\temp\\test\\test_walk_dir')
Try declaring the patch in such way:
#patch('hello_world.walk_dir.get_config')
As you can see this answer to the question you linked, it's recommended that your import statements match your patch statements. In your case from hello_world.walk_dir import get_all_folders and #patch('walk_dir.get_config') doesn't match.

How to design a command line interface (CLI) which accepts Python functions or code?

I have a function that parses a given string with specific rules. I would like to design a CLI interface for this function. But the problem is I want that a user should be able to call this function via CLI using a READER & WRITER function of its own. To make it clear, here is a sample code and a demonstration of what I'm trying to explain.
# mylib.py
# piece of code that belongs to my lib
def parser(_id, text):
# parse the text & do some magic
return (_id, parsed_text)
# user-side code
def reader():
# read from a database
# or file or network or who knows where
yield (_id, text)
# user-side code
def writer(_id, text):
# write to somewhere
return True # or false depends on write action
A sample call should be something like this:
$ python mylib.py --reader <something-that-I-dont-know>
I don't want to use eval tricks but also I want that the user should be flexible while passing data to my library. Does this possible? Or should I try another approach?
With the help of #AlexHall, I've come up with the following solution:
import pathlib
import importlib.util
def load_module(filepath):
module_path = pathlib.Path(filepath)
abs_path = module_path.resolve()
module_name = module_path.stem
spec = importlib.util.spec_from_file_location(module_name, abs_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
return module
Using this function, I am be able to import any valid python module exists in the filesystem even if the module is not in the path.
Here is a sample usage:
parser = make_parser(prog="tokenizer")
args = parser.parse_args()
module = load_module(args.writer) # if nothing is passed, default action defined in the parser
writer = module.writer
module = load_module(args.reader)
reader = module.reader
# do what you want to do with them

Dynamic importing of modules followed by instantiation of objects with a certain baseclass from said modules

I'm writing an application. No fancy GUI:s or anything, just a plain old console application. This application, lets call it App, needs to be able to load plugins on startup. So, naturally, i created a class for the plugins to inherit from:
class PluginBase(object):
def on_load(self):
pass
def on_unload(self):
pass
def do_work(self, data):
pass
The idea being that on startup, App would walk through the current dir, including subdirs, searching for modules containing classes that themselves are subclasses of PluginBase.
More code:
class PluginLoader(object):
def __init__(self, path, cls):
""" path=path to search (unused atm), cls=baseclass """
self.path=path
def search(self):
for root, dirs, files in os.walk('.'):
candidates = [fname for fname in files if fname.endswith('.py') \
and not fname.startswith('__')]
## this only works if the modules happen to be in the current working dir
## that is not important now, i'll fix that later
if candidates:
basename = os.path.split(os.getcwd())[1]
for c in candidates:
modname = os.path.splitext(c)[0]
modname = '{0}.{1}'.format(basename, modname)
__import__(mod)
module = sys.modules[mod]
After that last line in search I'd like to somehow a) find all classes in the newly loaded module, b) check if one or more of those classes are subclasses of PluginBase and c) (if b) instantiate that/those classes and add to App's list of loaded modules.
I've tried various combinations of issubclass and others, followed by a period of intense dir:ing and about an hour of panicked googling. I did find a similar approach to mine here and I tried just copy-pasting that but got an error saying that Python doesn't support imports by filename, at which point I kind of lost my concentration and as a result of that, this post was written.
I'm at my wits end here, all help appreciated.
You might do something like this:
for c in candidates:
modname = os.path.splitext(c)[0]
try:
module=__import__(modname) #<-- You can get the module this way
except (ImportError,NotImplementedError):
continue
for cls in dir(module): #<-- Loop over all objects in the module's namespace
cls=getattr(module,cls)
if (inspect.isclass(cls) # Make sure it is a class
and inspect.getmodule(cls)==module # Make sure it was defined in module, not just imported
and issubclass(cls,base)): # Make sure it is a subclass of base
# print('found in {f}: {c}'.format(f=module.__name__,c=cls))
classList.append(cls)
To test the above, I had to modify your code a bit; below is the full script.
import sys
import inspect
import os
class PluginBase(object): pass
def search(base):
for root, dirs, files in os.walk('.'):
candidates = [fname for fname in files if fname.endswith('.py')
and not fname.startswith('__')]
classList=[]
if candidates:
for c in candidates:
modname = os.path.splitext(c)[0]
try:
module=__import__(modname)
except (ImportError,NotImplementedError):
continue
for cls in dir(module):
cls=getattr(module,cls)
if (inspect.isclass(cls)
and inspect.getmodule(cls)==module
and issubclass(cls,base)):
# print('found in {f}: {c}'.format(f=module.__name__,c=cls))
classList.append(cls)
print(classList)
search(PluginBase)
You would make this a lot easier if you forced some constraints on the plugin writer, for example that all plugins must be packages that contain a load_plugin( app, config) function that returns a Plugin instance. Then all you have to do is try to import these packages and run the function.
Here is a meta-classier way to register the plugins:
Define PluginBase to be of type PluginType.
PluginType automatically registers any instance (class) in the plugins set.
plugin.py:
plugins=set()
class PluginType(type):
def __init__(cls, name, bases, attrs):
super(PluginType, cls).__init__(name, bases, attrs)
# print(cls, name,cls.__module__)
plugins.add(cls)
class PluginBase(object):
__metaclass__=PluginType
pass
This is the part that the user writes. Notice that there is nothing special here.
pluginDir/myplugin.py:
import plugin
class Foo(plugin.PluginBase):
pass
Here is what the search function might look like:
test.py:
import plugin
import os
import imp
def search(plugindir):
for root, dirs, files in os.walk(plugindir):
for fname in files:
modname = os.path.splitext(fname)[0]
try:
module=imp.load_source(modname,os.path.join(root,fname))
except Exception: continue
search('pluginDir')
print(plugin.plugins)
Running test.py yields
set([<class 'myplugin.Foo'>])
Could you use execfile() instead of import with a specified namespace dict, then iterate over that namespace with issubclass, etc?

Categories