Mocking open(file_name) in unit tests [duplicate] - python

This question already has answers here:
How do I mock the filesystem in Python unit tests?
(5 answers)
I have source code that opens a CSV file and sets up a header-to-value
association. The source code is given below:
import csv
import logging

def ParseCsvFile(source):
    """Parse the csv file.
    Args:
      source: file to be parsed
    Returns: the list of dictionary entities; each dictionary contains
      attribute to value mapping or its equivalent.
    """
    global rack_file
    rack_type_file = None
    try:
        rack_file = source
        rack_type_file = open(rack_file)  # Need to mock this line.
        headers = rack_type_file.readline().split(',')
        length = len(headers)
        reader = csv.reader(rack_type_file, delimiter=',')
        attributes_list = []  # list of dictionaries.
        for line in reader:
            # More processing happens here, e.g. converting the rack name to a sequence.
            attributes_list.append(dict((headers[i], line[i])
                                        for i in range(length)))
        return attributes_list
    except IOError, (errno, strerror):
        logging.error("I/O error(%s): %s" % (errno, strerror))
    except IndexError, (errno, strerror):
        logging.error('Index Error(%s), %s' % (errno, strerror))
    finally:
        rack_type_file.close()
I am trying to mock the following statement:
rack_type_file = open(rack_file)
How do I mock the open(...) function?

This is admittedly an old question, hence some of the answers are outdated.
In the current version of the mock library there is a convenience function designed for precisely this purpose. Here's how it works:
>>> from mock import mock_open, patch
>>> m = mock_open()
>>> with patch('__main__.open', m, create=True):
...     with open('foo', 'w') as h:
...         h.write('some stuff')
...
>>> m.mock_calls
[call('foo', 'w'),
 call().__enter__(),
 call().write('some stuff'),
 call().__exit__(None, None, None)]
>>> m.assert_called_once_with('foo', 'w')
>>> handle = m()
>>> handle.write.assert_called_once_with('some stuff')
Documentation for mock_open is in the mock library's docs.
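For the read side, which is closer to the question, mock_open takes a read_data argument. A minimal sketch, with purely illustrative CSV content:

>>> m = mock_open(read_data='name,height\nrack1,42u\n')
>>> with patch('__main__.open', m, create=True):
...     with open('rack.csv') as h:
...         h.read()
...
'name,height\nrack1,42u\n'

Note that support for readline() and for iterating over the handle (which csv.reader needs) was only added to mock_open in later versions of the library, so check your version before relying on it.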

To mock the built-in function open with mox, use the __builtin__ module:
import __builtin__ # unlike __builtins__ this must be imported
m = mox.Mox()
m.StubOutWithMock(__builtin__, 'open')
open('ftphelp.yml', 'rb').AndReturn(StringIO("fake file content"))
m.ReplayAll()
# call the code you want to test that calls `open`
m.VerifyAll()
m.UnsetStubs()
Note that __builtins__ is not always a module; it can be of type dict. Use the __builtin__ module (with no "s") to refer to the system built-in methods.
More about __builtin__ module: http://docs.python.org/library/builtin.html
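For instance, in a CPython 2 interactive session:

>>> import __builtin__
>>> type(__builtin__)
<type 'module'>
>>> __builtin__.open is open
True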

There are two ways that I like to do this, depending on the situation.
If your unit test is going to call ParseCsvFile directly I would add a new kwarg to ParseCsvFile:
def ParseCsvFile(source, open=open):
    # ...
    rack_type_file = open(rack_file)  # Need to mock this line.
Then your unit test can pass a different function as the open argument in order to accomplish the mocking.
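A minimal sketch of such a test (the fake_open name and the CSV content are purely illustrative):

from StringIO import StringIO

def fake_open(name):
    return StringIO("name,height\nrack1,42u\n")

def test_parse():
    result = ParseCsvFile('fake.csv', open=fake_open)
    # assert on result here; note the original code leaves the trailing
    # newline in the last header, so keys may need a .strip()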
If your unit test calls some other function that in turn calls ParseCsvFile, then passing an open function around just for tests is ugly. In that case I would use the mock module. It lets you alter a function by name and replace it with a Mock object.
# code.py
def open_func(name):
    return open(name)

def ParseCsvFile(source):
    # ...
    rack_type_file = open_func(rack_file)  # Need to mock this line.

# test.py
import unittest
import mock
from StringIO import StringIO

@mock.patch('code.open_func')
class ParseCsvTest(unittest.TestCase):
    def test_parse(self, open_mock):
        open_mock.return_value = StringIO("my,example,input")
        # ...
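Filled out, that test might look like the following sketch (the CSV content and the assertion are assumptions about the surrounding project):

import code

@mock.patch('code.open_func')
class ParseCsvTest(unittest.TestCase):
    def test_parse(self, open_mock):
        open_mock.return_value = StringIO("name,height\nrack1,42u\n")
        result = code.ParseCsvFile('fake.csv')
        open_mock.assert_called_once_with('fake.csv')
        # assert on result as needed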

It's simple with a decorator (Python 3):
def my_method():
    with open(file="/1.txt", mode='r', encoding='utf-8') as file:
        return file.read().strip()

@mock.patch("builtins.open", create=True)
def test_my_method(mock_open):
    mock_open.side_effect = [
        mock.mock_open(read_data="A").return_value
    ]
    resA = my_method()
    assert resA == "A"
    assert mock_open.mock_calls == [mock.call(file="/1.txt", mode='r', encoding='utf-8')]

I took the liberty of re-writing your sample function:
Assume your function is located in a file named code.py
# code.py
import csv
import logging

def ParseCsvFile(source):
    """Parse the csv file.
    Args:
      source: file to be parsed
    Returns: the list of dictionary entities; each dictionary contains
      attribute to value mapping or its equivalent.
    """
    global rack_file
    rack_file = source
    attributes_list = []
    try:
        rack_type_file = open(rack_file)
    except IOError, (errno, strerror):
        logging.error("I/O error(%s): %s", errno, strerror)
    else:
        reader = csv.DictReader(rack_type_file, delimiter=',')
        attributes_list = [line for line in reader]  # list of dictionaries
        rack_type_file.close()
    return attributes_list
A simple test case would be:
# your test file
import __builtin__
import unittest
import contextlib
from StringIO import StringIO

import mox

import code

@contextlib.contextmanager
def mox_replayer(mox_instance):
    mox_instance.ReplayAll()
    yield
    mox_instance.VerifyAll()

class TestParseCSVFile(unittest.TestCase):
    def setUp(self):
        self.mox = mox.Mox()

    def tearDown(self):
        self.mox.UnsetStubs()

    def test_parse_csv_file_returns_list_of_dicts(self):
        TEST_FILE_NAME = 'foo.csv'
        self.mox.StubOutWithMock(__builtin__, 'open')
        open(TEST_FILE_NAME).AndReturn(StringIO("name,age\nfoo,13"))
        with mox_replayer(self.mox):
            result = code.ParseCsvFile(TEST_FILE_NAME)
        self.assertEqual(result, [{'age': '13', 'name': 'foo'}])  # works!

if __name__ == '__main__':
    unittest.main()
EDIT:
% /usr/bin/python2.6
Python 2.6.1 (r261:67515, Jun 24 2010, 21:47:49)
[GCC 4.2.1 (Apple Inc. build 5646)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import __builtin__
>>> import mox
>>> mock = mox.Mox()
>>> mock.StubOutWithMock(__builtin__, 'open')
>>> mock.UnsetStubs()
Works fine on 2.6 using mox 0.53

I was having a similar problem and was tearing my hair out flipping between different mocking libraries. I finally found a solution that I am happy with, and maybe it will help you. In the end I went with the Mocker library (http://labix.org/mocker), and here is the code for mocking open:
from mocker import Mocker
from StringIO import StringIO
import __builtin__
mocker = Mocker()
sourceFile = 'myTestFile.txt'
__builtin__.open = mocker.mock()
__builtin__.open(sourceFile)
mocker.result(StringIO('this,is,a,test,file'))
<the rest of your test setup goes here>
mocker.replay()
ParseCsvFile(sourceFile)
mocker.restore()
mocker.verify()
Incidentally, the reason I went with Mocker is that I was testing a function which used open to read a file, and then used open again to overwrite the same file with new data. What I needed to test was the case where the initial file didn't exist, so I set up a mock that threw an IOError the first time and then worked the second time. The setup for that looked like this:
from mocker import Mocker
import __builtin__
mocker = Mocker()
mockFileObject = mocker.mock()
__builtin__.open = mocker.mock()
__builtin__.open('previousState.pkl', 'r')
mocker.throw(IOError('Boom'))
__builtin__.open('previousState.pkl','w')
mocker.result(mockFileObject)
<rest of test setup >
mocker.replay()
<test>
mocker.restore() #required to restore the open method
mocker.verify()
Hope this helps!

@mock.patch decorator (2.7 example)
This is now much easier:
import your_script
import __builtin__
import mock

@mock.patch("__builtin__.open")
def test_example(self, mock_open):
    your_script.your_method()
    self.assertEqual(mock_open.call_count, 1)

>>> class A(object):
...     def __init__(self):
...         self.x = open('test.py')
...
>>> old_open = open
>>> def open(s):
...     return "test\n"
...
>>> a = A()
>>> a.x
'test\n'
>>> open = old_open
>>> a = A()
>>> a.x
<open file 'test.py', mode 'r' at 0xb7736230>

Related

How to write python unit test for the print statement?

I am new to writing Python unit tests. Please help me write test cases for the function below, which has only print statements instead of return statements.
import os
from pwd import getpwuid
from grp import getgrgid
import time

def info(self):
    if self.f:  # if the -f flag is passed with file names, this is triggered
        for i in self.f:
            owner = getpwuid(os.stat(i).st_uid).pw_name
            group = getgrgid(os.stat(i).st_gid).gr_name
            per = oct(os.stat(i).st_mode)[-3:]
            t = os.stat(i).st_mtime
            print("{} is owned by: {} , group by {} with "
                  "permission {} and last modified on {}"
                  .format(i, owner, group, per, time.ctime(t)))
You can use contextlib's redirect_stdout
import io
import unittest
from contextlib import redirect_stdout

import lib_containing_info

class TestInfo(unittest.TestCase):
    def test_info(self):
        f = io.StringIO()
        with redirect_stdout(f):
            lib_containing_info.info()  # function being tested
        self.assertRegex(f.getvalue(), r"is owned by: ")

pytest-mock pathlib.Path.open

I need to mock pathlib.Path.open using pytest-mock.
The real open_func opens a yaml-file. The return value is a regular dict. How can I mock Path.open to just load another yaml-file called test-config.yaml?
My code is not working properly as conf will simply become a str ("test_config.yaml"). It should be a dict.
from pathlib import Path
import yaml

def open_func():
    with Path.open(Path("./config.yaml")) as f:
        return yaml.load(f, Loader=yaml.FullLoader)

def test_open_func(mocker):
    mocker.patch("pathlib.Path.open", mocker.mock_open(read_data="test_config.yaml"))
    conf = open_func()
    assert isinstance(conf, dict)
EDIT:
To get closer to my real-world problem, I am providing the following code. I have a class TryToMock that basically takes two files as inputs. The method load_files simply loads these files (which are actually .yaml files) and returns the output. These .yaml files are really configuration files.
In my unit tests, I will be calling TryToMock numerous times through pytest's parametrize. Therefore, I would like to load the original configuration files via a fixture. Then I am able to monkeypatch some entries in my various tests before running load_files.
In order not to load the original files again, I need to mock the Path.open function in TryToMock. I would like to pass the monkeypatched yaml files instead (i.e. in the form of a dict). The difficulty is that I must discriminate between the two files; that is, I can't simply mock the Path.open function with the same file content.
# TryToMock.py
from pathlib import Path
import yaml

# In my current working folder, I have two .yaml files containing the
# following content for illustrative purposes:
#
# file1.yaml = {'name': 'test1', 'file_type': 'yaml'}
# file2.yaml = {'schema': 'test2', 'currencies': ['EUR', 'USD', 'JPY']}

class TryToMock:
    def __init__(self, file_to_mock_1, file_to_mock_2):
        self._file_to_mock_1 = file_to_mock_1
        self._file_to_mock_2 = file_to_mock_2

    def load_files(self):
        with Path.open(self._file_to_mock_1) as f:
            file1 = yaml.load(f, Loader=yaml.FullLoader)
        with Path.open(self._file_to_mock_2) as f:
            file2 = yaml.load(f, Loader=yaml.FullLoader)
        return file1, file2
# test_TryToMock.py
import os
from pathlib import Path

import pytest
import yaml

from tests import TryToMock

def yaml_files_for_test(yaml_content):
    names = {"file1.yaml": file1_content, "file2.yaml": file2_content}
    return os.path.join("./", names[os.path.basename(yaml_content)])

@pytest.fixture(scope="module")
def file1_content():
    with Path.open(Path("./file1.yaml")) as f:
        return yaml.load(f, Loader=yaml.FullLoader)

@pytest.fixture(scope="module")
def file2_content():
    with Path.open(Path("./file2.yaml")) as f:
        return yaml.load(f, Loader=yaml.FullLoader)

def test_try_to_mock(file1_content, file2_content, monkeypatch, mocker):
    file_1 = Path("./file1.yaml")
    file_2 = Path("./file2.yaml")
    m = TryToMock.TryToMock(file_to_mock_1=file_1, file_to_mock_2=file_2)
    # Change some items
    monkeypatch.setitem(file1_content, "file_type", "json")
    # Mocking - how does it work when I would like to use mock_open???
    # What should the lambda function look like?
    mocker.patch(
        "pathlib.Path.open",
        lambda x: mocker.mock_open(read_data=yaml_files_for_test(x)),
    )
    files = m.load_files()
    assert files[0]["file_type"] == "json"
You have to provide the actual file contents to the read_data argument of mock_open. You can just create the data in your test:
test_yaml = """
foo:
bar:
- VAR: "MyVar"
"""
def test_open_func(mocker):
mocker.patch("pathlib.Path.open", mocker.mock_open(read_data=test_yaml))
conf = open_func()
assert conf == {'foo': {'bar': [{'VAR': 'MyVar'}]}}
Or you can read the data from your test file:
def test_open_func(mocker):
    with open("my_fixture_path/test.yaml") as f:
        contents = f.read()
    mocker.patch("pathlib.Path.open", mocker.mock_open(read_data=contents))
    conf = open_func()
    assert isinstance(conf, dict)
The last case can also be rewritten to replace the path argument in the open call with your test path:
def test_open_func(mocker):
    mocker.patch("pathlib.Path.open", lambda path: open("test.yaml"))
    conf = open_func()
    assert isinstance(conf, dict)
or, if you have different test files for different configs, something like:
def yaml_path_for_test(yaml_path):
    names = {
        "config.yaml": "test.yaml",
        ...
    }
    return os.path.join(my_fixture_path, names[os.path.basename(yaml_path)])

def test_open_func3(mocker):
    mocker.patch("pathlib.Path.open", lambda path: open(yaml_path_for_test(path)))
    conf = open_func()
    assert isinstance(conf, dict)
This is probably what you wanted to achieve in your test code.
UPDATE:
This is related to the second part of the question (after the edit). If you have the module-scoped fixtures that preload the fixture files as in the question, you can do something like this:
def test_open_func(mocker, file1_content, file2_content):
    def yaml_files_for_test(path):
        contents = {"file1.yaml": file1_content,
                    "file2.yaml": file2_content}
        data = contents[os.path.basename(path)]
        mock = mocker.mock_open(read_data=yaml.dump(data))
        return mock.return_value
    mocker.patch("pathlib.Path.open", yaml_files_for_test)
    conf = open_func()
    assert isinstance(conf, dict)
or, if you prefer not to use nested functions:
def yaml_files_for_test(path, mocker, content1, content2):
    contents = {"file1.yaml": content1,
                "file2.yaml": content2}
    data = contents[os.path.basename(path)]
    mock = mocker.mock_open(read_data=yaml.dump(data))
    return mock.return_value

def test_open_func5(mocker, file1_content, file2_content):
    mocker.patch("pathlib.Path.open",
                 lambda path: yaml_files_for_test(path, mocker,
                                                  file1_content, file2_content))
    conf = open_func()
    assert isinstance(conf, dict)

PyTest setup or teardown a variable

How can I re-instantiate a variable for each of my PyTests?
Specifically, I want to create a new StringIO() object each time.
My current code is this:
output = StringIO()

def info_to_string(text):
    output.write(text)

def write_to_file(some_text):
    for row in some_text:
        info_to_string(row)
I need output to be set up each time there is a new test fixture.
Copy and pastable code to test:
from io import StringIO
import pytest

output = StringIO()

def info_to_string(text):
    output.write(text)

def write_to_file(some_text):
    for row in some_text:
        info_to_string(row)

def test_1():
    write_to_file(['hello', 'there', 'what', 'is', 'up'])
    print(output)
    assert output.getvalue() == "hellotherewhatisup"

def test_2():
    write_to_file(['nothing', 'much'])
    assert output.getvalue() == "nothingmuch"
    # This will error, as the output is "hellotherewhatisupnothingmuch" if run after test_1
So I would need a new output = StringIO() for each test.
In case anyone sees this in the future: the way I did this was by creating a class and re-initialising it via a fixture
class WriteToString:
    def __init__(self):
        self.output = StringIO()

    def info_to_string(self, text):
        self.output.write(text)

@pytest.fixture
def write_to_string():
    return WriteToString()
and changing the tests to:
def test_2(write_to_string):
    write_to_file(['nothing', 'much'])
    assert write_to_string.output.getvalue() == "nothingmuch"
I'm not sure I quite understand your question, but you can do something like the below, which will create a new StringIO instance every time you pass it into your test function. If you want to return a different string every time, you aren't looking for a fixture, but just a generic function call that does the work for you.
import pytest
from StringIO import StringIO

@pytest.fixture(scope='function')
def info_to_string():
    output = StringIO()
    output.write('blah')
    return output

def test_something(info_to_string):
    assert isinstance(info_to_string, StringIO)
    assert info_to_string.getvalue() == 'blah'
I think the easiest way to test file-writing operations is to use tempfile (a sketch follows the example below).
But once StringIO is mentioned as part of the testing strategy, my suggestion would be to split file writing into two parts. This gives a clean entry point for a StringIO buffer.
from pathlib import Path
from io import StringIO
import pytest

# Separate your data export into 2 functions, one operating
# with a filename, the other with a file object.

def to_file(filename, content):
    path = Path(filename)
    if not path.exists():
        path.touch()
    with path.open('a') as f_obj:
        write_to_file(f_obj, content)

# This is the code under test. It must not be connected to your
# test code in any way, even if you have some version of "monkey-patching"
# in mind.

def write_to_file(file_object, content):
    """Dump *content* list of strings to *filename* with *writer*."""
    for x in content:
        file_object.write(str(x))

# This is the test with a StringIO buffer

def test_write_to_file_with_buffer():
    buffer = StringIO()
    # note you make several calls to write_to_file in one test, not across tests
    # this helps keep tests isolated/independent
    write_to_file(buffer, ['hello', 'there', 'what', 'is', 'up'])
    write_to_file(buffer, ['nothing', 'much'])
    assert buffer.getvalue() == 'hellotherewhatisupnothingmuch'
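And for the tempfile route mentioned above, a minimal sketch using pytest's built-in tmp_path fixture (available in pytest 3.9+; the file name is illustrative):

def test_to_file_with_tempfile(tmp_path):
    target = tmp_path / "out.txt"  # tmp_path is a fresh per-test temporary directory
    to_file(str(target), ['hello', 'there'])
    assert target.read_text() == 'hellothere'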

How to fix mock_open differences in calls but not in end result

Using mock_open, I can capture the data from writes using the with [...] as construct. However, testing that what I have is correct is a little tricky. For example, I can do this:
>>> from mock import mock_open, patch
>>> m = mock_open()
>>> with patch('__main__.open', m, create=True):
...     with open('foo', 'w') as h:
...         h.write('some stuff')
...
>>> m.mock_calls
[call('foo', 'w'),
 call().__enter__(),
 call().write('some stuff'),
 call().__exit__(None, None, None)]
>>> m.assert_called_once_with('foo', 'w')
>>> handle = m()
>>> handle.write.assert_called_once_with('some stuff')
But I want to compare what I think should have been written to what was. In effect, something like this:
>>> expected = 'some stuff'
>>> assert(expected == m.all_that_was_written)
The problem I am facing with call is that different versions of json (2.0.9 vs 1.9) seem to print things differently. No, I cannot just update to the latest json.
The actual error I am getting is this:
E AssertionError: [call('Tool_000.json', 'w'),
call().__enter__(),
call().write('['),
call().write('\n '),
call().write('"1.0.0"'),
call().write(', \n '),
call().write('"2014-02-27 08:58:02"'),
call().write(', \n '),
call().write('"ook"'),
call().write('\n'),
call().write(']'),
call().__exit__(None, None, None)]
!=
[call('Tool_000.json', 'w'),
call().__enter__(),
call().write('[\n "1.0.0"'),
call().write(', \n "2014-02-27 08:58:02"'),
call().write(', \n "ook"'),
call().write('\n'),
call().write(']'),
call().__exit__(None, None, None)]
In effect, the calls are different but the end result is the same.
The code I am testing is fairly simple:
with open(get_new_file_name(), 'w') as fp:
    json.dump(lst, fp)
So, creating another method that passes the file pointer seems overkill.
You can patch open() to return a StringIO object and then check the contents:
with mock.patch('module_under_test.open', create=True) as mock_open:
    stream = io.StringIO()
    # patching to make getvalue() work after close() or __exit__()
    stream.close = mock.Mock(return_value=None)
    mock_open.return_value = stream
    module_under_test.do_something()  # this calls open()
    contents = stream.getvalue()
    assert(contents == expected)
Edit: added patch for stream.close to avoid exception on stream.getvalue().
mock_open is not fully featured yet. It works well if you are mocking files to be read but it does not yet have enough features for testing written files. The question clearly shows this deficiency.
My solution is to not use mock_open if you are testing the written content. Here is the alternative:
import six
import mock
import unittest

class GenTest(unittest.TestCase):
    def test_open_mock(self):
        io = six.BytesIO()
        io_mock = mock.MagicMock(wraps=io)
        io_mock.__enter__.return_value = io_mock
        io_mock.close = mock.Mock()  # optional
        with mock.patch.object(six.moves.builtins, 'open', create=True, return_value=io_mock):
            # test using with
            with open('foo', 'w') as h:
                expected = 'some stuff'
                h.write(expected)
            self.assertEquals(expected, io.getvalue())

            # test using file handle directly
            io.seek(0); io.truncate()  # reset io
            expected = 'other stuff'
            open('bar', 'w').write(expected)
            self.assertEquals(expected, io.getvalue())

            # test getvalue after close
            io.seek(0); io.truncate()  # reset io
            expected = 'closing stuff'
            f = open('baz', 'w')
            f.write(expected)
            f.close()
            self.assertEquals(expected, io.getvalue())

if __name__ == '__main__':
    unittest.main()
Here's what I would do: write a method that returns the complete string from all calls of the write method.
class FileIOTestCase(unittest.TestCase):
    """ For testing code involving file io operations """
    def setUp(self):
        """ Patches the open function with a mock, to be undone after test. """
        self.mo = mock_open()
        patcher = patch("builtins.open", self.mo)
        patcher.start()
        self.addCleanup(patcher.stop)

    def get_written_string(self):
        return ''.join(c[0][0] for c in self.mo.return_value.write.call_args_list)
An example of how to use it
class TestWriteFile(FileIOTestCase):
    def test_write_file__csv(self):
        save.write_file("a,b\n1,2", "directory", "C6L")
        self.mo.assert_called_once_with(os.path.join("directory", "C6L.csv"), 'w')
        self.assertEqual(self.get_written_string(), "a,b\n1,2")

Return a list of imported Python modules used in a script?

I am writing a program that categorizes a list of Python files by which modules they import. As such I need to scan the collection of .py files and return a list of which modules they import. As an example, if one of the files I scan has the following lines:
import os
import sys, gtk
I would like it to return:
["os", "sys", "gtk"]
I played with modulefinder and wrote:
from modulefinder import ModuleFinder

finder = ModuleFinder()
finder.run_script('testscript.py')

print 'Loaded modules:'
for name, mod in finder.modules.iteritems():
    print '%s ' % name,
but this returns more than just the modules used in the script. As an example in a script which merely has:
import os
print os.getenv('USERNAME')
The modules returned from the ModuleFinder script return:
tokenize heapq __future__ copy_reg sre_compile _collections cStringIO _sre functools random cPickle __builtin__ subprocess cmd gc __main__ operator array select _heapq _threading_local abc _bisect posixpath _random os2emxpath tempfile errno pprint binascii token sre_constants re _abcoll collections ntpath threading opcode _struct _warnings math shlex fcntl genericpath stat string warnings UserDict inspect repr struct sys pwd imp getopt readline copy bdb types strop _functools keyword thread StringIO bisect pickle signal traceback difflib marshal linecache itertools dummy_thread posix doctest unittest time sre_parse os pdb dis
...whereas I just want it to return 'os', as that was the module used in the script.
Can anyone help me achieve this?
UPDATE: I just want to clarify that I would like to do this without running the Python file being analyzed, and just scanning the code.
IMO the best way to do this is to use the http://furius.ca/snakefood/ package. The author has done all of the required work to get not only directly imported modules, but it also uses the AST to parse the code for runtime dependencies that a more static analysis would miss.
Worked up a command example to demonstrate:
sfood ./example.py | sfood-cluster > example.deps
That will generate a basic dependency file of each unique module. For even more detail use:
sfood -r -i ./example.py | sfood-cluster > example.deps
To walk a tree and find all imports, you can also do this in code:
Please NOTE - The AST chunks of this routine were lifted from the snakefood source which has this copyright: Copyright (C) 2001-2007 Martin Blais. All Rights Reserved.
import os
import compiler
from compiler.ast import Discard, Const
from compiler.visitor import ASTVisitor

def pyfiles(startPath):
    r = []
    d = os.path.abspath(startPath)
    if os.path.exists(d) and os.path.isdir(d):
        for root, dirs, files in os.walk(d):
            for f in files:
                n, ext = os.path.splitext(f)
                if ext == '.py':
                    r.append([d, f])
    return r

class ImportVisitor(object):
    def __init__(self):
        self.modules = []
        self.recent = []

    def visitImport(self, node):
        self.accept_imports()
        self.recent.extend((x[0], None, x[1] or x[0], node.lineno, 0)
                           for x in node.names)

    def visitFrom(self, node):
        self.accept_imports()
        modname = node.modname
        if modname == '__future__':
            return  # Ignore these.
        for name, as_ in node.names:
            if name == '*':
                # We really don't know...
                mod = (modname, None, None, node.lineno, node.level)
            else:
                mod = (modname, name, as_ or name, node.lineno, node.level)
            self.recent.append(mod)

    def default(self, node):
        pragma = None
        if self.recent:
            if isinstance(node, Discard):
                children = node.getChildren()
                if len(children) == 1 and isinstance(children[0], Const):
                    const_node = children[0]
                    pragma = const_node.value
        self.accept_imports(pragma)

    def accept_imports(self, pragma=None):
        self.modules.extend((m, r, l, n, lvl, pragma)
                            for (m, r, l, n, lvl) in self.recent)
        self.recent = []

    def finalize(self):
        self.accept_imports()
        return self.modules

class ImportWalker(ASTVisitor):
    def __init__(self, visitor):
        ASTVisitor.__init__(self)
        self._visitor = visitor

    def default(self, node, *args):
        self._visitor.default(node)
        ASTVisitor.default(self, node, *args)

def parse_python_source(fn):
    contents = open(fn, 'rU').read()
    ast = compiler.parse(contents)
    vis = ImportVisitor()
    compiler.walk(ast, vis, ImportWalker(vis))
    return vis.finalize()

for d, f in pyfiles('/Users/bear/temp/foobar'):
    print d, f
    print parse_python_source(os.path.join(d, f))
I recently needed all the dependencies for a given Python script and I took a different approach than the other answers. I only cared about top-level module names (e.g., I wanted foo from import foo.bar).
This is the code using the ast module:
import ast

modules = set()

def visit_Import(node):
    for name in node.names:
        modules.add(name.name.split(".")[0])

def visit_ImportFrom(node):
    # if node.module is missing it's a "from . import ..." statement
    # if level > 0 it's a "from .submodule import ..." statement
    if node.module is not None and node.level == 0:
        modules.add(node.module.split(".")[0])

node_iter = ast.NodeVisitor()
node_iter.visit_Import = visit_Import
node_iter.visit_ImportFrom = visit_ImportFrom
Testing with a python file foo.py that contains:
# foo.py
import sys, os
import foo1
from foo2 import bar
from foo3 import bar as che
import foo4 as boo
import foo5.zoo
from foo6 import *
from . import foo7, foo8
from .foo12 import foo13
from foo9 import foo10, foo11
def do():
    import bar1
    from bar2 import foo
    from bar3 import che as baz
I could get all the modules in foo.py by doing something like this:
with open("foo.py") as f:
node_iter.visit(ast.parse(f.read()))
print(modules)
which would give me this output:
set(['bar1', 'bar3', 'bar2', 'sys', 'foo9', 'foo4', 'foo5', 'foo6', 'os', 'foo1', 'foo2', 'foo3'])
You might want to try dis (pun intended):
import dis
from collections import defaultdict
from pprint import pprint

statements = """
from __future__ import (absolute_import,
                        division)
import os
import collections, itertools
from math import *
from gzip import open as gzip_open
from subprocess import check_output, Popen
"""

instructions = dis.get_instructions(statements)
imports = [__ for __ in instructions if 'IMPORT' in __.opname]

grouped = defaultdict(list)
for instr in imports:
    grouped[instr.opname].append(instr.argval)

pprint(grouped)
outputs
defaultdict(<class 'list'>,
{'IMPORT_FROM': ['absolute_import',
'division',
'open',
'check_output',
'Popen'],
'IMPORT_NAME': ['__future__',
'os',
'collections',
'itertools',
'math',
'gzip',
'subprocess'],
'IMPORT_STAR': [None]})
Your imported modules are grouped['IMPORT_NAME'].
It depends how thorough you want to be. Finding used modules is a Turing-complete problem: some Python code uses lazy importing to only import things it actually uses on a particular run, and some generates things to import dynamically (e.g. plugin systems).
python -v will trace import statements - it's arguably the simplest thing to check.
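For example, an illustrative invocation (the script name is hypothetical; -v prints its trace to stderr):
python -v myscript.py 2>&1 | grep '^import '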
This works - using importlib to actually import the module, and inspect to get the members :
#! /usr/bin/env python
#
# test.py
#
# Find Modules
#
import inspect, importlib as implib

if __name__ == "__main__":
    mod = implib.import_module("example")
    for i in inspect.getmembers(mod, inspect.ismodule):
        print i[0]

#! /usr/bin/env python
#
# example.py
#
import sys
from os import path

if __name__ == "__main__":
    print "Hello World !!!!"
Output :
tony#laptop .../~:$ ./test.py
path
sys
I was looking for something similar and I found a gem in a package called PyScons. The Scanner does just what you want (in 7 lines), using an import_hook. Here is an abbreviated example:
import modulefinder, sys

class SingleFileModuleFinder(modulefinder.ModuleFinder):
    def import_hook(self, name, caller, *arg, **kwarg):
        if caller.__file__ == self.name:
            # Only call the parent at the top level.
            return modulefinder.ModuleFinder.import_hook(self, name, caller, *arg, **kwarg)

    def __call__(self, node):
        self.name = str(node)
        self.run_script(self.name)

if __name__ == '__main__':
    # Example entry, run with './script.py filename'
    print 'looking for includes in %s' % sys.argv[1]
    mf = SingleFileModuleFinder()
    mf(sys.argv[1])
    print '\n'.join(mf.modules.keys())
Well, you could always write a simple script that searches the file for import statements. This one finds all imported modules and files, including those imported in functions or classes:
def find_imports(toCheck):
    """
    Given a filename, returns a list of modules imported by the program.
    Only modules that can be imported from the current directory
    will be included. This program does not run the code, so import statements
    in if/else or try/except blocks will always be included.
    """
    import imp
    importedItems = []
    with open(toCheck, 'r') as pyFile:
        for line in pyFile:
            # ignore comments
            line = line.strip().partition("#")[0].partition("as")[0].split(' ')
            if line[0] == "import":
                for imported in line[1:]:
                    # remove commas (this doesn't check for commas if
                    # they're supposed to be there!)
                    imported = imported.strip(", ")
                    try:
                        # check to see if the module can be imported
                        # (doesn't actually import - just finds it if it exists)
                        imp.find_module(imported)
                        # add to the list of items we imported
                        importedItems.append(imported)
                    except ImportError:
                        # ignore items that can't be imported
                        # (unless that isn't what you want?)
                        pass
    return importedItems

toCheck = raw_input("Which file should be checked: ")
print find_imports(toCheck)
This doesn't do anything for from module import something style imports, though that could easily be added, depending on how you want to deal with those. It also doesn't do any syntax checking, so if you have some funny business like import sys gtk, os it will think you've imported all three modules even though the line is an error. It also doesn't deal with try/except style statements around import - if the module can be imported, this function will list it. It also doesn't deal well with multiple imports per line if you use the as keyword. The real issue here is that I'd have to write a full parser to really do this correctly. The given code works in many cases, as long as you understand there are definite corner cases.
One issue is that relative imports will fail if this script isn't in the same directory as the given file. You may want to add the directory of the given script to sys.path.
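A sketch of that tweak, reusing the toCheck variable from above:

import os, sys
sys.path.insert(0, os.path.dirname(os.path.abspath(toCheck)))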
I know this is old, but I was also looking for a solution like the OP was.
So I wrote this code to find the modules imported by the scripts in a folder.
It works with the import abc and from abc import cde formats. I hope it helps someone else.
import re
import os

def get_imported_modules(folder):
    files = [f for f in os.listdir(folder) if f.endswith(".py")]
    imports = []
    for file in files:
        with open(os.path.join(folder, file), mode="r") as f:
            lines = f.read()
            result = re.findall(r"(?<!from)import (\w+)[\n.]|from\s+(\w+)\s+import", lines)
            for imp in result:
                for i in imp:
                    if len(i):
                        if i not in imports:
                            imports.append(i)
    return imports
Thanks Tony Suffolk for inspect, importlib samples ... I built this wee module and you're all welcome to use it if it helps you. Giving back, yaaaay!
import timeit
import os
import inspect, importlib as implib
import textwrap as twrap

def src_modules(filename):
    assert (len(filename) > 1)
    mod = implib.import_module(filename.split(".")[0])
    ml_alias = []
    ml_actual = []
    ml_together = []
    ml_final = []
    for i in inspect.getmembers(mod, inspect.ismodule):
        ml_alias.append(i[0])
        ml_actual.append((str(i[1]).split(" ")[1]))
    ml_together = zip(ml_actual, ml_alias)
    for t in ml_together:
        (a, b) = t
        ml_final.append(a + ":=" + b)
    return ml_final

def l_to_str(itr):
    assert (len(itr) > 0)
    itr.sort()
    r_str = ""
    for i in itr:
        r_str += i + " "
    return r_str

def src_info(filename, start_time=timeit.default_timer()):
    assert (len(filename) > 1)
    filename_in = filename
    filename = filename_in.split(".")[0]
    if __name__ == filename:
        output_module = filename
    else:
        output_module = __name__
    print ("\n" + (80 * "#"))
    print (" runtime ~= {0} ms".format(round(((timeit.default_timer() - start_time) * 1000), 3)))
    print (" source file --> '{0}'".format(filename_in))
    print (" output via --> '{0}'".format(output_module))
    print (" modules used in '{0}':".format(filename))
    print (" " + "\n ".join(twrap.wrap(l_to_str(src_modules(filename)), 75)))
    print (80 * "#")
    return ""

if __name__ == "__main__":
    src_info(os.path.basename(__file__))

## how to use in X file:
#
# import print_src_info
# import os
#
# < ... your code ... >
#
# if __name__ == "__main__":
#     print_src_info.src_info(os.path.basename(__file__))

## example output:
#
# ################################################################################
#  runtime ~= 0.049 ms
#  source file --> 'print_src_info.py'
#  output via --> '__main__'
#  modules used in 'print_src_info':
#   'importlib':=implib 'inspect':=inspect 'os':=os 'textwrap':=twrap
#   'timeit':=timeit
# ################################################################################
For the majority of scripts which only import modules at the top level, it is quite sufficient to load the file as a module, and scan its members for modules:
import sys, io, imp, types

scriptname = 'myfile.py'
with io.open(scriptname) as scriptfile:
    code = compile(scriptfile.read(), scriptname, 'exec')
newmodule = imp.new_module('__main__')
exec(code, newmodule.__dict__)
scriptmodules = [name for name in dir(newmodule) if isinstance(newmodule.__dict__[name], types.ModuleType)]
This simulates the module being run as a script, by setting the module's name to '__main__'. It should therefore also capture funky dynamic module loading. The only modules it won't capture are those which are imported only into local scopes.
It actually works quite well with
print [key for key in locals().keys()
       if isinstance(locals()[key], type(sys)) and not key.startswith('__')]
I understand that this post is VERY old but I have found an ideal solution.
I came up with this idea:
def find_modules(code):
    modules = []
    code = code.splitlines()
    for item in code:
        if item[:7] == "import " and ", " not in item:
            if " as " in item:
                modules.append(item[7:item.find(" as ")])
            else:
                modules.append(item[7:])
        elif item[:5] == "from ":
            modules.append(item[5:item.find(" import ")])
        elif ", " in item:
            item = item[7:].split(", ")
            modules = modules + item
        else:
            print(item)
    return modules
code = """
import foo
import bar
from baz import eggs
import mymodule as test
import hello, there, stack
"""
print(find_modules(code))
It handles from, as, comma-separated, and normal import statements.
It requires no dependencies and works alongside other lines of code.
The above code prints:
['foo', 'bar', 'baz', 'mymodule', 'hello', 'there', 'stack']
Just put your code in the find_modules function.
I'm editing my original answer to say this. This is doable with a code snippet like the one below, but parsing the AST may be the best way to go.
import sys

def iter_imports(fd):
    """ Yield only lines that appear to be imports from an iterable.
        fd can be an open file, a list of lines, etc.
    """
    for line in fd:
        trimmed = line.strip()
        if trimmed.startswith('import '):
            yield trimmed
        elif trimmed.startswith('from ') and ('import ' in trimmed):
            yield trimmed

def main():
    # File name to read.
    filename = '/my/path/myfile.py'
    # Safely open the file, exit on error
    try:
        with open(filename) as f:
            # Iterate over the lines in this file, and generate a list of
            # lines that appear to be imports.
            import_lines = list(iter_imports(f))
    except (IOError, OSError) as exIO:
        print('Error opening file: {}\n{}'.format(filename, exIO))
        return 1
    else:
        # From here, import_lines should be a list of lines like this:
        #     from module import thing
        #     import os, sys
        #     from module import *
        # Do whatever you need to do with the import lines.
        print('\n'.join(import_lines))
    return 0

if __name__ == '__main__':
    sys.exit(main())
Further string parsing will be needed to grab just the module names. This does not catch cases where multi-line strings or doc strings contain the words 'import ' or 'from X import '. This is why I suggested parsing the AST.
