Most efficient way of cleaning up files after a test? - python

I have written some test cases which test a function I have written. The function simply counts the number of files in a particular directory. Eventually I will have another function which will behave in a certain way depending on how many files are in each directory. In this case I am working with two directories. This is my function:
dir_handler.py
from pathlib import Path

def count_files_in_dir(dirpath):
    assert dirpath.is_dir()
    file_list = []
    for file in dirpath.iterdir():
        if file.is_file():
            file_list.append(file)
    return len(file_list)
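Incidentally, the same count can be computed without building an intermediate list; a minimal equivalent sketch:

from pathlib import Path

def count_files_in_dir(dirpath):
    assert dirpath.is_dir()
    # count regular files directly inside dirpath (non-recursive)
    return sum(1 for entry in dirpath.iterdir() if entry.is_file())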
And here are my test cases:
test_dir_handler.py
from imports import *
import os
from main.dir_handler import count_files_in_dir

class DirHandlerTests(unittest.TestCase):

    def test_return_count_of_zero_when_no_file_exists_in_input_dir(self):
        self.assertEqual(0, count_files_in_dir(INPUT_FILE_PATH))

    def test_return_count_of_zero_when_no_file_exists_in_output_dir(self):
        self.assertEqual(0, count_files_in_dir(OUTPUT_FILE_PATH))

    def test_return_count_of_one_when_one_file_exists_in_input_dir(self):
        with open(str(INPUT_FILE_PATH) + "/" + "input.csv", "w") as file:
            self.assertEqual(1, count_files_in_dir(INPUT_FILE_PATH))

    def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
        with open(str(OUTPUT_FILE_PATH) + "/" + "output.csv", "w") as file:
            self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))

    def test_return_count_of_two_when_two_files_exists_in_output_dir(self):
        with open(str(OUTPUT_FILE_PATH) + "/" + "output.csv", "w") as file:
            with open(str(OUTPUT_FILE_PATH) + "/" + "output2.csv", "w") as file:
                self.assertEqual(2, count_files_in_dir(OUTPUT_FILE_PATH))

    # clearing up testing files at the end of the test
    def tearDown(self):
        try:
            os.remove(str(INPUT_FILE_PATH) + "/" + "input.csv")
        except FileNotFoundError:
            pass
        try:
            os.remove(str(OUTPUT_FILE_PATH) + "/" + "output.csv")
        except FileNotFoundError:
            pass
        try:
            os.remove(str(OUTPUT_FILE_PATH) + "/" + "output2.csv")
        except FileNotFoundError:
            pass

if __name__ == '__main__':
    unittest.main()
As you can see, I am having to remove "input.csv", "output.csv" and "output2.csv" individually, which is not very efficient. Both INPUT_FILE_PATH and OUTPUT_FILE_PATH are under the same directory, "files". All tests pass, but I would like recommendations on the best way of cleaning the INPUT_FILE_PATH and OUTPUT_FILE_PATH directories at the end of my tests. Thank you
EDIT:
Using @rockport's suggestion I have implemented setUp / tearDown methods. The code works as desired but is still quite messy. It clears both the input and output folders at the end of the tests. I have also switched from os to pathlib because I will be running and editing the code on both macOS and Windows. Here are the changes to my code:
def setUp(self):
    self.input_file = INPUT_FILE_PATH.joinpath("input.csv")
    self.output_file = OUTPUT_FILE_PATH.joinpath("output.csv")
    self.output_file2 = OUTPUT_FILE_PATH.joinpath("output2.csv")

def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
    with self.output_file.open(mode='w') as file:
        self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))

def test_return_count_of_two_when_two_files_exist_in_output_dir(self):
    with self.output_file.open(mode='w') as file:
        with self.output_file2.open(mode='w') as file:
            self.assertEqual(2, count_files_in_dir(OUTPUT_FILE_PATH))

def tearDown(self):
    for file in INPUT_FILE_PATH.iterdir():
        try:
            file.unlink()
        except FileNotFoundError:
            pass
    for file in OUTPUT_FILE_PATH.iterdir():
        try:
            file.unlink()
        except FileNotFoundError:
            pass

What you want is shutil.rmtree, which will delete the whole directory, including any subdirectories and files in it. After that, you can recreate the directory with os.mkdir or os.makedirs. Here is an example:
import os
import shutil
shutil.rmtree(INPUT_FILE_PATH)
os.mkdir(INPUT_FILE_PATH)
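In the question's tearDown this could look like the following sketch, assuming INPUT_FILE_PATH and OUTPUT_FILE_PATH are the same pathlib.Path constants used in the tests:

import os
import shutil

def tearDown(self):
    # wipe and recreate both directories so every test starts from an empty state
    for directory in (INPUT_FILE_PATH, OUTPUT_FILE_PATH):
        shutil.rmtree(directory)
        os.mkdir(directory)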

If you don't want to delete the whole tree, you could simply append each created path to a list and iterate over that list to remove every path in it:
pathlist = []

def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
    path = str(OUTPUT_FILE_PATH) + "/output.csv"
    pathlist.append(path)
    with open(path, "w") as file:
        self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))
And then:
for path in pathlist:
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
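A closely related variant (not in the original answer) is unittest's addCleanup, which registers the removal right where the file is created, so no shared pathlist is needed; a sketch using the same OUTPUT_FILE_PATH constant:

import os

def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
    path = str(OUTPUT_FILE_PATH) + "/output.csv"
    with open(path, "w") as file:
        # registered cleanups run after tearDown, even if the assertion fails
        self.addCleanup(os.remove, path)
        self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))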

Best practice is to implement a setUp method in which you create your files in a temporary folder. You then run your actual test(s) against that folder, and no manual removal is necessary.
Unit tests should not rely on the environment, such as files outside the test folder. This is why we use fixtures in testing.
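One way to do that, as a minimal sketch (reusing the question's count_files_in_dir and assuming it accepts any pathlib.Path, which it does): give every test its own temporary directory and let TemporaryDirectory delete it afterwards.

import tempfile
import unittest
from pathlib import Path

from main.dir_handler import count_files_in_dir

class DirHandlerTests(unittest.TestCase):

    def setUp(self):
        # a fresh, empty directory for every test; removed automatically in tearDown
        self._tmp = tempfile.TemporaryDirectory()
        self.test_dir = Path(self._tmp.name)

    def tearDown(self):
        self._tmp.cleanup()

    def test_return_count_of_one_when_one_file_exists(self):
        (self.test_dir / "input.csv").touch()
        self.assertEqual(1, count_files_in_dir(self.test_dir))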
The simplification you are asking for could be
def tearDown(self):
    for p, f in ((INPUT_FILE_PATH, "input.csv"),
                 (OUTPUT_FILE_PATH, "output.csv"),
                 (OUTPUT_FILE_PATH, "output2.csv")):
        try:
            os.remove(str(p) + "/" + f)
        except FileNotFoundError:
            pass
For your edit, why not just:
def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
    self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))
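That only counts to one if setUp actually creates the file rather than just building the Path objects; a sketch of that assumption (tests that expect an empty directory would then need their own setup):

def setUp(self):
    self.output_file = OUTPUT_FILE_PATH.joinpath("output.csv")
    self.output_file.touch()  # create the file so the simplified test has exactly one file to count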

Related

Relative paths in unittest python to run tests individually and as a whole

There are already several answered questions on this topic, but none of them actually address my problem.
I'm using PyCharm 2021.2 and I want to be able to run unit tests both, individually and as a whole. This is important because if I have many tests and only some of them fail, I usually debug my code and would like to run only my failed tests to see if my debugging was successful. Only then do I want to re-run all the tests.
My setup is as follows: my tests live in the folder containing_folder/tests, which also contains a testassets subfolder.
So, for instance, I want to:
1. Right-click the folder tests and run all tests in this folder (in my example there is only one test_MyClass.py, but usually there would be many such tests).
2. Right-click an individual test, e.g. test_MyClass.py, and run it on its own.
Both possibilities usually work fine. However, when my tests use relative paths, for instance to read some test assets (in my case from the folder containing_folder/tests/testassets), only option 1) works. Option 2) runs into a FileNotFoundError: No such file or directory.
The code to reproduce this behavior is:
MyClass.py:
class MyClass:
    _content = None

    def set_content(self, content):
        self._content = content

    def get_content(self):
        return self._content
test_MyClass.py:
import unittest
import io
from ..MyClass import MyClass

class MyClassTests(unittest.TestCase):
    myClassInstance = None

    @classmethod
    def setUpClass(cls):
        cls.myClassInstance = MyClass()

    def get_file(self, use_case):
        path_to_file = "testassets/" + use_case + ".txt"
        with io.open(path_to_file, 'r', encoding="utf-8") as file:
            file = file.read()
        return file

    def test_uc_file1(self):
        file_content = self.get_file("uc_1")
        self.myClassInstance.set_content(file_content)
        self.assertEquals("test1", self.myClassInstance.get_content())

    def test_uc_file2(self):
        file_content = self.get_file("uc_2")
        self.myClassInstance.set_content(file_content)
        self.assertEquals("test2", self.myClassInstance.get_content())
It seems that path_to_file = "testassets/" + use_case + ".txt" only works as a relative path with option 1), but not with option 2).
How can I recognize programmatically which option, 1) or 2), I'm starting a test with in PyCharm? And which path would I then have to choose for option 2)? I tried ../testassets, ../../testassets, ../../ and ../, but none of them worked for option 2).
OK, I found out how to accomplish what I want.
First of all, I got rid of relative paths when importing. Instead of from ..MyClass import MyClass I now simply use from MyClass import MyClass.
Second, my methods setUpClass and get_file now look like this:
@classmethod
def setUpClass(cls):
    cls.path = os.path.normpath(os.path.abspath(__file__))  # requires import os
    if os.path.isfile(cls.path):
        cls.path = os.path.dirname(cls.path)
    cls.myClassInstance = MyClass()

def get_file(self, use_case):
    path_to_file = self.path + "/testassets/" + use_case + ".txt"
    with io.open(path_to_file, 'r', encoding="utf-8") as file:
        file = file.read()
    return file
The point is that os.path.abspath(__file__) returns a root path of either the directory containing_folder/tests if I choose option 1) to start all tests or the filename containing_folder/tests/test_MyClass.py if I choose option 2) to start a single test. In the if statement
if os.path.isfile(cls.path):
    cls.path = os.path.dirname(cls.path)
I generalize both special cases to get the root directory of all the tests and can easily find the test assets relative to it.
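For comparison, a pathlib-based sketch of the same idea (assuming the testassets folder sits next to the test module): anchoring on __file__ makes the lookup independent of whichever working directory PyCharm uses.

from pathlib import Path

TEST_DIR = Path(__file__).resolve().parent  # .../containing_folder/tests, for both run options

def get_file(self, use_case):
    path_to_file = TEST_DIR / "testassets" / (use_case + ".txt")
    return path_to_file.read_text(encoding="utf-8")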

path does not exist after being created with tmp_path fixture

Edit: here's a git repo for easy testing:
https://gitlab.com/qualisign/ugit-bdd/
I want to refactor some repeated code from a step_def file to a conftest.py file. Here's what the step_def looks like:
@scenario('../features/CLI.feature',
          'store file in object database')
def test_file_stored_by_content_address():
    pass

@given("a file exists at some full path within a ugit dir", target_fixture="file_exists_at_path")
def file_exists_at_path(file_within_ugit_dir):
    return file_within_ugit_dir

@when("I enter ugit hash-object followed by that path")
def file_gets_hashed(file_exists_at_path):
    dir_name = os.path.dirname(file_exists_at_path)
    base_name = os.path.basename(file_exists_at_path)
    os.chdir(dir_name)
    os.system(f'ugit hash-object {base_name}')

@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        contents = f.read()
    with open(file_path, "rb") as hf:
        assert hf.read() == f.read()
And here's the conftest.py:
import os
import subprocess
import hashlib

import pytest
from pytest_bdd import scenario, given, when, then, parsers

WISE_WORDS = "Don\\'t be a fool! I\\'ll call you later."

@pytest.fixture(scope="session")
def is_ugit_dir(tmp_path_factory):
    path = tmp_path_factory.mktemp('data')
    os.chdir(path)
    subprocess.run(['ugit', 'init'])
    return path

@pytest.fixture
def file_within_ugit_dir(is_ugit_dir):
    path = is_ugit_dir
    full_path = f'{path}/wise_words.txt'
    os.system(f'echo {WISE_WORDS} > wise_words.txt')
    return full_path

@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    """
    Returns the full path to a hash-object within the objects database
    """
    subprocess.run(['ugit', 'hash-object', file_within_ugit_dir])
    # there should now be a file with a sha1 content-address in the following directory
    objects_dir = os.path.dirname(is_ugit_dir) + '/.ugit/objects/'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        sha_hash = hashlib.sha1(f.read()).hexdigest()
    return objects_dir + sha_hash
When I run the test, it seems that the temporary directory is not being kept open between steps:
    @then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
    def object_saved_in_db(file_hashed):
>       with open(file_hashed, "rb") as f:
E       FileNotFoundError: [Errno 2] No such file or directory: '/private/var/folders/m2/99x5jvw95ll6sbtgvj5md9700000gp/T/pytest-of-davidjoseph/pytest-74/.ugit/objects/7b5ee3d8d42c66048125a3937a0170ffdaf7b272'

/Users/davidjoseph/projects/ugit-bdd/tests/step_defs/test_cli.py:43: FileNotFoundError
-------------------------------------- Captured stdout call ---------------------------------------
Initialized empty ugit repository in /private/var/folders/m2/99x5jvw95ll6sbtgvj5md9700000gp/T/pytest-of-davidjoseph/pytest-74/data1/.ugit
7b5ee3d8d42c66048125a3937a0170ffdaf7b272
Is there any way to keep this temp directory open so it can be reused between fixtures in the conftest.py file, and eventually in the step_def file?
Changing the scope of the is_ugit_dir fixture to "session" as suggested in the comment is sufficient; all the rest are errors in your own code:
path = tmp_path_factory.mktemp('data')
os.chdir(path)
subprocess.run(['ugit', 'init'])
You change the current working directory to /tmp/pytest-smth/data and invoke ugit init in there - I assume the tool creates repository metadata at /tmp/pytest-smth/data/.ugit then. Later, you use
objects_dir = os.path.dirname(is_ugit_dir)+'/.ugit/objects/'
to create the objects dir - this will get you /tmp/pytest-smth/.ugit/objects. No wonder this directory doesn't exist. Changing it to e.g. objects_dir = is_ugit_dir / '.ugit' / 'objects' fixes the first error. As a follow-up, the return value of the file_hashed fixture has to be changed to objects_dir / sha_hash to work with pathlib paths.
contents = f.read()
with open(file_path, "rb") as hf:
    assert hf.read() == f.read()
Aside from the fact that file_path is not defined (I guess this should be file_within_ugit_dir), you are reading the file into contents and then reading it again. Why? Either rewind the file via f.seek(0) before invoking f.read() again, or use contents for the comparison.
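To make the two options concrete, a generic sketch (hypothetical file names a.bin and b.bin, not the ugit-specific comparison; the full version below uses the contents route):

with open("a.bin", "rb") as f:
    contents = f.read()
    # option 1: reuse what was already read
    with open("b.bin", "rb") as hf:
        assert hf.read() == contents
    # option 2: rewind and read the file again
    f.seek(0)
    with open("b.bin", "rb") as hf:
        assert hf.read() == f.read()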
Here's the full working code, with minimal necessary changes:
conftest.py
import os
import subprocess
import hashlib

import pytest
from pytest_bdd import scenario, given, when, then, parsers

WISE_WORDS = "Don\\'t be a fool! I\\'ll call you later."

@pytest.fixture(scope="session")
def is_ugit_dir(tmp_path_factory):
    path = tmp_path_factory.mktemp('data')
    os.chdir(path)
    subprocess.run(['ugit', 'init'])
    return path

@pytest.fixture
def file_within_ugit_dir(is_ugit_dir):
    path = is_ugit_dir
    full_path = path / 'wise_words.txt'
    os.system(f'echo {WISE_WORDS} > wise_words.txt')
    return full_path

@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    """
    Returns the full path to a hash-object within the objects database
    """
    subprocess.run(['ugit', 'hash-object', file_within_ugit_dir])
    # there should now be a file with a sha1 content-address in the following directory
    objects_dir = is_ugit_dir / '.ugit' / 'objects'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        data = b'blob\x00' + f.read()  # prepend the object type
        sha_hash = hashlib.sha1(data).hexdigest()
    return objects_dir / sha_hash
step_def.py
import os

from pytest_bdd import scenario, given, when, then, parsers

@scenario('features/CLI.feature', 'store file in object database')
def test_file_stored_by_content_address():
    pass

@given("a file exists at some full path within a ugit dir", target_fixture="file_exists_at_path")
def file_exists_at_path(file_within_ugit_dir):
    return file_within_ugit_dir

@when("I enter ugit hash-object followed by that path")
def file_gets_hashed(file_exists_at_path):
    dir_name = os.path.dirname(file_exists_at_path)
    base_name = os.path.basename(file_exists_at_path)
    os.chdir(dir_name)
    os.system(f'ugit hash-object {base_name}')

@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        contents = f.read().strip(b"blob\x00")
    with open(file_within_ugit_dir, "rb") as hf:
        assert hf.read() == contents
I would say that you have a logic issue in your code.
According to the test scenario, the fixture file_hashed must return a path to an existing file containing the hash. One can see it here:
#then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
with open(file_hashed, "rb") as f:
contents = f.read()
with open(file_path, "rb") as hf:
assert hf.read() == f.read()
In conftest.py you are not creating the file containing the hash. You are only building a path to it, and because there is nothing at that path, you get a FileNotFoundError. The error is here (your code does not create a hash file):
@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    objects_dir = os.path.dirname(is_ugit_dir) + '/.ugit/objects/'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        sha_hash = hashlib.sha1(f.read()).hexdigest()
    return objects_dir + sha_hash

How to create directories and sub directories efficiently and elegantly in Python 2.7?

I am trying to create a bunch of directories and subdirectories at a specific location on my PC. My process is something like this:
1. Check if there's any directory with the same directory name. Skip it if so.
2. If not, create the directory and the pre-defined subdirectories under that directory.
This is the code I came up with using os module:
def Test():
    main_dir = ["FolderA", "FolderB"]
    common_dir = ["SubFolder1", "SubFolder2", "SubFolder3"]
    for dir1 in main_dir:
        if not os.path.isdir(dir1):
            for dir2 in common_dir:
                os.makedirs("%s/%s" % (dir1, dir2))
I am wondering if there's any better way to do this very same task (probably shorter, more efficient and more pythonic)?
Python follows the philosophy
It is better to ask for forgiveness than to ask for permission.
So rather than checking isdir, you would simply catch the exception thrown if the leaf directory already exists:
def Test():
    main_dir = ["FolderA", "FolderB"]
    common_dir = ["SubFolder1", "SubFolder2", "SubFolder3"]
    for dir1 in main_dir:
        for dir2 in common_dir:
            try:
                os.makedirs(os.path.join(dir1, dir2))
            except OSError:
                pass
You can also replace the string interpolation "%s/%s" % (dir1, dir2) with os.path.join(dir1, dir2).
Another more succinct way is to do the cartesian product instead of using two nested for-loops:
for dir1, dir2 in itertools.product(main_dir, common_dir):
    try:
        os.makedirs(os.path.join(dir1, dir2))
    except OSError:
        pass
How about:
import os
from itertools import product, starmap

def Test():
    main_dir = ["FolderA", "FolderB"]
    common_dir = ["SubFolder1", "SubFolder2", "SubFolder3"]
    # build every main/common combination and create the directories
    map(os.makedirs, starmap(os.path.join, product(main_dir, common_dir)))
And if we're worried about os.makedirs() throwing errors:
import os
from itertools import product, starmap

def safe_makedirs(*args):
    try:
        return os.makedirs(*args)
    except OSError:
        pass  # ignore errors, for example if the paths already exist

def Test():
    main_dir = ["FolderA", "FolderB"]
    common_dir = ["SubFolder1", "SubFolder2", "SubFolder3"]
    map(safe_makedirs, starmap(os.path.join, product(main_dir, common_dir)))
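Not part of the original answers, but worth noting if you can move beyond Python 2.7: os.makedirs grew an exist_ok parameter in Python 3.2, which removes the need for the try/except entirely. A sketch:

import os
from itertools import product

def create_dirs(main_dir, common_dir):
    for dir1, dir2 in product(main_dir, common_dir):
        # exist_ok=True makes re-running this a no-op for directories that already exist
        os.makedirs(os.path.join(dir1, dir2), exist_ok=True)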

Accessing Dynamically-Named Directory in Python

I'm currently putting together a script in Python which will do the following:-
Create a directory in my Dropbox folder called 'Spartacus'
Create a subdirectory in this location with the naming convention of the date and time of creation
Within this directory, create a file called iprecord.txt and information will then be written to this file.
Here is my code thus far, using Python v2.7 on Windows 7:
import os
import time
import platform
import urllib

current_dir = os.getcwd()
targetname = "Spartacus"
target_dir = os.path.join(current_dir, targetname)
timenow = time.strftime("\%d-%b-%Y %H-%M-%S")

def directoryVerification():
    os.chdir(current_dir)
    try:
        os.mkdir('Spartacus')
    except OSError:
        pass
    try:
        os.system('attrib +h Spartacus')
    except OSError:
        pass

def gatherEvidence():
    os.chdir(target_dir)
    try:
        evidential_dir = os.mkdir(target_dir + timenow)
        os.chdir(evidential_dir)
    except OSError:
        pass
    f = iprecord.txt
    with f as open:
        ip_addr = urllib.urlopen('http://www.biranchi.com/ip.php').read()
        f.write("IP Address:\t %s\t %s" % ip_addr, time.strftime("\%d-%b-%Y %H-%M-%S"))

x = directoryVerification()
y = gatherEvidence()
I keep getting an error at line 26, where it cannot resolve the full path to the dynamically named (date and time) directory. I've printed out the value of evidential_dir and it shows as None.
Any pointers as to where I am going wrong? Thanks
PS: Any other advice on my code to improve it would be appreciated
PPS: Any advice on how to locate the default directory for 'Dropbox'? Is there a way of scanning a file system for a directory called 'Dropbox' and capturing the path?
os.mkdir() does not return a pathname as you might be thinking. It also seems like you do the same thing in inconsistent ways at different spots in your code.
Try this:
evidential_dir = os.path.join(target_dir, timenow)
os.mkdir(evidential_dir)
And fix your other line:
f = "iprecord.txt"
os.mkdir doesn't return anything.
evidential_dir = target_dir + timenow
try:
    os.mkdir(evidential_dir)
except OSError:
    pass
os.chdir(evidential_dir)
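Pulling the fixes together, a sketch of how gatherEvidence could look (Python 2.7, keeping the question's URL and iprecord.txt name; timenow is redefined here without the leading backslash since os.path.join supplies the separator):

timenow = time.strftime("%d-%b-%Y %H-%M-%S")  # no leading backslash needed with os.path.join

def gatherEvidence():
    evidential_dir = os.path.join(target_dir, timenow)
    try:
        os.makedirs(evidential_dir)
    except OSError:
        pass  # directory already exists
    os.chdir(evidential_dir)
    ip_addr = urllib.urlopen('http://www.biranchi.com/ip.php').read()
    with open("iprecord.txt", "w") as f:
        f.write("IP Address:\t%s\t%s\n" % (ip_addr, time.strftime("%d-%b-%Y %H-%M-%S")))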

shutil moving files keeping the same directory structure

I want to move a lot of files. The paths to these files are stored in a list. I want to keep the whole directory structure but move the files to a different folder.
So for example the files are
D:\test\test1\test1.txt
D:\test\test1\test2.txt
I want to move them from D:\ to C:\ and keep the directory structure. How should I go about doing it?
This is the code I have; it is not working:
import os, fnmatch
import shutil

f = open('test_logs.txt', 'r')  # log where filenames are stored, with the filename as the first entry
for line in f:
    filename = line.split()
    output_file = "C:" + filename[0].lstrip("D:")
    shutil.move(filename[0], output_file)
I read the file names fine and can generate the destination filenames fine, but when I run it, it gives me an error saying "No such file or directory" (with the path of the output file).
I think you want something like this:
import sys
import os
import shutil

# terminology:
#   path = full path to a file, i.e. directory + file name
#   directory = directory, possibly starting with a drive
#   file name = the last component of the path

sourcedrive = 'D:'
destdrive = 'C:'

log_list_file = open('test_logs.txt', 'r')
for line in log_list_file:
    sourcepath = line.split()[0]  # XXX is this correct?
    if sourcepath.startswith(sourcedrive):
        destpath = sourcepath.replace(sourcedrive, destdrive, 1)
    else:
        print >>sys.stderr, 'Skipping %s: Not on %s' % (sourcepath, sourcedrive)
        continue
    destdir = os.path.dirname(destpath)
    if not os.path.isdir(destdir):
        try:
            os.makedirs(destdir)
        except (OSError, IOError, shutil.Error) as e:
            print >>sys.stderr, 'Error making %s: %s' % (destdir, e)
            continue
    try:
        shutil.move(sourcepath, destpath)
    except (OSError, IOError, shutil.Error) as e:
        print >>sys.stderr, 'Error moving %s to %s: %s' % (sourcepath, destpath, e)
Do you also want to remove the source directory if it's empty?
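If so, one way to do it after each successful move is sketched below; os.removedirs removes the now-empty source directory and then any empty parents, stopping with an OSError at the first non-empty one:

sourcedir = os.path.dirname(sourcepath)
try:
    os.removedirs(sourcedir)  # only removes directories that are empty
except OSError:
    pass  # directory not empty (or already removed); leave it in place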
Update: Ah, ok, I see the problem -- shutil.move won't copy to a nonexistent directory. To do what you're trying to do, you have to create the new directory tree first. Since it's a bit safer to use a built-in move function than to roll your own copy-and-delete procedure, you could do this:
with open('test_logs.txt', 'r') as f:
    files_to_copy = [line.split()[0] for line in f]

paths_to_copy = set(os.path.split(filename)[0] for filename in files_to_copy)

def ignore_files(path, names, ptc=paths_to_copy):
    return [name for name in names if os.path.join(path, name) not in ptc]

shutil.copytree(src, dst, ignore=ignore_files)

for filename in files_to_copy:
    output_file = "C:" + filename.lstrip("D:")
    shutil.move(filename, output_file)
Let me know if that doesn't work
Original Post: If you want to move only some of the files, your best bet is to use shutil.copytree's ignore keyword. Assuming your list of files includes full paths and directories (i.e. ['D:\test\test1\test1.txt', 'D:\test\test1\test2.txt', 'D:\test\test1']), create an ignore_files function and use it like this:
files_to_copy = ['D:\test\test1\test1.txt', 'D:\test\test1\test2.txt', 'D:\test\test1']

def ignore_files(path, names, ftc=files_to_copy):
    return [name for name in names if os.path.join(path, name) not in ftc]

shutil.copytree(src, dst, ignore=ignore_files)
Then you can just delete the files in files_to_copy:
for f in files_to_copy:
    try:
        os.remove(f)
    except OSError:  # can't remove() a directory, so pass
        pass
I tested this -- make certain that you include the paths you want to copy as well as the files in files_to_copy -- otherwise, this will delete files without copying them.
