path does not exist after being created with tmp_path fixture - python

Edit: here's a git repo for easy testing:
https://gitlab.com/qualisign/ugit-bdd/
I want to refactor some repeated code from a step_def file to a conftest.py file. Here's what the step_def looks like:
@scenario('../features/CLI.feature',
          'store file in object database')
def test_file_stored_by_content_address():
    pass


@given("a file exists at some full path within a ugit dir", target_fixture="file_exists_at_path")
def file_exists_at_path(file_within_ugit_dir):
    return file_within_ugit_dir


@when("I enter ugit hash-object followed by that path")
def file_gets_hashed(file_exists_at_path):
    dir_name = os.path.dirname(file_exists_at_path)
    base_name = os.path.basename(file_exists_at_path)
    os.chdir(dir_name)
    os.system(f'ugit hash-object {base_name}')


@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        contents = f.read()
        with open(file_path, "rb") as hf:
            assert hf.read() == f.read()
And here's the conftest.py:
import os
import subprocess
import hashlib

import pytest
from pytest_bdd import scenario, given, when, then, parsers

WISE_WORDS = "Don\\'t be a fool! I\\'ll call you later."


@pytest.fixture(scope="session")
def is_ugit_dir(tmp_path_factory):
    path = tmp_path_factory.mktemp('data')
    os.chdir(path)
    subprocess.run(['ugit', 'init'])
    return path


@pytest.fixture
def file_within_ugit_dir(is_ugit_dir):
    path = is_ugit_dir
    full_path = f'{path}/wise_words.txt'
    os.system(f'echo {WISE_WORDS} > wise_words.txt')
    return full_path


@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    """
    Returns the full path to a hash-object within the objects database
    """
    subprocess.run(['ugit', 'hash-object', file_within_ugit_dir])
    # there should now be a file with a sha1 content-address in the following directory
    objects_dir = os.path.dirname(is_ugit_dir)+'/.ugit/objects/'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        sha_hash = hashlib.sha1 (f.read()).hexdigest ()
    return objects_dir+sha_hash
When I run the test, it seems that the temporary directory is not being kept open between steps:
t-74/.ugit/objects/7b5ee3d8d42c66048125a3937a0170ffdaf7b272'
#then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_hashed):
> with open(file_hashed, "rb") as f:
E FileNotFoundError: [Errno 2] No such file or directory: '/private/var/folders/m2/99x5jvw95ll6sbtgvj5md9700000gp/T/pytest-of-davidjoseph/pytest-74/.ugit/objects/7b5ee3d8d42c66048125a3937a0170ffdaf7b272'
/Users/davidjoseph/projects/ugit-bdd/tests/step_defs/test_cli.py:43: FileNotFoundError
-------------------------------------- Captured stdout call ---------------------------------------
Initialized empty ugit repository in /private/var/folders/m2/99x5jvw95ll6sbtgvj5md9700000gp/T/pytest-of-davidjoseph/pytest-74/data1/.ugit
7b5ee3d8d42c66048125a3937a0170ffdaf7b272
Is there any way to keep this temp directory open so it can be reused between fixtures in the conftest.py file, and eventually in the step_def file?

Changing the scope of the is_ugit_dir fixture to "session", as suggested in the comment, is sufficient; the rest of the problems are errors in your own code:
path = tmp_path_factory.mktemp('data')
os.chdir(path)
subprocess.run(['ugit', 'init'])
You change the current working directory to /tmp/pytest-smth/data and invoke ugit init in there - I assume the tool creates repository metadata at /tmp/pytest-smth/data/.ugit then. Later, you use
objects_dir = os.path.dirname(is_ugit_dir)+'/.ugit/objects/'
to compute the objects dir - this gives you /tmp/pytest-smth/.ugit/objects. No wonder this directory doesn't exist. Changing it to e.g. objects_dir = is_ugit_dir / '.ugit' / 'objects' fixes the first error. As a follow-up, the return value of the file_hashed fixture has to be changed to objects_dir / sha_hash to work with pathlib paths.
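For illustration, a minimal sketch of that path arithmetic (the /tmp/pytest-smth prefix is made up): os.path.dirname applied to the repo directory itself climbs one level too high, while joining onto the fixture's path stays inside the repo.

import os
from pathlib import Path

repo = Path('/tmp/pytest-smth/data')   # what tmp_path_factory.mktemp('data') might return
print(os.path.dirname(repo))           # /tmp/pytest-smth -- one level too high
print(repo / '.ugit' / 'objects')      # /tmp/pytest-smth/data/.ugit/objects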
    contents = f.read()
    with open(file_path, "rb") as hf:
        assert hf.read() == f.read()
Aside from the fact that file_path is not defined (I guess this should be file_within_ugit_dir), you are reading the file into contents and then reading it a second time. Why? Either rewind the file via f.seek(0) before invoking f.read() again, or use contents for the comparison.
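Concretely, the then-step could compare like this (a sketch of the contents variant; it ignores, for the moment, the blob header that the full code below strips off):

@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        contents = f.read()              # read once, keep the bytes
    with open(file_within_ugit_dir, "rb") as hf:
        assert hf.read() == contents     # compare against the stored bytes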
Here's the full working code, with minimal necessary changes:
conftest.py
import os
import subprocess
import hashlib

import pytest
from pytest_bdd import scenario, given, when, then, parsers

WISE_WORDS = "Don\\'t be a fool! I\\'ll call you later."


@pytest.fixture(scope="session")
def is_ugit_dir(tmp_path_factory):
    path = tmp_path_factory.mktemp('data')
    os.chdir(path)
    subprocess.run(['ugit', 'init'])
    return path


@pytest.fixture
def file_within_ugit_dir(is_ugit_dir):
    path = is_ugit_dir
    full_path = path / 'wise_words.txt'
    os.system(f'echo {WISE_WORDS} > wise_words.txt')
    return full_path


@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    """
    Returns the full path to a hash-object within the objects database
    """
    subprocess.run(['ugit', 'hash-object', file_within_ugit_dir])
    # there should now be a file with a sha1 content-address in the following directory
    objects_dir = is_ugit_dir / '.ugit' / 'objects'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        data = b'blob\x00' + f.read()  # prepend the object type
        sha_hash = hashlib.sha1(data).hexdigest()
    return objects_dir / sha_hash
step_def.py
import os

from pytest_bdd import scenario, given, when, then, parsers


@scenario('features/CLI.feature', 'store file in object database')
def test_file_stored_by_content_address():
    pass


@given("a file exists at some full path within a ugit dir", target_fixture="file_exists_at_path")
def file_exists_at_path(file_within_ugit_dir):
    return file_within_ugit_dir


@when("I enter ugit hash-object followed by that path")
def file_gets_hashed(file_exists_at_path):
    dir_name = os.path.dirname(file_exists_at_path)
    base_name = os.path.basename(file_exists_at_path)
    os.chdir(dir_name)
    os.system(f'ugit hash-object {base_name}')


@then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
    with open(file_hashed, "rb") as f:
        contents = f.read().strip(b"blob\x00")
    with open(file_within_ugit_dir, "rb") as hf:
        assert hf.read() == contents

I would say that you have a logic issue in your code.
According to the test scenario, the file_hashed fixture must return a path to an existing file containing the hash. One can see it here:
#then("this object is stored in a content-addressed location in the subdirectory .ugit/objects")
def object_saved_in_db(file_within_ugit_dir, file_hashed):
with open(file_hashed, "rb") as f:
contents = f.read()
with open(file_path, "rb") as hf:
assert hf.read() == f.read()
In the conftest.py you are not creating the file containing the hash. You only construct a dummy path to it, and because there is nothing at that path, you get FileNotFoundError. The error is here (your code does not create a hash file):
@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    objects_dir = os.path.dirname(is_ugit_dir)+'/.ugit/objects/'
    with open(file_within_ugit_dir, "rb") as f:
        # first calculate the hash
        sha_hash = hashlib.sha1 (f.read()).hexdigest ()
    return objects_dir+sha_hash
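One way to surface this kind of problem earlier (a sketch, assuming the ugit CLI is on PATH and writes its objects under .ugit/objects inside the repo directory, using the blob type header from the fix above) is to assert inside the fixture that the computed object path really exists before returning it:

@pytest.fixture
def file_hashed(is_ugit_dir, file_within_ugit_dir):
    subprocess.run(['ugit', 'hash-object', file_within_ugit_dir])
    objects_dir = is_ugit_dir / '.ugit' / 'objects'
    with open(file_within_ugit_dir, "rb") as f:
        data = b'blob\x00' + f.read()  # prepend the object type, as in the fix above
    object_path = objects_dir / hashlib.sha1(data).hexdigest()
    # fail here with a clear message instead of later in the then-step
    assert object_path.exists(), f"no object was written at {object_path}"
    return object_path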

Related

Most efficient way of cleaning up files after a test?

I have written some test cases which test a function I have written. The function simply counts the number of files in a particular directory. Eventually I will have another function which will behave in a certain way depending on how many files are in each directory. In this case I am working with two directories. This is my function:
dir_handler.py
from pathlib import Path


def count_files_in_dir(dirpath):
    assert(dirpath.is_dir())
    file_list = []
    for file in dirpath.iterdir():
        if file.is_file():
            file_list.append(file)
    return len(file_list)
And here are my test cases:
test_dir_handler.py
from imports import *
import os
from main.dir_handler import count_files_in_dir


class DirHandlerTests(unittest.TestCase):
    def test_return_count_of_zero_when_no_file_exists_in_input_dir(self):
        self.assertEqual(0, count_files_in_dir(INPUT_FILE_PATH))

    def test_return_count_of_zero_when_no_file_exists_in_output_dir(self):
        self.assertEqual(0, count_files_in_dir(OUTPUT_FILE_PATH))

    def test_return_count_of_one_when_one_file_exists_in_input_dir(self):
        with open(str(INPUT_FILE_PATH) + "/" + "input.csv", "w") as file:
            self.assertEqual(1, count_files_in_dir(INPUT_FILE_PATH))

    def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
        with open(str(OUTPUT_FILE_PATH) + "/" + "output.csv", "w") as file:
            self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))

    def test_return_count_of_two_when_two_files_exists_in_output_dir(self):
        with open(str(OUTPUT_FILE_PATH) + "/" + "output.csv", "w") as file:
            with open(str(OUTPUT_FILE_PATH) + "/" + "output2.csv", "w") as file:
                self.assertEqual(2, count_files_in_dir(OUTPUT_FILE_PATH))

    # clearing up testing files at the end of each test
    def tearDown(self):
        try:
            os.remove(str(INPUT_FILE_PATH) + "/" + "input.csv")
        except FileNotFoundError as e:
            pass
        try:
            os.remove(str(OUTPUT_FILE_PATH) + "/" + "output.csv")
        except FileNotFoundError as e:
            pass
        try:
            os.remove(str(OUTPUT_FILE_PATH) + "/" + "output2.csv")
        except FileNotFoundError as e:
            pass


if __name__ == '__main__':
    unittest.main()
As you can see, I am having to remove "input.csv", "output.csv" and "output2.csv" individually, which is not very efficient. Both INPUT_FILE_PATH and OUTPUT_FILE_PATH are under the same directory "files". All tests pass, but I would like recommendations on the best way of cleaning the INPUT_FILE_PATH and OUTPUT_FILE_PATH directories at the end of my tests. Thank you
EDIT:
Using @rockport's suggestion I have implemented setUp / tearDown methods. The code works as desired but is still quite messy. It clears up both the output_file folder and the input_file folder at the end of the tests. I have also switched from os to pathlib because I will be running and editing the code on both Mac and Windows. Here are the changes to my code:
def setUp(self):
    self.input_file = INPUT_FILE_PATH.joinpath("input.csv")
    self.output_file = OUTPUT_FILE_PATH.joinpath("output.csv")
    self.output_file2 = OUTPUT_FILE_PATH.joinpath("output2.csv")

def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
    with self.output_file.open(mode='w') as file:
        self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))

def test_return_count_of_two_when_two_files_exist_in_output_dir(self):
    with self.output_file.open(mode='w') as file:
        with self.output_file2.open(mode='w') as file:
            self.assertEqual(2, count_files_in_dir(OUTPUT_FILE_PATH))

def tearDown(self):
    for file in INPUT_FILE_PATH.iterdir():
        try:
            file.unlink()
        except FileNotFoundError as e:
            pass
    for file in OUTPUT_FILE_PATH.iterdir():
        try:
            file.unlink()
        except FileNotFoundError as e:
            pass
What you want is shutil.rmtree which will delete the whole directory, including any sub-directories and files in it. After that, you can recreate the directory with os.mkdir or os.makedirs. Here is an example:
import os
import shutil
shutil.rmtree(INPUT_FILE_PATH)
os.mkdir(INPUT_FILE_PATH)
If you don't want to delete the whole tree, you could simply append the paths to a list and iterate over that list afterwards to remove every path in it:
pathlist = []

def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
    path = str(OUTPUT_FILE_PATH) + "/output.csv"
    pathlist.append(path)
    with open(path, "w") as file:
        self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))
And then:
for path in pathlist:
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
Best practice is to implement a setUp method where you create your files in a temporary folder, then run your actual test(s) against that folder. Explicit removal is then not necessary.
Unit tests should not rely on the environment, such as files outside the test folder. This is why we use fixtures in testing.
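As a sketch of that approach (assuming count_files_in_dir accepts any pathlib.Path), each test gets its own fresh temporary directory and nothing has to be removed by name:

import shutil
import tempfile
import unittest
from pathlib import Path

from main.dir_handler import count_files_in_dir


class DirHandlerTests(unittest.TestCase):
    def setUp(self):
        # a fresh, empty directory for every test
        self.dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        # remove the whole tree, whatever the test created in it
        shutil.rmtree(self.dir)

    def test_return_count_of_two_when_two_files_exist(self):
        (self.dir / "output.csv").touch()
        (self.dir / "output2.csv").touch()
        self.assertEqual(2, count_files_in_dir(self.dir))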
The simplification you are asking for could be
def tearDown(self):
    for p, f in ((INPUT_FILE_PATH, "input.csv"),
                 (OUTPUT_FILE_PATH, "output.csv"),
                 (OUTPUT_FILE_PATH, "output2.csv")):
        try:
            os.remove(str(p) + "/" + f)
        except FileNotFoundError:
            pass
For your edit, why not just:
def test_return_count_of_one_when_one_file_exists_in_output_dir(self):
    self.assertEqual(1, count_files_in_dir(OUTPUT_FILE_PATH))

Can't change string value to variable with string value

I upload a file to the Dropbox API, but it recreates on Dropbox the full directory structure from my computer, starting at the root folder: the home folder, then the user folder, and so on down to the folder containing the source file. If I cut that structure down myself, the library no longer accepts the result as a valid file path and gives an error message.
My code is:
def upload_file(project_id, filename, dropbox_token):
    dbx = dropbox.Dropbox(dropbox_token)
    file_path = os.path.abspath(filename)
    with open(filename, "rb") as f:
        dbx.files_upload(f.read(), file_path, mute=True)
    link = dbx.files_get_temporary_link(path=file_path).link
    return link
It works, but I need something like:
file_path = os.path.abspath(filename)
chunks = file_path.split("/")
name, dir = chunks[-1], chunks[-2]
which gives me an error like:
dropbox.exceptions.ApiError: ApiError('433249b1617c031b29c3a7f4f3bf3847', GetTemporaryLinkError('path', LookupError('not_found', None)))
How could I make only parent folder and filename in the path?
For example if I have
/home/user/project/file.txt
I need
/project/file.txt
You have /home/user/project/file.txt and you need /project/file.txt.
I would split on the OS default separator (so it also works with Windows paths), then rebuild only the last 2 parts in the proper format (separator + part) and join them.
import os
#os.sep = "/" # if you want to test that on Windows
s = "/home/user/project/file.txt"
path_end = "".join(["{}{}".format(os.sep,x) for x in s.split(os.sep)[-2:]])
result:
/project/file.txt
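The same trimming can also be written with pathlib, which handles the separator parsing for you (a sketch; the hard-coded "/" in the result matches the forward-slash paths the Dropbox API expects):

from pathlib import PurePath

s = "/home/user/project/file.txt"
path_end = "/" + "/".join(PurePath(s).parts[-2:])   # '/project/file.txt'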
I assume the following code should work:
def upload_file(project_id, filename, dropbox_token):
    dbx = dropbox.Dropbox(dropbox_token)

    abs_path = os.path.abspath(filename)
    directory, file = os.path.split(abs_path)
    _, directory = os.path.split(directory)
    dropbox_path = os.path.join(directory, file)

    with open(abs_path, "rb") as f:
        dbx.files_upload(f.read(), dropbox_path, mute=True)
    link = dbx.files_get_temporary_link(path=dropbox_path).link
    return link

File Open Fails When W+ Argument Used

I have...
datadir = os.path.dirname(__file__) + '/../some/place'
session_file = '/user_session.json'

with open(datadir + session_file, 'r') as data_file:
    data = json.load(data_file)
    print data
And this works as expected. I can load the json in my json file and access it.
I want to use the w+ argument so that if the file does not exist it gets created (albeit blank).
Except when I use w+ the load fails with the error below and the file is overwritten with a blank one.
ValueError('No JSON object could be decoded',)
How can I create the file if it's not there, but read it if it is, without failing like this?
You want to check if the file exists and react accordingly:
import json
import os.path

datadir = os.path.dirname(__file__)
session_file = 'user_session.json'

path = os.path.join(datadir, '..', 'some', 'place', session_file)

# If the file exists, read the data.
if os.path.exists(path):
    with open(path, 'r') as f:
        data = json.load(f)
    print data
else:
    with open(path, 'w+') as f:
        # Initialize the session file as you see fit.
        # You can't use json.load(f) here as the file was just created,
        # and so it would not decode into JSON, thus raising the same error
        # you are running into.
        pass
Note the use of os.path.join here; this is a better way to construct file paths rather than concatenating strings. Essentially, using os.path.join ensures that the file path will still contain valid slashes regardless of your operating system.
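Another way to express the same idea is to skip the separate exists check and react to the failed open directly (a sketch; the empty {} written on the first run is just a placeholder for whatever initial session data you want):

import json
import os.path

path = os.path.join(os.path.dirname(__file__), '..', 'some', 'place', 'user_session.json')

try:
    with open(path, 'r') as f:
        data = json.load(f)
except IOError:
    # file is missing: create it with initial content so the next run can read it
    with open(path, 'w') as f:
        json.dump({}, f)
    data = {}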
try to test whether the file is there
import os.path
import os
import json

datadir = os.path.dirname(__file__) + '/../some/place'
session_file = '/user_session.json'
path = datadir + session_file

if not os.path.exists(path):
    # create an empty file first if it is missing
    open(path, 'w+').close()

with open(path, 'r') as data_file:
    data = json.load(data_file)
    print data

How do I zip a file in Python without adding folders?

Currently I use the following code:
import zipfile

root_path = 'C:/data/'

def zipping_sql_database():
    zf = zipfile.ZipFile(root_path + 'file.zip', mode='w')
    try:
        zf.write(root_path + "file.db")
    finally:
        zf.close()
At the moment it creates a zip file, but the zip file contains the whole folder named 'data', which in turn contains 'file.db'. How can I create a zip file that contains only 'file.db', without the enclosing folder?
I found that I get the right behavior via:
import zipfile

root_path = 'C:/data/'

def zipping_sql_database():
    zf = zipfile.ZipFile(root_path + 'file.zip', mode='w')
    try:
        zf.write(root_path + "file.db", "file.db")
    finally:
        zf.close()
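The second argument to ZipFile.write is the arcname, i.e. the name stored inside the archive; passing just the base name is what keeps the folder out. A slightly more general sketch of the same idea (the function name is made up):

import os
import zipfile

def zip_single_file(src_path, zip_path):
    # store src_path under its base name only, with no directory components
    with zipfile.ZipFile(zip_path, mode='w') as zf:
        zf.write(src_path, arcname=os.path.basename(src_path))

zip_single_file('C:/data/file.db', 'C:/data/file.zip')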

IPython Search PYTHONPATH for file

Is there any way to make it so that IPython searches through your system path to find a file if only the name of the file is given?
I have a file foo_060112.dat that lives, along with a lot of other uniquely named data files, in a folder that is on my path. I want to be able to simply call a load or open function on this file without specifying the full path. Is that possible?
You can write your own function to search through the path and return an open file object according to the mode you select:
import sys
import os

def my_open(filename, mode):
    for path in sys.path:
        try:
            return open(os.path.join(path, filename), mode)
        except IOError:
            continue
    return None
Example: my_open('foo_060112.dat', 'rb')
I think this may help you:
import os

# locate prints matching paths; capture its output rather than its exit status
path = os.popen('locate foo_060112.dat').read().strip()
print path
or you can also use:
_SOURCE_FILE_PATH = '/home/admin/'
_each_folder = os.walk("%s/" % (_SOURCE_FILE_PATH))
for x in _each_folder:
    print x
    if x[1] == []:
        for y in x[2]:
            if y == 'foo_060112.dat':
                # open the file via its full path, not just the bare name
                f = open(os.path.join(x[0], y), 'r')
                data = f.read()