Same tests over many similar data files

Same tests over many similar data files - python

With python and unittest I have this structure of test directory:
tests/
__init__.py
test_001.py
data/
data_001_in.py
data_001_out.py
where
data_001_in.py : the input data to use in the functions to test
data_001_out.py : the output data expected from the function to test
I have the inputs and outputs in python dictionaries because it is easier for me than using json, sqlite, etc.
I am trying to use a set of input/output data files with the same format and apply the same tests to each pair of files:
tests/
__init__.py
test_001.py
data/
data_001_in.py
data_001_out.py
data_002_in.py
data_002_out.py
data_003_in.py
data_003_out.py
Is there any package or approach that makes this task easier?

Inspired by the question "nose, unittest.TestCase and metaclass: auto-generated test_* methods not discovered", I solved this with a metaclass. First, I changed the data directory structure to:
├── data
│   ├── __init__.py
│   ├── data001
│   │   ├── __init__.py
│   │   ├── datain.py
│   │   ├── dataout.py
│   └── data002
│   ├── __init__.py
│   ├── datain.py
│   ├── dataout.py
└── metatest.py
Second, I wrote a metaclass that creates a new test for every data subdirectory from each base (`_test*`) method.
import unittest
import os
import copy
def data_dir():
    """Return the path of the ``data`` directory located next to this file."""
    here = os.path.dirname(__file__)
    return os.path.join(here, 'data')
def get_subdirs(dir_name):
    """Return the names of the immediate subdirectories of ``dir_name``.

    Only the bare entry names are returned (not full paths), because callers
    use them as package names.  Plain files are skipped.  The result is
    sorted so the order does not depend on the file system.
    """
    return sorted(
        entry for entry in os.listdir(dir_name)
        if os.path.isdir(os.path.join(dir_name, entry))
    )
def get_data_subdirs():
    """Return the names of the data-set subdirectories of the data directory.

    Entries whose name starts with ``_`` or ``.`` are skipped: Python 3
    creates ``__pycache__`` inside imported packages, and such a directory
    is not a data set and cannot be loaded as one.
    """
    return [
        sub for sub in get_subdirs(data_dir())
        if not sub.startswith(('_', '.'))
    ]
def data_py_load(file_name):
    """Load the ``data`` dictionary defined by a module of the ``data`` package.

    ``file_name`` is a dotted module path relative to the ``data`` package,
    with an optional trailing ``.py`` (for example ``"data001.datain.py"``).

    Returns the module-level ``data`` object of that module.
    Raises ImportError if the module cannot be imported and AttributeError
    if it does not define ``data``.
    """
    # Local import keeps this function self-contained.
    import importlib

    # ``exec("from ... import *")`` cannot create local names on Python 3,
    # so import the module explicitly and read its attribute instead.
    name = file_name[:-3] if file_name.endswith('.py') else file_name
    module = importlib.import_module('data.' + name)
    return module.data
class TestDirectories(type):
    """Metaclass that turns every ``_test*`` template into one test per data subdirectory."""

    def __new__(cls, name, bases, attrs):
        dir_names = get_data_subdirs()
        templates = {
            attr_name: attr
            for attr_name, attr in attrs.items()
            if attr_name.startswith('_test')
        }

        # Load the expected output and the input of every data subdirectory.
        loaded = {}
        for dir_name in dir_names:
            loaded[dir_name] = {
                'name': dir_name,
                'out': data_py_load("{}.dataout.py".format(dir_name)),
                'in': data_py_load("{}.datain.py".format(dir_name)),
            }

        for template_name, template in templates.items():
            public_name = template_name[1:]  # "_test_x" -> "test_x"
            for dir_name in dir_names:
                test_name = "{}_{}".format(public_name, dir_name)
                # Each generated test gets its own deep copy of the data.
                payload = copy.deepcopy(loaded[dir_name])
                # Default arguments bind the current template and payload now,
                # avoiding the late-binding closure problem.
                attrs[test_name] = lambda self, func=template, args=payload: func(self, args)
        return super(TestDirectories, cls).__new__(cls, name, bases, attrs)
class TestData(unittest.TestCase):
    """Test case whose ``_test*`` templates are expanded by ``TestDirectories``."""

    # NOTE: this is the Python 2 spelling.  Python 3 ignores ``__metaclass__``;
    # there the class must be declared as
    # ``class TestData(unittest.TestCase, metaclass=TestDirectories)``.
    __metaclass__ = TestDirectories

    def _test_name(self, data):
        """Template: the input and expected-output data must carry the same name.

        ``data`` is the per-directory dictionary built by the metaclass, with
        the loaded dictionaries under the ``'in'`` and ``'out'`` keys.
        """
        in_name = data['in']['name']
        out_name = data['out']['name']
        # A single pre-formatted argument prints the same text on Python 2 and 3.
        print("{} {}".format(in_name, out_name))
        # assertEqual is the supported name; the assertEquals alias is deprecated.
        self.assertEqual(in_name, out_name)


if __name__ == '__main__':
    unittest.main(verbosity=2)
And, when I run
$ python metatest.py
test_name_data001 (__main__.TestData) ... Alice Alice
ok
test_name_data002 (__main__.TestData) ... Bob Bob
ok
----------------------------------------------------------------------
Ran 2 tests in 0.001s
OK

Related

pytest how to mock functions from another module

I have a directory structure:
├── src
│ └── chkfixt
│ ├── __init__.py
│ ├── config.py
│ ├── main.py
│ └── util.py
└── tests
└── test_chkfixt.py
Files have contents as:
config.py
APP_NAME = 'not_mocked'
util.py
from pathlib import Path
from chkfixt.config import APP_NAME
def get_app_dir() -> Path:
    """Return the application directory as a ``Path`` built from ``APP_NAME``."""
    # The value returned is a Path, so the annotation says Path (it used to say str).
    return Path(APP_NAME)
def get_metadata_pickle_file_path() -> Path:
    """Return the path of ``metadata.pkl`` inside the application directory."""
    return Path(get_app_dir()) / "metadata.pkl"
def save_metadata_to_pickle_file():
    """Print the path the metadata pickle would be saved to (nothing is written)."""
    print(f"saved to {get_metadata_pickle_file_path()}")
main.py
from chkfixt.util import (get_metadata_pickle_file_path,
save_metadata_to_pickle_file)
print(f"pkl_file = {get_metadata_pickle_file_path()}")
save_metadata_to_pickle_file()
If I run main.py output is like this:
(deleteme) user@server:~/tmp/chkfixt$ python src/chkfixt/main.py
pkl_file = not_mocked/metadata.pkl
saved to not_mocked/metadata.pkl
In pytest, I need a different pkl_file for each test, so I have to mock get_metadata_pickle_file_path() function.
My first attempt in test_chkfixt.py was (ignore monkeypatch repetition for now) like this:
import pytest
from rich import print
from chkfixt.main import (get_metadata_pickle_file_path,
save_metadata_to_pickle_file)
@pytest.fixture
def mocked_pkl_file(tmp_path):
    """Return a per-test ``metadata.pkl`` path inside pytest's ``tmp_path``."""
    return tmp_path / "metadata.pkl"
# First attempt. With three arguments, monkeypatch.setattr treats the first
# one as the object to patch, so the target here is the *string*
# "chkfixt.main" itself, not the module of that name.
def test_mocked_get_pkl(mocked_pkl_file, monkeypatch):
monkeypatch.setattr(
"chkfixt.main",
"get_metadata_pickle_file_path",
lambda: str(mocked_pkl_file),
)
print(get_metadata_pickle_file_path())
# First attempt. As a three-argument call, the string "chkfixt.main" is used
# as the object to patch rather than as a module path.
def test_mocked_save_pkl(mocked_pkl_file, monkeypatch):
monkeypatch.setattr(
"chkfixt.main",
"get_metadata_pickle_file_path",
lambda: str(mocked_pkl_file),
)
# save_metadata_to_pickle_file() has no return statement, so this prints None.
print(save_metadata_to_pickle_file())
But that produced errors:
_______________________________________________________________________________________________________________________________ test_mocked_get_pkl ________________________________________________________________________________________________________________________________
mocked_pkl_file = PosixPath('/tmp/pytest-of-user/pytest-89/test_mocked_get_pkl0/metadata.pkl'), monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7ff900732ec0>
def test_mocked_get_pkl(mocked_pkl_file, monkeypatch):
> monkeypatch.setattr(
"chkfixt.main",
"get_metadata_pickle_file_path",
lambda: str(mocked_pkl_file),
)
E AttributeError: 'chkfixt.main' has no attribute 'get_metadata_pickle_file_path'
tests/test_chkfixt.py:14: AttributeError
_______________________________________________________________________________________________________________________________ test_mocked_save_pkl _______________________________________________________________________________________________________________________________
mocked_pkl_file = PosixPath('/tmp/pytest-of-user/pytest-89/test_mocked_save_pkl0/metadata.pkl'), monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7ff9007b5db0>
def test_mocked_save_pkl(mocked_pkl_file, monkeypatch):
> monkeypatch.setattr(
"chkfixt.main",
"get_metadata_pickle_file_path",
lambda: str(mocked_pkl_file),
)
E AttributeError: 'chkfixt.main' has no attribute 'get_metadata_pickle_file_path'
tests/test_chkfixt.py:23: AttributeError
Here, I am confused with error that is saying 'chkfixt.main' has no attribute 'get_metadata_pickle_file_path'.
If I change test functions like this (merged "chkfixt.main", "get_metadata_pickle_file_path" to "chkfixt.main.get_metadata_pickle_file_path"):
# Second attempt. The dotted string rebinds the attribute on the chkfixt.main
# module, but the call below uses the name this test module obtained with
# "from chkfixt.main import ...", which still refers to the original function.
def test_mocked_get_pkl(mocked_pkl_file, monkeypatch):
monkeypatch.setattr(
"chkfixt.main.get_metadata_pickle_file_path",
lambda: str(mocked_pkl_file),
)
print(get_metadata_pickle_file_path())
# Second attempt. save_metadata_to_pickle_file is defined in chkfixt.util and
# calls util's own get_metadata_pickle_file_path, so rebinding the name on
# chkfixt.main does not change what it calls.
def test_mocked_save_pkl(mocked_pkl_file, monkeypatch):
monkeypatch.setattr(
"chkfixt.main.get_metadata_pickle_file_path",
lambda: str(mocked_pkl_file),
)
print(save_metadata_to_pickle_file())
I am not getting errors anymore, but test output is still not_mocked/metadata.pkl:
platform linux -- Python 3.10.4, pytest-7.1.2, pluggy-1.0.0
rootdir: /home/user/tmp/chkfixt
plugins: forked-1.4.0, cov-3.0.0, black-0.3.12, mypy-0.9.1, xdist-2.5.0, flake8-1.1.1
collecting ... pkl_file = not_mocked/metadata.pkl
saved to not_mocked/metadata.pkl
collected 2 items
tests/test_chkfixt.py not_mocked/metadata.pkl
.saved to not_mocked/metadata.pkl
None
.
As output shows, both paths are not_mocked/metadata.pkl, but I expect them to be from mocked_pkl_file fixture.
How to mock main.py get_metadata_pickle_file_path and save_metadata_to_pickle_file functions to use path from mocked_pkl_file fixture.

You should be patching the imported objects, not strings. Also, import the module, don't use from .. import ... statements, so that pytest is able to access the module it is patching:
import chkfixt.main
import chkfixt.util


def test_mocked_save_pkl(mocked_pkl_file, monkeypatch):
    """save_metadata_to_pickle_file must report the per-test pickle path."""
    # save_metadata_to_pickle_file is defined in chkfixt.util and looks
    # get_metadata_pickle_file_path up in that module's globals, so the
    # replacement has to be installed on chkfixt.util.
    monkeypatch.setattr(
        chkfixt.util,
        "get_metadata_pickle_file_path",
        lambda: str(mocked_pkl_file),
    )
    # Call through the module so nothing is bound before the patch is applied.
    chkfixt.main.save_metadata_to_pickle_file()
See the how to monkey patch docs for more examples.

Build a tree from a list of objects

I am trying to build a tree from a list of objects, which are characterised by several properties.
Each object can have 3 properties that play a role in building a tree, that is velocity altitude exposure.
#Data structure
class Test():
    """Plain attribute container for one measurement."""
    pass


tests = []
for el in data:
    test = Test()
    # velocity: 100km/h or 120km/h or 140km/h
    # altitude: 20m or 40m or 60m
    # exposure: 10uSv or 20uSv or 30uSv
    test.velocity, test.altitude, test.exposure = el[0], el[1], el[2]
    # more data is assigned which is not shown as irrelevant
    tests.append(test)
# I am trying to build a data tree like the one below.
# Obviously it would be different than this one and depend on actual data input.
# Example built statically using anytree
Tests
├── 100km/h
│ ├── 20m
│ │ └── 10uSv
│ ├── 40m
│ │ └── 10uSv
│ └── 60m
│ └── 20uSv
├── 120km/h
│ ├── 40m
│ │ ├── 20uSv
│ │ └── 30uSv
│ └── 60m
│ ├── 20uSv
│ └── 30uSv
└── 140km/h
└── 20m
└── 30uSv
Although this problem looks simple (might not be) I just can not figure it out.
Cheers!

You don't provide the original data, so I tried to guess it.
I started with constructing nested dicts and then converting them to anytree format with recursive function.
Here is the code:
import itertools
import anytree
data = [
    ['100km/h', '20m', '10uSv'],
    ['100km/h', '40m', '10uSv'],
    ['100km/h', '60m', '20uSv'],
    ['120km/h', '40m', '20uSv'],
    ['120km/h', '40m', '30uSv'],
    ['120km/h', '60m', '20uSv'],
    ['120km/h', '60m', '30uSv'],
    ['140km/h', '20m', '30uSv'],
]

# Group the rows into nested dicts: velocity -> altitude -> exposure -> True
res = {'Tests': {}}
for velocity, altitude, exposure in data:
    by_altitude = res['Tests'].setdefault(velocity, {})
    by_exposure = by_altitude.setdefault(altitude, {})
    by_exposure.setdefault(exposure, True)

__import__('pprint').pprint(res)
# {'Tests': {'100km/h': {'20m': {'10uSv': True},
# '40m': {'10uSv': True},
# '60m': {'20uSv': True}},
# '120km/h': {'40m': {'20uSv': True, '30uSv': True},
# '60m': {'20uSv': True, '30uSv': True}},
# '140km/h': {'20m': {'30uSv': True}}}}
# Convert nested dicts to anytree format
def dict_to_tree(d, parent=None):
    """Attach one ``anytree.Node`` per key of the nested dict ``d`` under ``parent``.

    A boolean value marks a leaf, so recursion stops there.  Returns None.
    """
    if not isinstance(d, bool):
        for name, subtree in d.items():
            dict_to_tree(subtree, anytree.Node(name, parent))
# Build the tree under a single root node, then print it line by line.
root = anytree.Node('Tests')
dict_to_tree(res['Tests'], root)
for prefix, _fill, item in anytree.RenderTree(root):
    print(f'{prefix}{item.name}')
# Tests
# ├── 100km/h
# │ ├── 20m
# │ │ └── 10uSv
# │ ├── 40m
# │ │ └── 10uSv
# │ └── 60m
# │ └── 20uSv
# ├── 120km/h
# │ ├── 40m
# │ │ ├── 20uSv
# │ │ └── 30uSv
# │ └── 60m
# │ ├── 20uSv
# │ └── 30uSv
# └── 140km/h
# └── 20m
# └── 30uSv

Automatically save files in a particular folder using python

I have a pandas dataframe df that looks like this:
user date lat lon
Peter 2019-03-04 37.749798 -122.492301
Peter 2019-03-04 37.751028 -122.492291
Peter 2019-03-04 37.751698 -122.491701
Peter 2019-03-04 37.751800 -122.489748
David 2019-03-04 38.122893 -122.242051
David 2019-03-04 38.124108 -122.239079
David 2019-03-04 38.127434 -122.238672
Emma 2019-03-04 37.893635 -122.076538
Joyce 2019-03-04 37.536206 -121.997080
Joyce 2019-03-04 37.538044 -121.993153
Joyce 2019-03-04 37.540443 -121.991415
Using the loop below I am able to create four separate .html files that are named after the users in the user column (Peter.html, David.html, Emma.html, Joyce.html) and show the coordinates per user on a Folium map.
def create_user_map(user):
#create the map
return map
for user in users:
user_map = create_user_map(user)
user_file = f"{user}.html"
user_map.save(user_file)
Now I would like to automatically store these files in the folder structure below using python. How can I extend the loop above to achieve this?
Report/
├── Reports per date/
│ ├── 2019-03-01/
│ ├── 2019-03-02/
│ ├── 2019-03-03/
│ └── 2019-03-04/
│ └── Users/
│ └── Peter/
│ └── Peter.html
│ └── David/
│ └── David.html
│ └── Emma/
│ └── Emma.html
│ └── Joyce/
│ └── Joyce.html
│
└── Reports per month/

First, you've got to make the directories, you can automate this as follows:
import os

# Base directories, parents first.
directories = ["Reports", "Reports/Reports per date"]
for directory in directories:
    # exist_ok=True makes this a no-op when the directory is already there.
    os.makedirs(directory, exist_ok=True)
You would also need to do the same thing for the date:
from datetime import datetime

date = datetime.today().strftime("%Y-%m-%d")  # just an example for date
# Create the directory for this date (and any missing parents).
date_directory = f"Reports/Reports per date/{date}"
os.makedirs(date_directory, exist_ok=True)
'''
Then for each user, you need to create the directory, then save the file:
'''
for user in users:
    user_map = create_user_map(user)
    # Create this user's own directory (not whatever ``directory`` last held).
    user_directory = f"Reports/Reports per date/{date}/{user}"
    os.makedirs(user_directory, exist_ok=True)
    user_file = f"{user_directory}/{user}.html"
    user_map.save(user_file)
Putting everything together:
import os
from datetime import datetime


def create_user_map(user):
    """Placeholder: build and return the Folium map for ``user``."""
    #create the map
    return map


# initialize directories
directories = ["Reports", "Reports/Reports per date"]
for directory in directories:
    os.makedirs(directory, exist_ok=True)

# initialize date directory
date = datetime.today().strftime("%Y-%m-%d")
date_directory = f"Reports/Reports per date/{date}"
os.makedirs(date_directory, exist_ok=True)

# for each user create the map then save html file
for user in users:
    user_map = create_user_map(user)
    # Each user gets a directory of their own below the date directory.
    user_directory = f"{date_directory}/{user}"
    os.makedirs(user_directory, exist_ok=True)
    user_file = f"{user_directory}/{user}.html"
    user_map.save(user_file)

I assume that at some point you've pulled the date for each entry, likely when you're creating the map, so let's call that variable 'user_date'.
import os

for user in users:
    user_map = create_user_map(user)
    # One directory per date; created only if it does not exist yet.
    date_directory = f"./{user_date}"
    os.makedirs(date_directory, exist_ok=True)
    user_file = f"{date_directory}/{user}.html"
    user_map.save(user_file)
This should create the directory for each date (if it does not already exist), set the user_file, and then save it.

importlib.reload() not realoading

I am trying to create a LeetCode-like online judge. I need to reload the submission module, but importlib.reload() does not work.
The code:
# Loads an exercise package from ``puzzles`` and runs its ``main``.
class Test:
# Module object of the exercise loaded last, and the name it was loaded under.
current_exercise = None
current_name = None
def _import(self, exercise):
exercise = 'exercise' # for testing
if exercise == self.current_name:
module = sys.modules[f'puzzles.{exercise}']
# reload() re-executes only this package's __init__ module; its
# "from .solution import main" reuses the already-imported solution
# submodule, which this call does not reload.
self.current_exercise = importlib.reload(module) # <---- not working
else:
self.current_name = exercise
# __import__ returns the top-level ``puzzles`` package; ``.exercise`` is the
# fixed attribute name, matching the hard-coded test value above.
self.current_exercise = __import__(f'puzzles.{exercise}').exercise
def _test(self, exercise):
# _import has no return statement, so ``solution`` is always None.
solution = self._import(exercise)
print(self.current_exercise.main())
if __name__=='__main__':
import shutil
t= Test()
# first run
t._test('exercise')
# copy another solution.py for reload test
shutil.copy(f"./puzzles/other_exercise/solution.py", f"./puzzles/exercise/solution.py")
# second run
t._test('exercise')
My directory:
.
├── codetest.py
├── puzzles
│   ├── __init__.py
│   ├── exercise
│   │   ├── __init__.py
│   │   ├── solution.py
│ ├── other_exercise
│ │ ├── __init__.py
│ │ ├── solution.py
exercise/solution.py:
def main():
print('EXERCISE')
exercise/__init__.py:
from .solution import main
from .test import cases
other_exercise/solution.py:
def main():
print('OTHER EXERCISE')
Output:
> EXERCISE
> EXERCISE # <--- not sucessfull, should be 'OTHER EXERCISE'

This works:
import sys
import time
import importlib
class Test:
    """Loads an exercise package from ``puzzles`` and re-reads its solution on repeat."""

    current_exercise = None
    current_name = None

    def _import(self, exercise):
        """Import ``puzzles.<exercise>``, or reload its ``solution`` submodule if already current."""
        if exercise != self.current_name:
            self.current_name = exercise
            self.current_exercise = importlib.import_module(f'puzzles.{exercise}')
        else:
            # Reload the submodule itself; reloading the package would not re-read it.
            refreshed = importlib.reload(self.current_exercise.solution)
            self.current_exercise.solution = refreshed
        package = self.current_exercise
        print('mod', package)
        print('nam', package.__name__)
        print('fil', package.__file__)
        print('pkg', package.__package__)

    def _test(self, exercise):
        """Load the exercise and print the result of its ``solution.main()``."""
        self._import(exercise)
        print(self.current_exercise.solution.main())
if __name__=='__main__':
import shutil
shutil.copy(f"./puzzles/exercise/solution.0", f"./puzzles/exercise/solution.py")
t= Test()
# first run
t._test('exercise')
# copy another solution.py for reload test
shutil.copy("./puzzles/other_exercise/solution.py", "./puzzles/exercise/solution.py")
print(open("./puzzles/exercise/solution.py").read())
# second run
t._test('exercise')

I went with an alternative; load solution.py as a text and create a module from that string. The module is not registered in sys.modules and can be overwritten. However imp is deprecated.
import types


class Test:
    """Runs an exercise's ``solution.py`` in a fresh, unregistered module."""

    current_exercise = None

    def _import(self, exercise):
        """Execute ``./puzzles/<exercise>/solution.py`` into a new module object.

        The module is never added to ``sys.modules``, so every call reads
        the current file contents.  Raises OSError if the file is missing.
        """
        # load module code
        with open(f'./puzzles/{exercise}/solution.py') as f:
            code = f.read()
        # create the module; types.ModuleType replaces the deprecated
        # imp.new_module (the imp module was removed in Python 3.12)
        self.current_exercise = types.ModuleType('mymodule')
        # import/fill the module
        # NOTE: exec runs the submitted code with full privileges; sandbox
        # untrusted submissions before using this in a real judge.
        exec(code, self.current_exercise.__dict__)

    def _test(self, exercise):
        """Load the exercise and print the result of its ``main()``."""
        self._import(exercise)
        print(self.current_exercise.main())

Copy images with EXIF(time) to new destination from several directories

I have a problem, below is my root tree:
In my root I have that:
---dir1---sub1(images exif time 10:05:05 to 10:09:55)
---sub2(images exif time 10:11:15 to 10:15:42)
---sub3(images exif time 10:22:15 to 10:24:41)
---sub4(images exif time 10:28:15 to 10:35:40)
---dir2---sub1(images exif time 10:05:06 to 10:09:57)
---sub2(images exif time 10:11:15 to 10:15:40)
---sub3(images exif time 10:22:15 to 10:24:43)
---sub4(images exif time 10:28:15 to 10:35:40)
---sub5(images exif time 10:40:15 to 10:43:40)
---dir3---sub1(images exif time 10:05:05 to 10:09:54)
---sub2(images exif time 10:11:15 to 10:15:40)
---sub3(images exif time 10:22:15 to 10:24:41)
---sub4(images exif time 10:28:15 to 10:35:40)
---sub5(images exif time 10:40:15 to 10:43:42)
---dir4---sub1(images exif time 10:05:06 to 10:09:57)
---sub2(images exif time 10:11:15 to 10:15:40)
---sub3(images exif time 10:22:15 to 10:24:43)
---sub4(images exif time 10:28:15 to 10:35:40)
---sub5(images exif time 10:40:15 to 10:43:40)
---dir5---sub1(images exif time 10:05:05 to 10:09:54)
---sub2(images exif time 10:11:15 to 10:15:40)
---sub3(images exif time 10:22:15 to 10:24:41)
---sub4(images exif time 10:28:15 to 10:35:40)
---sub5(images exif time 10:40:15 to 10:43:42)
I have 5 directories in my root, and each contains sub-folders with images; the number of sub-folders is not always the same. What I want to do is: take sub1 from dir1 and copy it to a new destination folder, then go to the next directory (dir2), scan its sub-folders for the one whose EXIF time matches sub1 from dir1, and copy it to the same destination; then do the same for dir3 and all the other directories and sub-folders. After that, create newdir2, take sub2 from dir1, and repeat the same loop until the end...
something like:
---newdir1---sub1(from dir1)
---sub1(from dir2)
---sub1(from dir3)
---sub1(from dir4)
---sub1(from dir5)
---newdir2---sub2(from dir1)
---sub2(from dir2)
---sub2(from dir3)
---sub2(from dir4)
---sub2(from dir5)
---newdir3---sub3(from dir1)
---sub3(from dir2)
---sub3(from dir3)
---sub3(from dir4)
---sub3(from dir5)
---newdir4---sub4(from dir1)
---sub4(from dir2)
---sub4(from dir3)
---sub4(from dir4)
---sub4(from dir5)
---newdir5---sub5(from dir2)
---sub5(from dir3)
---sub5(from dir4)
---sub5(from dir5)
I have part of a script which sorts my images into a dictionary by time interval. How can I join it to my script, so that sub-folders with the same key are copied to the same directory?
import os
import exifread
from datetime import datetime, timedelta
# Groups every lap directory under a (start, end) time-window key in
# src_dirs_dict, using the EXIF "DateTimeOriginal" of the images it contains.
# Length, in minutes, of the window built from a directory's earliest image.
TIME_RANGE = 2
src_root = 'F:\gopro_egouts\gopro_img_test\\2018-03-06'
dst_root = src_root + '-copie'
# Maps (window_start, window_end) -> list of lap directory paths.
src_dirs_dict = {}
for cam_dir in os.listdir(src_root):
laps_root = os.path.join(src_root, cam_dir)
for lap_dir in os.listdir(laps_root):
files_root = os.path.join(laps_root, lap_dir)
# Earliest and latest EXIF timestamps found among this directory's files.
min_time = None
max_time = None
for cam_file in os.listdir(files_root):
with open(os.path.join(files_root, cam_file), 'rb') as f:
tags = exifread.process_file(f, details=False, stop_tag="EXIF DateTimeOriginal")
time_taken = tags.get("EXIF DateTimeOriginal")
# Files without the tag are ignored.
if time_taken:
file_time = datetime.strptime(str(time_taken), '%Y:%m:%d %H:%M:%S')
if min_time is not None:
if file_time < min_time:
min_time = file_time
else:
min_time = file_time
if max_time is not None:
if file_time > max_time:
max_time = file_time
else:
max_time = file_time
# Look for an existing window that contains either end of this directory's span.
# NOTE(review): min_time/max_time are still None when no file carried the tag,
# and the comparisons below would then fail -- confirm every directory has EXIF data.
is_key = None
for key in src_dirs_dict.keys():
if (min_time >= key[0] and min_time < key[1]) \
or (max_time >= key[0] and max_time < key[1]):
is_key = key
break
# Zero the seconds of the start and make the window TIME_RANGE minutes long.
min_time = min_time.replace(second=0)
max_time = min_time + timedelta(minutes=TIME_RANGE)
if is_key:
# Widen the matched window if needed; a changed key means the entry is moved.
key_min, key_max = is_key
if min_time < key_min:
key_min = min_time
if max_time > key_max:
key_max = max_time
new_key = (key_min, key_max)
if new_key == is_key:
src_dirs_dict[new_key].append(files_root)
else:
src_dirs_dict[new_key] = src_dirs_dict.pop(is_key) + [files_root]
else:
# No matching window yet: start a new one for this directory.
new_key = (min_time, max_time)
src_dirs_dict[new_key] = [files_root]
print(src_dirs_dict)
My print showing me that:
{(datetime.datetime(2018, 3, 6, 10, 31), datetime.datetime(2018, 3, 6, 10, 32)): ['F:\\gopro_egouts\\gopro_img_test\\2018-03-06\\CAM0101 1\\Time Lapse 3',...
I have a script that works well, but it takes the sub-folders one by one, so when a time lapse is missing it mixes up my sub-folders (it automatically takes the next one from the next directory, which has the wrong time). Where do I have to add my EXIF script from above, and how do I modify it? How do I join the two together?
Any help will be appreciated.
from collections import defaultdict
import shutil
import os
import re
# Regroups <camera>/<time lapse> folders into <time lapse>/<camera> folders
# under dst_root, pairing sub-folders purely by their position after sorting.
src_root = r'F:\gp\gp_test\\2018-03-06'
dst_root = src_root + '-copie'
#os.makedirs(dst_root, exist_ok=True)
# First os.walk() entry: the root itself plus its immediate sub-directories.
src_dirname, src_folders, _ = next(os.walk(src_root))
src_folders = sorted(src_folders)
src_folders = [os.path.join(src_root, folder) for folder in src_folders]
print(src_folders)
# Maps 'Time Lapse-N' -> list of source sub-folders, one per camera folder.
job = defaultdict(list)
print('mes {} dossier cam'.format(len(src_folders)))
for folder in src_folders:
print()
dirname, src_sub_folders, _ = next(os.walk(os.path.join(src_dirname, folder)))
# Sort by the first run of non-digits, then by the embedded numbers as integers.
src_sub_folders = sorted(src_sub_folders, key=lambda x: [re.search(r'(\D+)', x).group(1)] + list(map(int, re.findall(r'\d+', x))))
print("mes 5 CAM avec {} time laps '{}'".format(len(src_sub_folders), folder))
# The N-th sub-folder of every camera goes to 'Time Lapse-N', whatever its EXIF
# time is; a missing sub-folder therefore shifts all the following ones.
for index, sub_folder in enumerate(src_sub_folders, start=1):
job['Time Lapse-{}'.format(index)].append(os.path.join(dirname, sub_folder))
#print()
# Note: this loop rebinds src_folders to each bucket's list of source folders.
for dst_folder, src_folders in sorted(job.items()):
for index, src_folder in enumerate(src_folders, start=1):
dst_new_folder = os.path.join(dst_root, dst_folder, 'CAM-{}'.format(index))
print('{} -> {}'.format(src_folder, dst_new_folder))
shutil.copytree(src_folder, dst_new_folder)
#shutil.rmtree(src_root)
# Prefix every copied file with the source root's basename and its folder name.
for root, dirs, files in os.walk(dst_root):
for f in files:
prefix = os.path.basename(root)
prefix1 = os.path.basename(src_root)
os.rename(os.path.join(root, f), os.path.join(root, "{}-{}-{}".format(prefix1, prefix, f)))
print("images rennomer ")
print("fini")
print("dossier supprimé")
I'm really sorry if that will be not to clear for users, but English it's not my strongest language ...

In a nutshell, you have images of the same set of events shot on a number of cameras.
Currently, they are grouped first by camera, then by event:
├── Camera1
│   ├── Event1
│   ├── Event2
│   ├── Event3
│   ├── Event4
│   └── Event5
├── Camera2
│   ├── Event1
│   ├── Event2
│   ├── Event3
│   ├── Event4
│   └── Event5
├── Camera3
│   ├── Event1
│   ├── Event2
│   ├── Event3
│   ├── Event4
│   └── Event5
├── Camera4
│   ├── Event1
│   ├── Event2
│   ├── Event3
│   ├── Event4
│   └── Event5
└── Camera5
├── Event1
├── Event2
├── Event3
├── Event4
└── Event5
... where some events may be missing and the event numbering may not match because one or more events may not be recorded by all cameras.
And you want the same set of images grouped first by event, then by camera:
├── Event1
│   ├── Camera1
│   ├── Camera2
│   ├── Camera3
│   ├── Camera4
│   └── Camera5
├── Event2
│   ├── Camera1
│   ├── Camera2
│   ├── Camera3
│   ├── Camera4
│   └── Camera5
├── Event3
│   ├── Camera1
│   ├── Camera2
│   ├── Camera3
│   ├── Camera4
│   └── Camera5
├── Event4
│   ├── Camera1
│   ├── Camera2
│   ├── Camera3
│   ├── Camera4
│   └── Camera5
└── Event5
├── Camera1
├── Camera2
├── Camera3
├── Camera4
└── Camera5
Here's an idea... I am kind of "thinking aloud" in pseudo-code:
Create the output directories {Event1..EventN}/Camera{1..N}
MissingDirectory=false
for each input directory Camera{1..N}
if this directory has the full number of subdirectories
copy all subdirectories to output area
else
MissingDirectory=true
end if
end for
if MissingDirectory
for each output Event directory
get times of all files from all cameras for current event
sort list and take median time of current event
end for
for each un-copied input directory
get the mean time of all the files in it
assign this directory's files to output directory with nearest median time
end for
endif
You can convert your EXIF times to pure seconds since midnight (s) with:
s = (hours*3600) + (minutes*60) + seconds
Here's a way to get the time (in seconds since midnight) that an image was taken:
import exifread
def getImageTime(filename):
    """Read EXIF data of given file and return time in seconds since midnight.

    Raises OSError if the file cannot be opened and KeyError if it has no
    "EXIF DateTimeOriginal" tag.
    """
    # The context manager closes the file even when parsing raises.
    with open(filename, 'rb') as f:
        tags = exifread.process_file(f)
        # DateTime looks like: "2013:03:09 08:59:50"
        date_time = tags["EXIF DateTimeOriginal"].printable
    # Time looks like: "08:59:50"
    time_part = date_time.split()[-1]
    h, m, s = time_part.split(":")
    # Return seconds since midnight: 32390
    return (int(h) * 3600) + (int(m) * 60) + int(s)
s=getImageTime("image.jpg")
print(s)
After some more thought, this won't work very well if one of the cameras is set, say, 20 minutes different from the others, since all its images from all the sequences will tend to get put into the first or last directory. Needs some more thought...

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Same tests over many similar data files - python

Related

pytest how to mock functions from another module

Build a tree from a list of objects

Automatically save files in a particular folder using python

importlib.reload() not realoading

Copy images with EXIF(time) to new destination from several directories

Categories

Resources