# Belle II Software release-06-00-14
# test_backends.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# NOTE(review): the standard basf2 license header block was lost during
# extraction — restore the project's LGPL header here before committing.

"""
Unit tests for the backends.py classes and functions. This is a little difficult to test due to
the Batch system backends not really being testable when not running on specific hosts with bsub/qsub
installed. But we can try to implement tests using the Local multiprocessing backend and testing the
basic behaviour of the classes so that they will fail if we modify assumptions in the future.
"""

import shutil
import unittest
from pathlib import Path
from unittest import TestCase

from basf2 import find_file

from caf.backends import ArgumentsSplitter, Job, MaxFilesSplitter, MaxSubjobsSplitter
from caf.backends import ArgumentsGenerator, range_arguments, SplitterError

# A testing directory so that we can do cleanup
test_dir = Path("test_backends").absolute()
# A simple bash script for testing jobs
test_script = Path(find_file("calibration/examples/job_submission/test_script.sh")).absolute()
33 
34 
class TestJob(TestCase):
    """
    UnitTest for the `caf.backends.Job` class
    """

    def setUp(self):
        """
        Create useful objects for each test and the teardown
        """
        # We will deliberately use strings and not Path objects for the job attributes so that we can later
        # check that they are converted to Paths by the class itself.
        name1 = 'TestJob1'
        job1 = Job(name1)  # Set up this one manually setting attributes
        job1.working_dir = Path(test_dir, job1.name, "working_dir").absolute().as_posix()
        job1.output_dir = Path(test_dir, job1.name, "output_dir").absolute().as_posix()
        job1.cmd = ["bash", test_script.name]
        job1.input_sandbox_files = [test_script.as_posix()]
        # Job built attribute-by-attribute
        self.job1 = job1

        name2 = 'TestJob2'
        job_dict = {}
        job_dict["name"] = name2
        job_dict["working_dir"] = Path(test_dir, name2, "working_dir").as_posix()
        job_dict["output_dir"] = Path(test_dir, name2, "output_dir").as_posix()
        job_dict["output_patterns"] = []
        job_dict["cmd"] = ["bash", test_script.name]
        job_dict["args"] = []
        job_dict["input_sandbox_files"] = [test_script.as_posix()]
        job_dict["input_files"] = []
        job_dict["setup_cmds"] = []
        job_dict["backend_args"] = {}
        job_dict["subjobs"] = [{"id": i, "input_files": [], "args": [str(i)]} for i in range(4)]
        # The raw dictionary used to build job2, kept so tests can compare it to Job.job_dict
        self.job2_dict = job_dict
        self.job2 = Job(name2, job_dict=job_dict)  # Set up this one from a dictionary

        # Create a directory just in case we need it for each test so that we can delete everything easily at the end
        test_dir.mkdir(parents=True, exist_ok=False)

    def test_dict_setup(self):
        """
        A Job built from a dictionary should reproduce that dictionary via `job_dict`,
        and stay in sync when a subjob is removed from both representations.
        """
        self.maxDiff = None  # If this test fails you will need to see the diff of a large dictionary
        self.assertEqual(len(self.job2.subjobs), 4)
        self.assertEqual(self.job2_dict, self.job2.job_dict)
        self.job2_dict["subjobs"].pop()
        del self.job2.subjobs[3]
        self.assertEqual(self.job2_dict, self.job2.job_dict)

    def test_job_json_serialise(self):
        """
        A Job dumped to JSON and loaded back should describe an identical job.
        """
        json_path = Path(test_dir, "job2.json")
        self.job2.dump_to_json(json_path)
        job2_copy = Job.from_json(json_path)
        self.assertEqual(self.job2.job_dict, job2_copy.job_dict)

    def test_status(self):
        """
        The Jobs haven't been run so they should be in the 'init' status.
        They also shouldn't throw exceptions due to missing result objects.
        """
        self.assertEqual(self.job1.status, "init")
        self.assertEqual(self.job2.status, "init")
        self.assertFalse(self.job1.ready())
        self.assertFalse(self.job2.ready())
        self.assertEqual(self.job1.update_status(), "init")
        self.assertEqual(self.job2.update_status(), "init")
        for subjob in self.job2.subjobs.values():
            self.assertEqual(subjob.status, "init")
            self.assertFalse(subjob.ready())
            self.assertEqual(subjob.update_status(), "init")

    def test_path_object_conversion(self):
        """
        Make sure that the two ways of setting up Job objects correctly converted attributes to be Paths instead of strings.
        """
        self.assertIsInstance(self.job1.output_dir, Path)
        self.assertIsInstance(self.job1.working_dir, Path)
        for path in self.job1.input_sandbox_files:
            self.assertIsInstance(path, Path)
        for path in self.job1.input_files:
            self.assertIsInstance(path, Path)

        self.assertIsInstance(self.job2.output_dir, Path)
        self.assertIsInstance(self.job2.working_dir, Path)
        for path in self.job2.input_sandbox_files:
            self.assertIsInstance(path, Path)
        for path in self.job2.input_files:
            self.assertIsInstance(path, Path)

        for subjob in self.job2.subjobs.values():
            self.assertIsInstance(subjob.output_dir, Path)
            self.assertIsInstance(subjob.working_dir, Path)

    def test_subjob_splitting(self):
        """
        Test the creation of SubJobs and assignment of input data files via splitter classes.
        """
        self.assertIsNone(self.job1.splitter)
        self.assertIsNone(self.job2.splitter)
        # Set the splitter for job1 (assigning these attributes implicitly creates the splitter)
        self.job1.max_files_per_subjob = 2
        self.assertIsInstance(self.job1.splitter, MaxFilesSplitter)
        self.assertEqual(self.job1.splitter.max_files_per_subjob, 2)
        self.job1.max_subjobs = 3
        self.assertIsInstance(self.job1.splitter, MaxSubjobsSplitter)
        self.assertEqual(self.job1.splitter.max_subjobs, 3)

        # Generate some empty input files
        for i in range(5):
            input_file = Path(test_dir, f"{i}.txt")
            input_file.touch(exist_ok=False)
            self.job1.input_files.append(input_file)

        self.job1.splitter = MaxFilesSplitter(max_files_per_subjob=2)
        self.job1.splitter.create_subjobs(self.job1)
        self.assertEqual(len(self.job1.subjobs), 3)  # Did the splitter create the number of jobs we expect?
        for i, subjob in self.job1.subjobs.items():
            self.assertTrue((len(subjob.input_files) == 2 or len(subjob.input_files) == 1))

        self.job1.subjobs = {}
        self.job1.splitter = MaxSubjobsSplitter(max_subjobs=4)
        self.job1.splitter.create_subjobs(self.job1)
        self.assertEqual(len(self.job1.subjobs), 4)  # Did the splitter create the number of jobs we expect?
        for i, subjob in self.job1.subjobs.items():
            self.assertTrue((len(subjob.input_files) == 2 or len(subjob.input_files) == 1))

        # Does the ArgumentSplitter create jobs
        self.job1.subjobs = {}
        arg_gen = ArgumentsGenerator(range_arguments, 3, stop=12, step=2)
        self.job1.splitter = ArgumentsSplitter(arguments_generator=arg_gen, max_subjobs=10)
        self.job1.splitter.create_subjobs(self.job1)
        self.assertEqual(len(self.job1.subjobs), 5)
        for (i, subjob), arg in zip(self.job1.subjobs.items(), range(3, 12, 2)):
            # Does each subjob receive the correct setup
            self.assertEqual(self.job1.input_files, subjob.input_files)
            self.assertEqual(arg, subjob.args[0])

        # Does max_jobs prevent infinite subjob numbers
        self.job1.subjobs = {}
        self.job1.splitter = ArgumentsSplitter(arguments_generator=arg_gen, max_subjobs=2)
        self.assertRaises(SplitterError, self.job1.splitter.create_subjobs, self.job1)

    def test_input_sandbox_copy(self):
        """
        Does the copy of files/directories for the input sandbox work correctly?
        """
        # We create a directory to add to the Job's input sandbox list, to test if directories + contents are copied.
        input_sandbox_dir = Path(test_dir, "test_input_sandbox_dir")
        input_sandbox_dir.mkdir(parents=True, exist_ok=False)
        for i in range(2):
            input_file = Path(input_sandbox_dir, f"{i}.txt")
            input_file.touch(exist_ok=False)
        # Instead of identifying every file, we just use the whole directory
        self.job1.input_sandbox_files.append(input_sandbox_dir)
        # Manually create the working dir first (normally done by the Backend)
        self.job1.working_dir.mkdir(parents=True, exist_ok=False)
        self.job1.copy_input_sandbox_files_to_working_dir()

        # We expect the original script and the above extra files + parent directory to be copied to the working directory.
        expected_paths = []
        expected_paths.append(Path(self.job1.working_dir, test_script.name))
        expected_paths.append(Path(self.job1.working_dir, "test_input_sandbox_dir"))
        for i in range(2):
            path = Path(self.job1.working_dir, "test_input_sandbox_dir", f"{i}.txt")
            expected_paths.append(path)

        # Now check that every path in the working directory is one we expect to be there
        for p in self.job1.working_dir.rglob("*"):
            self.assertIn(p, expected_paths)

    def tearDown(self):
        """
        Removes files/directories that were created during these tests
        """
        shutil.rmtree(test_dir)
207 
208 
def main():
    """Discover and run all unit tests in this module via unittest."""
    unittest.main()


if __name__ == '__main__':
    main()
# NOTE(review): extraction residue removed here — the three orphaned method
# signatures (test_subjob_splitting / test_path_object_conversion /
# test_input_sandbox_copy) belong inside TestJob above, and the trailing
# C++ lines came from an unrelated file (test_main.cc).