Belle II Software  release-08-01-10
caf_htcondor_backend.py
# Not all of the configuration is strictly necessary, it's just to show some options

import basf2 as b2

import os
import sys

from ROOT.Belle2 import TestCalibrationAlgorithm
from caf.framework import Calibration, CAF
from caf import backends

b2.set_log_level(b2.LogLevel.INFO)


def main(argv):
    if len(argv) == 1:
        data_dir = argv[0]
    else:
        print("Usage: python3 caf_htcondor_backend.py <data directory>")
        sys.exit(1)

    input_files_test = [os.path.join(os.path.abspath(data_dir), '*.root')]

    from caf.strategies import SequentialRunByRun
    calibrations = []
    for i in range(1, 3):
        col_test = b2.register_module('CaTest')
        col_test.set_name('Test{}'.format(i))  # Sets the prefix of the collected data in the datastore
        col_test.param('spread', 15)           # Proportional to the probability of the algorithm requesting iteration
        col_test.param('granularity', 'run')   # Split the collected data per run rather than into one object for all runs

        alg_test = TestCalibrationAlgorithm()
        # Since we're using several instances of the same test algorithm here, we still want the database entries to have
        # different names. TestCalibrationAlgorithm outputs to the database using the prefix name, so we change it
        # slightly for each calibration. Not something you'd usually have to do.
        alg_test.setPrefix('Test{}'.format(i))  # Must be the same as the collector prefix

        cal_test = Calibration(name='TestCalibration{}'.format(i),
                               collector=col_test,
                               algorithms=alg_test,
                               input_files=input_files_test)

        # Some optional configuration ####
        # By default all input files are placed in one big job (-1); this allows you to specify a maximum so that
        # subjobs for each set of input files will be created
        cal_test.max_files_per_collector_job = 1
        # Some backends can have arguments passed to them, e.g. requested job memory
        cal_test.backend_args = {"request_memory": "2 GB"}
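        # (Assumption, not in the original example: these backend_args are forwarded when the collector
        #  jobs are submitted, and for HTCondor "request_memory" corresponds to the submit-description
        #  command of the same name. The defaults can be found in calibration/data/backends.cfg.)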
        # The maximum iteration number you will be allowed to reach before the Calibration just completes
        cal_test.max_iterations = 2
        # Since we're using the HTCondor batch system we'll raise the heartbeat from the default when querying
        # whether the jobs are all finished. No point spamming it.
        cal_test.heartbeat = 15
        # The interval in seconds between full updates of the remaining collector jobs, default = 300.
        # Checking every subjob can be a long process when you have a lot of them, so it's best not to do it too often.
        # After this interval the finished/remaining collector jobs will be printed.
        cal_test.collector_full_update_interval = 30
        # Choosing an AlgorithmStrategy for each algorithm (here we just use the same one for all of them)
        cal_test.strategies = SequentialRunByRun
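        # (Assumption, not in the original example: caf.strategies also provides other strategies,
        #  e.g. SingleIOV (the default) and SimpleRunByRun, which can be assigned in the same way.)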
        # The collector output file patterns you want to make sure are tracked by the CAF. By default only CollectorOutput.root
        # is used. All files are passed to the Algorithm.data_input function in order to set the input files of the algorithm.
        cal_test.output_patterns.append("*.mille")

        calibrations.append(cal_test)

    cal_fw = CAF()
    # Add in our list of calibrations
    for cal in calibrations:
        cal_fw.add_calibration(cal)
    # Use the HTCondor backend setup; you can view the default options in calibration/data/backends.cfg
    cal_fw.backend = backends.HTCondor()
    # Start running
    cal_fw.run()
    print("End of CAF processing.")


if __name__ == "__main__":
    main(sys.argv[1:])
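
For local testing, the same CAF setup can be pointed at a different backend. The sketch below is an assumption based on the caf.backends interface used above (backends.Local and backends.LSF); it is not part of the original example, and the number of processes is arbitrary.

from caf import backends
from caf.framework import CAF

cal_fw = CAF()
# Add calibrations exactly as in main() above before running.

# Run collector jobs as local processes instead of HTCondor jobs
# (max_processes limits how many collector processes run concurrently).
cal_fw.backend = backends.Local(max_processes=4)

# Or submit to an LSF batch system; the heartbeat and backend_args settings
# shown above apply in the same way.
# cal_fw.backend = backends.LSF()

cal_fw.run()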