Belle II Software development
runners.py
1#!/usr/bin/env python3
2
3# disable doxygen check for this file
4# @cond
5
6
13
14from abc import ABC, abstractmethod
15import time
16from basf2 import B2DEBUG, B2ERROR, B2INFO
17import multiprocessing
18
19
class Runner(ABC):
    """
    Abstract base class from which every Runner type object derives.

    A concrete subclass must override `run` before it can be instantiated.
    """

    @abstractmethod
    def run(self):
        """
        Execute the runner. Concrete subclasses must provide the implementation.
        """
29
class AlgorithmsRunner(Runner):
    """
    Base class for `AlgorithmsRunner` classes. Defines the necessary information that will be provided to every
    `AlgorithmsRunner` used by the `framework.CAF`

    An `AlgorithmsRunner` will be given a list of `framework.Algorithm` objects defined during the setup of a
    `framework.Calibration` instance. The `AlgorithmsRunner` describes how to run each of the
    `strategies.AlgorithmStrategy` objects. As an example, assume that a single `framework.Calibration` was given
    a list of two `framework.Algorithm` instances to run.

    In this example the chosen :py:meth:`AlgorithmsRunner.run()` is simple and just loops over the list of
    `caf.framework.Algorithm` calling each one's :py:meth:`caf.strategies.AlgorithmStrategy.run()` methods in order.
    This generates a localdb, with the only communication between the `strategies.AlgorithmStrategy` instances
    coming from the database payloads being available from one algorithm to the next.

    But you could imagine a more complex situation. The `AlgorithmsRunner` might take the first `framework.Algorithm`
    and call its `AlgorithmStrategy.run` for only the first (exp,run) in the collected data. Then it might not commit
    the payloads to a localdb but instead pass some calculated values to the next algorithm to run on the same IoV.
    Then it might go back and re-run the first AlgorithmStrategy with new information and commit payloads this time.
    Then move onto the next IoV.

    Hopefully you can see that while the default provided `AlgorithmsRunner` and `AlgorithmStrategy` classes should
    be good for most situations, you have a lot of freedom to define your own strategy if needed. By following the
    basic requirements for the interface to the `framework.CAF` you can easily plug in a different special case, or
    mix and match a custom class with default CAF ones.

    The run(self) method should be defined for every derived `AlgorithmsRunner`. It will be called once and only once
    for each iteration of (collector -> algorithm).

    Input files are automatically given via the `framework.Calibration.output_patterns` which constructs
    a list of all files in the collector output directories that match the output_patterns. If you have multiple
    types of output data it is your job to filter through the input files and assign them correctly.

    A list of local database paths is given to the `AlgorithmsRunner` based on the `framework.Calibration`
    dependencies and any overall database chain given to the Calibration before running.
    By default you can call the "setup_algorithm" transition of the `caf.state_machines.AlgorithmMachine` to
    automatically set a database chain based on this list.
    But you have freedom to not call this at all in `run`, or to implement a different method to deal with this.
    """

    #: Value of `final_state` when an algorithm strategy reported a failure
    FAILED = "FAILED"
    #: Value of `final_state` when every algorithm strategy was run without reporting a failure
    COMPLETED = "COMPLETED"

    def __init__(self, name):
        """
        Create a runner with empty defaults. The attributes are expected to be filled in by whoever
        owns the runner before `run` is called.

        Parameters:
            name (str): Name of this runner. The sequential runner in this file reports it as the
                Calibration name in its log messages.
        """
        #: The name of this runner instance
        self.name = name
        #: Input files handed to every algorithm strategy
        self.input_files = []
        #: Database chain handed to every algorithm strategy
        self.database_chain = []
        #: Dependent (local) databases handed to every algorithm strategy
        self.dependent_databases = []
        #: Output database directory handed to every algorithm strategy
        self.output_database_dir = ""
        #: Results collected from the executed algorithms, keyed by algorithm name
        self.results = {}
        #: Final state of the runner: None until a run has finished, then `FAILED` or `COMPLETED`
        self.final_state = None
        #: The algorithms that this runner executes (None until assigned)
        self.algorithms = None
        #: Output directory handed to every algorithm strategy
        self.output_dir = ""
        #: Runs handed to every algorithm strategy as "ignored_runs". Initialised here so that a runner whose
        #: owner never assigns it does not fail with an AttributeError inside `run`.
        #: NOTE(review): an empty list is assumed to mean "ignore nothing" - confirm against the strategies.
        self.ignored_runs = []
93
class SeqAlgorithmsRunner(AlgorithmsRunner):
    """
    Runner that executes the strategy of each algorithm one after the other, each in its own forked
    subprocess, and stops at the first strategy that reports a failure.
    """

    def __init__(self, name):
        """
        Parameters:
            name (str): Name of this runner, forwarded unchanged to `AlgorithmsRunner.__init__`.
        """
        super().__init__(name)

    def run(self, iov, iteration):
        """
        Set up one strategy per algorithm, then run them sequentially in forked subprocesses while
        collecting the results each one sends back.

        On return `final_state` is `COMPLETED` if no strategy reported a failure and `FAILED` otherwise
        (in which case the remaining strategies are not run). `results` holds, per algorithm name, the
        list of results received from its subprocess.

        Parameters:
            iov: Passed unchanged to each strategy's ``run``.
            iteration: Passed unchanged to each strategy's ``run``.

        Raises:
            RunnerError: If a subprocess sends an unknown message, exits with a non-zero exit code, or
                exits without ever sending a final state.
        """
        from caf.strategies import AlgorithmStrategy
        B2INFO(f"SequentialAlgorithmsRunner begun for Calibration {self.name}.")
        # Forget the outcome of any earlier call, otherwise a stale FAILED would survive a later successful run
        self.final_state = None

        # First we do the setup of all algorithm strategies, before any of them is started
        strategies = [self._setup_strategy(algorithm) for algorithm in self.algorithms]

        # We then fork off a copy of this python process so that we don't affect the original with logging changes
        ctx = multiprocessing.get_context("fork")
        for strategy in strategies:
            algorithm_name = strategy.algorithm.name
            # Take the queue from the same context as the process so the two are guaranteed to be compatible
            queue = ctx.SimpleQueue()
            child = ctx.Process(target=SeqAlgorithmsRunner._run_strategy,
                                args=(strategy, iov, iteration, queue))

            self.results[algorithm_name] = []
            B2INFO(f"Starting subprocess of AlgorithmStrategy for {algorithm_name}.")
            B2INFO("Logging will be diverted into algorithm output.")
            child.start()

            final_state = self._collect_results(child, queue, algorithm_name)

            # Exit early and don't continue strategies as this one failed
            if final_state == AlgorithmStrategy.FAILED:
                B2ERROR(f"AlgorithmStrategy for {algorithm_name} failed. We will not proceed with any more algorithms")
                self.final_state = self.FAILED
                break

            B2DEBUG(29, f"Finished subprocess of AlgorithmStrategy for {algorithm_name}")

        if self.final_state != self.FAILED:
            B2INFO(f"SequentialAlgorithmsRunner finished for Calibration {self.name}")
            self.final_state = self.COMPLETED

    def _setup_strategy(self, algorithm):
        """Creates the strategy requested by ``algorithm`` and hands it this runner's settings."""
        # Need to create an instance of the requested strategy and set the attributes
        strategy = algorithm.strategy(algorithm)
        # Now add all the necessary parameters for a strategy to run
        strategy_params = {
            "database_chain": self.database_chain,
            "dependent_databases": self.dependent_databases,
            "output_dir": self.output_dir,
            "output_database_dir": self.output_database_dir,
            "input_files": self.input_files,
            "ignored_runs": self.ignored_runs,
        }
        strategy.setup_from_dict(strategy_params)
        return strategy

    def _collect_results(self, child, queue, algorithm_name):
        """Drains ``queue`` into ``self.results`` until ``child`` has exited and returns its final state."""
        final_state = None
        final_loop = False

        B2INFO(f"Collecting results for {algorithm_name}.")
        while True:
            # Do we have results?
            while not queue.empty():
                output = queue.get()
                B2DEBUG(29, f"Result from queue was {output}")
                if output["type"] == "result":
                    self.results[algorithm_name].append(output["value"])
                elif output["type"] == "final_state":
                    final_state = output["value"]
                else:
                    raise RunnerError(f"Unknown result output: {output}")

            # Still alive but no results at the moment? Wait a few seconds before checking again.
            # join() with a timeout waits at most as long as a plain sleep would, but returns as soon
            # as the subprocess ends, and we still come back regularly to drain the queue.
            if child.is_alive():
                child.join(timeout=5)
                continue

            # Reached a good ending of strategy
            if final_state:
                # Check the exitcode for failed Process()
                if child.exitcode == 0:
                    B2INFO(f"AlgorithmStrategy subprocess for {algorithm_name} exited")
                    return final_state
                raise RunnerError(f"Error during subprocess of AlgorithmStrategy for {algorithm_name}")

            # It might be possible that the subprocess has finished but all results weren't gathered yet.
            # Go around once more since all results should be in the queue waiting
            if final_loop:
                raise RunnerError(f"Strategy for {algorithm_name} "
                                  "exited subprocess but without a final state!")
            final_loop = True

    @staticmethod
    def _run_strategy(strategy, iov, iteration, queue):
        """Runs the AlgorithmStrategy, which sends its results back through ``queue``."""
        strategy.run(iov, iteration, queue)
        # Get the return codes of the algorithm for the IoVs found by the Process
        B2INFO(f"Finished Strategy for {strategy.algorithm.name}.")
193
class RunnerError(Exception):
    """Base exception class for Runners."""
198# @endcond
199