Belle II Software development
CalibrationAlgorithm.h
/**************************************************************************
 * basf2 (Belle II Analysis Software Framework)                           *
 * Author: The Belle II Collaboration                                     *
 *                                                                        *
 * See git log for contributors and copyright holders.                    *
 * This file is licensed under LGPL-3.0, see LICENSE.md.                  *
 **************************************************************************/
8
9#pragma once
10#include <Python.h>
11#include <memory>
12#include <string>
13#include <vector>
14#include <map>
15#include <any>
16#include <utility>
17#include <list>
18#include <nlohmann/json.hpp>
19#include <TClonesArray.h>
20#include <TDirectory.h>
21#include <TFile.h>
22#include <TTree.h>
23#include <framework/database/Database.h>
24#include <framework/database/IntervalOfValidity.h>
25#include <framework/logging/Logger.h>
26#include <calibration/Utilities.h>
27#include <calibration/dataobjects/RunRange.h>
28
29namespace Belle2 {
38 public:
40 enum EResult {
46 };
47
54 public:
55 ExecutionData() {};
56 ~ExecutionData() {};
58 void reset()
59 {
60 B2DEBUG(100, "Resetting ExecutionData of algorithm");
61 m_requestedRuns.clear();
62 m_iteration = -1;
64 m_payloads.clear();
67 }
70 {
71 m_mapCalibData.clear();
72 }
74 const std::vector<Calibration::ExpRun>& getRequestedRuns() const {return m_requestedRuns;}
76 void setRequestedRuns(const std::vector<Calibration::ExpRun>& requestedRuns) {m_requestedRuns = requestedRuns;}
78 int getIteration() const {return m_iteration;}
80 void setIteration(int iteration)
81 {
82 B2DEBUG(29, "Setting Iteration of Algorithm to " << iteration);
83 m_iteration = iteration;
84 }
86 EResult getResult() const {return m_result;}
88 void setResult(EResult result) {m_result = result;}
90 void setRequestedIov(const IntervalOfValidity& iov = IntervalOfValidity(0, 0, -1, -1)) {m_iov = iov;}
92 const IntervalOfValidity& getRequestedIov() const {return m_iov;}
94 std::list<Database::DBImportQuery>& getPayloads() {return m_payloads;}
96 std::list<Database::DBImportQuery> getPayloadValues() {return m_payloads;}
98 std::shared_ptr<TNamed> getCalibObj(const std::string& name, const RunRange& runRange) const
99 {
100 auto it = m_mapCalibData.find(std::make_pair(name, runRange));
101 if (it == m_mapCalibData.end()) {
102 return nullptr;
103 }
104 return it->second;
105 }
107 void setCalibObj(const std::string& name, const RunRange& runRange, const std::shared_ptr<TNamed>& objectPtr)
108 {
109 m_mapCalibData[std::make_pair(name, runRange)] = objectPtr;
110 }
111
112 private:
114 std::vector<Calibration::ExpRun> m_requestedRuns{};
116 int m_iteration{ -1};
122 std::list<Database::DBImportQuery> m_payloads{};
131 std::map<std::pair<std::string, RunRange>, std::shared_ptr<TNamed>> m_mapCalibData;
132 };
133
140 explicit CalibrationAlgorithm(const std::string& collectorModuleName) : m_prefix(collectorModuleName) {}
141
144
146 std::string getPrefix() const {return m_prefix;}
147
149 bool checkPyExpRun(PyObject* pyObj);
150
152 Calibration::ExpRun convertPyExpRun(PyObject* pyObj);
153
164 std::string getCollectorName() const {return getPrefix();}
165
167 void setPrefix(const std::string& prefix) {m_prefix = prefix;}
168
170 void setInputFileNames(PyObject* inputFileNames);
171
173 PyObject* getInputFileNames();
174
176 std::vector<Calibration::ExpRun> getRunListFromAllData() const;
177
180
183
186
188 std::string getGranularity() const {return m_granularityOfData;};
189
198 EResult execute(std::vector<Calibration::ExpRun> runs = {}, int iteration = 0, IntervalOfValidity iov = IntervalOfValidity());
199
201 EResult execute(PyObject* runs, int iteration = 0, IntervalOfValidity iov = IntervalOfValidity());
202
204 std::list<Database::DBImportQuery>& getPayloads() {return m_data.getPayloads();}
205
207 std::list<Database::DBImportQuery> getPayloadValues() {return m_data.getPayloadValues();}
208
210 bool commit();
211
213 bool commit(std::list<Database::DBImportQuery> payloads);
214
216 const std::string& getDescription() const {return m_description;}
217
219 // of JSON object creation. Failure probably means that your JSON string was badly formatted.
220 bool loadInputJson(const std::string& jsonString);
221
223 const std::string dumpOutputJson() const {return m_jsonExecutionOutput.dump();}
224
226 // areas of the CAF. Basically you search for features in the data that you want to find and make sure that the CAF knows
227 // there is a boundary where payloads should probably start/end. The output boundaries should be the starting ExpRun
228 // of the new boundary.
229 const std::vector<Calibration::ExpRun> findPayloadBoundaries(std::vector<Calibration::ExpRun> runs, int iteration = 0);
230
231 protected:
232 // Developers implement this function ------------
233
235 virtual EResult calibrate() = 0;
236
238 // Implementing this is optional because most people will never call findPayloadBoundaries in their CAF job.
239 // It returns false by default so that the boundaries vector is empty if you forgot to implement this.
240 //
241 // We omit the names of arguments here so that we don't generate lots of compiler warnings in algorithms that don't
242 // implement this function.
243 virtual bool isBoundaryRequired(const Calibration::ExpRun& /*currentRun*/)
244 {
245 B2ERROR("You didn't implement a isBoundaryRequired() member function in your CalibrationAlgorithm but you are calling it!");
246 return false;
247 }
248
250 // We omit the names of arguments here so that we don't generate lots of compiler warnings in algorithms that don't
251 // implement this function.
252 virtual void boundaryFindingSetup(std::vector<Calibration::ExpRun> /*runs*/, int /*iteration = 0*/) {};
253
255 // This runs right after 'findPayloadBoundaries' and is supposed to correct any changes you made in 'boundaryFindingSetup'
256 // or 'isBoundaryRequired'.
257 virtual void boundaryFindingTearDown() {};
258
260 // calling findPayloadBoundaries, before boundaryFindingSetup is called.
261 std::vector<Calibration::ExpRun> m_boundaries;
262
263 // Helpers ---------------- Data retrieval -------
264
266 const std::vector<Calibration::ExpRun>& getRunList() const {return m_data.getRequestedRuns();}
267
269 int getIteration() const { return m_data.getIteration(); }
270
272 void setInputFileNames(std::vector<std::string> inputFileNames);
273
275 std::vector<std::string> getVecInputFileNames() const {return m_inputFileNames;}
276
278 template<class T>
279 std::shared_ptr<T> getObjectPtr(const std::string& name, const std::vector<Calibration::ExpRun>& requestedRuns);
280
284 template<class T>
285 std::shared_ptr<T> getObjectPtr(std::string name)
286 {
287 if (m_runsToInputFiles.size() == 0)
289 return getObjectPtr<T>(name, m_data.getRequestedRuns());
290 }
291
292 // Helpers ---------------- Database storage -----
293
295 std::string getGranularityFromData() const;
296
298 void saveCalibration(TClonesArray* data, const std::string& name);
299
301 void saveCalibration(TClonesArray* data, const std::string& name, const IntervalOfValidity& iov);
302
304 void saveCalibration(TObject* data);
305
307 void saveCalibration(TObject* data, const IntervalOfValidity& iov);
308
310 void saveCalibration(TObject* data, const std::string& name);
311
313 void saveCalibration(TObject* data, const std::string& name, const IntervalOfValidity& iov);
314
316 void updateDBObjPtrs(const unsigned int event, const int run, const int experiment);
317
318 // -----------------------------------------------
319
321 void setDescription(const std::string& description) {m_description = description;}
322
325
327 Calibration::ExpRun getAllGranularityExpRun() const {return m_allExpRun;}
328
331
334
336 template<class T>
337 void setOutputJsonValue(const std::string& key, const T& value) {m_jsonExecutionOutput[key] = value;}
338
340 // No attempt to catch exceptions is made here.
341 template<class T>
342 const T getOutputJsonValue(const std::string& key) const
343 {
344 return m_jsonExecutionOutput.at(key);
345 }
346
348 // No attempt to catch them is made here.
349 template<class T>
350 const T getInputJsonValue(const std::string& key) const
351 {
352 return m_jsonExecutionInput.at(key);
353 }
354
356 // up on weird errors where someone snuck an array into the member variable.
357 const nlohmann::json& getInputJsonObject() const {return m_jsonExecutionInput;}
358
360 bool inputJsonKeyExists(const std::string& key) const {return m_jsonExecutionInput.count(key);}
361
362 private:
363
364 static const Calibration::ExpRun m_allExpRun;
367 std::string getExpRunString(Calibration::ExpRun& expRun) const;
368
370 std::string getFullObjectPath(const std::string& name, Calibration::ExpRun expRun) const;
371
373 std::vector<std::string> m_inputFileNames;
374
376 std::map<Calibration::ExpRun, std::vector<std::string>> m_runsToInputFiles;
377
380
383
385 std::string m_description{""};
386
388 std::string m_prefix{""};
389
391 // We initialise to "{}" rather than allowing a JSON array/value as the top level type. This forces the user
392 // to use "key":value for storing data in this object. You should test for empty(), is_null() will always
393 // return false due to the empty top level object.
394 // Functionally similar to simple member variables for configuration of the algorithm.
395 // However these input values are easier to use from Python code without needing to know the details of the algorithm.
396 // These values are intended to be used for a single execution, not reused
397 nlohmann::json m_jsonExecutionInput = nlohmann::json::object();
398
400 // As for input we initialise to an empty "{}" JSON object. Testing for empty() returns true, but is_null() does not.
401 // Nothing is done with these by default, however a calling process may decide to capture these values and use them
402 // as input to a following execution.
403 nlohmann::json m_jsonExecutionOutput = nlohmann::json::object();
404
405 }; // End of CalibrationAlgorithm definition
406
407
/**************************************
 *                                    *
 * Implementation of larger templates *
 *                                    *
 **************************************/
413 template<class T>
414 std::shared_ptr<T> CalibrationAlgorithm::getObjectPtr(const std::string& name,
415 const std::vector<Calibration::ExpRun>& requestedRuns)
416 {
417 // Check if this object already exists
418 RunRange runRangeRequested(requestedRuns);
419 std::shared_ptr<T> objOutputPtr = std::dynamic_pointer_cast<T>(m_data.getCalibObj(name, runRangeRequested));
420 if (objOutputPtr)
421 return objOutputPtr;
422
423 std::shared_ptr<T> mergedObjPtr(nullptr);
424 bool mergedEmpty = true;
425 TDirectory* dir = gDirectory;
426
427 // Technically we could grab all the objects from all files, add to list and then merge at the end.
428 // But I prefer the (maybe) more memory efficient way of merging with all objects
429 // in a file before moving on to the next one, just in case TDirectory stuff screws us.
430 TList list;
431 list.SetOwner(false);
432
433 // Construct the TDirectory names where we expect our objects to be
434 std::string runRangeObjName(getPrefix() + "/" + Calibration::RUN_RANGE_OBJ_NAME);
435
436 if (strcmp(getGranularity().c_str(), "run") == 0) {
437 // Loop over our runs requested for the right files
438 for (auto expRunRequested : requestedRuns) {
439 // Find the relevant files for this ExpRun
440 auto searchFiles = m_runsToInputFiles.find(expRunRequested);
441 if (searchFiles == m_runsToInputFiles.end()) {
442 B2WARNING("No input file found with data collected from run "
443 "(" << expRunRequested.first << "," << expRunRequested.second << ")");
444 continue;
445 } else {
446 auto files = searchFiles->second;
447 for (auto fileName : files) {
448 RunRange* runRangeData;
449 //Open TFile to get the objects
450 std::unique_ptr<TFile> f;
451 f.reset(TFile::Open(fileName.c_str(), "READ"));
452 runRangeData = dynamic_cast<RunRange*>(f->Get(runRangeObjName.c_str()));
453 // Check that nothing went wrong in the mapping and that this file definitely contains this run's data
454 auto runSet = runRangeData->getExpRunSet();
455 if (runSet.find(expRunRequested) == runSet.end()) {
456 B2WARNING("Something went wrong with the mapping of ExpRun -> Input Files. "
457 "(" << expRunRequested.first << "," << expRunRequested.second << ") not in " << fileName);
458 }
459 // Get the path/directory of the Exp,Run TDirectory that holds the object(s)
460 std::string objDirName = getFullObjectPath(name, expRunRequested);
461 TDirectory* objDir = f->GetDirectory(objDirName.c_str());
462 if (!objDir) {
463 B2ERROR("Directory for requested object " << name << " not found: " << objDirName);
464 return nullptr;
465 }
466 // Find all the objects inside, there may be more than one
467 for (auto key : * (objDir->GetListOfKeys())) {
468 std::string keyName = key->GetName();
469 B2DEBUG(100, "Adding found object " << keyName << " in the directory " << objDir->GetPath());
470 T* objOther = (T*)objDir->Get(keyName.c_str());
471 if (objOther) {
472 if (mergedEmpty) {
473 mergedObjPtr = std::shared_ptr<T>(dynamic_cast<T*>(objOther->Clone(name.c_str())));
474 mergedObjPtr->SetDirectory(0);
475 mergedEmpty = false;
476 } else {
477 list.Add(objOther);
478 }
479 }
480 }
481 if (!mergedEmpty)
482 mergedObjPtr->Merge(&list);
483 list.Clear();
484 }
485 }
486 }
487 } else {
488 for (auto fileName : m_inputFileNames) {
489 //Open TFile to get the objects
490 std::unique_ptr<TFile> f;
491 f.reset(TFile::Open(fileName.c_str(), "READ"));
492 Calibration::ExpRun allGranExpRun = getAllGranularityExpRun();
493 std::string objDirName = getFullObjectPath(name, allGranExpRun);
494 std::string objPath = objDirName + "/" + name + "_1";
495 T* objOther = (T*)f->Get(objPath.c_str()); // Only one index for granularity == all
496 B2DEBUG(100, "Adding " << objPath);
497 if (objOther) {
498 if (mergedEmpty) {
499 mergedObjPtr = std::shared_ptr<T>(dynamic_cast<T*>(objOther->Clone(name.c_str())));
500 mergedObjPtr->SetDirectory(0);
501 mergedEmpty = false;
502 } else {
503 list.Add(objOther);
504 }
505 }
506 if (!mergedEmpty)
507 mergedObjPtr->Merge(&list);
508 list.Clear();
509 }
510 }
511 dir->cd();
512 objOutputPtr = mergedObjPtr;
513 if (!objOutputPtr) {
514 B2ERROR("No data found for object " << name);
515 return nullptr;
516 }
517 objOutputPtr->SetDirectory(0);
518 // make a TNamed version to input to the map of previous calib objects
519 std::shared_ptr<TNamed> storedObjPtr = std::static_pointer_cast<TNamed>(objOutputPtr);
520 m_data.setCalibObj(name, runRangeRequested, storedObjPtr);
521 B2DEBUG(100, "Passing back merged data " << name);
522 return objOutputPtr;
523 }
524
528 template<> std::shared_ptr<TTree>
530 const std::string& name,
531 const std::vector<Calibration::ExpRun>& requestedRuns);
532
534} // namespace Belle2
A class to hold all data that is needed ONLY for the most recent single execution of the algorithm.
std::list< Database::DBImportQuery > & getPayloads()
Get constants (in TObjects) for database update from last calibration.
std::list< Database::DBImportQuery > getPayloadValues()
Get constants (in TObjects) for database update from last calibration but passed by VALUE.
const std::vector< Calibration::ExpRun > & getRequestedRuns() const
Returns the vector of ExpRuns.
void setResult(EResult result)
Setter for current result.
std::map< std::pair< std::string, RunRange >, std::shared_ptr< TNamed > > m_mapCalibData
Map of shared pointers to merged calibration objects created by getObjectPtr() calls.
IntervalOfValidity m_iov
Current IoV to be executed, default empty. Will be either set by user explicitly or generated from co...
void setRequestedIov(const IntervalOfValidity &iov=IntervalOfValidity(0, 0, -1, -1))
Sets the requested IoV for this execution, based on the.
int m_iteration
current iteration for execution
void setCalibObj(const std::string &name, const RunRange &runRange, const std::shared_ptr< TNamed > &objectPtr)
Insert a newly created object in m_mapCalibData. Overwrites a previous entry if one exists.
void setRequestedRuns(const std::vector< Calibration::ExpRun > &requestedRuns)
Sets the vector of ExpRuns.
int getIteration() const
Getter for current iteration.
void setIteration(int iteration)
Setter for current iteration.
std::vector< Calibration::ExpRun > m_requestedRuns
Runs for which the calibration has been last requested, either requested explicitly or generated from...
std::list< Database::DBImportQuery > m_payloads
Payloads saved by execution.
EResult getResult() const
Getter for current result.
EResult m_result
Result of execution, default undefined to indicate we haven't run yet.
void reset()
Resets this class back to what is needed at the beginning of an execution.
std::shared_ptr< TNamed > getCalibObj(const std::string &name, const RunRange &runRange) const
Get a previously created object in m_mapCalibData if one exists, otherwise return shared_ptr(nullptr)
void clearCalibrationData()
Clear calibration data.
const IntervalOfValidity & getRequestedIov() const
Getter for requested IOV.
Base class for calibration algorithms.
std::list< Database::DBImportQuery > & getPayloads()
Get constants (in TObjects) for database update from last execution.
std::string getExpRunString(Calibration::ExpRun &expRun) const
Gets the "exp.run" string repr. of (exp,run)
RunRange getRunRangeFromAllData() const
Get the complete RunRange from inspection of collected data.
IntervalOfValidity getIovFromAllData() const
Get the complete IoV from inspection of collected data.
void saveCalibration(TClonesArray *data, const std::string &name)
Store DBArray payload with given name with default IOV.
bool checkPyExpRun(PyObject *pyObj)
Checks that a PyObject can be successfully converted to an ExpRun type.
static const Calibration::ExpRun m_allExpRun
The Exp,Run pair that means 'Everything', as returned by getAllGranularityExpRun().
std::list< Database::DBImportQuery > getPayloadValues()
Get constants (in TObjects) for database update from last execution but passed by VALUE.
bool inputJsonKeyExists(const std::string &key) const
Test for a key in the input JSON object.
void updateDBObjPtrs(const unsigned int event, const int run, const int experiment)
Updates any DBObjPtrs by calling update(event) for DBStore.
std::vector< Calibration::ExpRun > m_boundaries
When using the boundaries functionality from isBoundaryRequired, this is used to store the boundaries...
PyObject * getInputFileNames()
Get the input file names used for this algorithm and pass them out as a Python list of unicode string...
EResult execute(std::vector< Calibration::ExpRun > runs={}, int iteration=0, IntervalOfValidity iov=IntervalOfValidity())
Runs calibration over vector of runs for a given iteration.
std::vector< Calibration::ExpRun > getRunListFromAllData() const
Get the complete list of runs from inspection of collected data.
void setDescription(const std::string &description)
Set algorithm description (in constructor)
Calibration::ExpRun getAllGranularityExpRun() const
Returns the Exp,Run pair that means 'Everything'. Currently unused.
virtual void boundaryFindingTearDown()
Put your algorithm back into a state ready for normal execution if you need to.
std::string m_prefix
The name of the TDirectory the collector objects are contained within.
std::string getFullObjectPath(const std::string &name, Calibration::ExpRun expRun) const
constructs the full TDirectory + Key name of an object in a TFile based on its name and exprun
std::shared_ptr< T > getObjectPtr(std::string name)
Get calibration data object (for all runs the calibration is requested for) This function will only w...
const std::vector< Calibration::ExpRun > & getRunList() const
Get the list of runs for which calibration is called.
virtual void boundaryFindingSetup(std::vector< Calibration::ExpRun >, int)
If you need to make some changes to your algorithm class before 'findPayloadBoundaries' is run,...
const std::string & getDescription() const
Get the description of the algorithm (set by developers in constructor)
std::string getPrefix() const
Get the prefix used for getting calibration data.
const T getInputJsonValue(const std::string &key) const
Get an input JSON value using a key. The normal exceptions are raised when the key doesn't exist.
std::vector< std::string > m_inputFileNames
List of input files to the Algorithm, will initially be user defined but then gets the wildcards expa...
int getIteration() const
Get current iteration.
std::string getCollectorName() const
Alias for prefix.
EResult
The result of calibration.
@ c_OK
Finished successfully =0 in Python.
@ c_Iterate
Needs iteration =1 in Python.
@ c_NotEnoughData
Needs more data =2 in Python.
@ c_Failure
Failed =3 in Python.
@ c_Undefined
Not yet known (before execution) =4 in Python.
void resetInputJson()
Clears the m_inputJson member variable.
std::string m_granularityOfData
Granularity of input data. This only changes when the input files change so it isn't specific to an e...
const T getOutputJsonValue(const std::string &key) const
Get a value using a key from the JSON output object, not sure why you would want to do this.
nlohmann::json m_jsonExecutionInput
Optional input JSON object used to make decisions about how to execute the algorithm code.
void setInputFileNames(PyObject *inputFileNames)
Set the input file names used for this algorithm from a Python list.
void setOutputJsonValue(const std::string &key, const T &value)
Set a key:value pair for the outputJson object, expected to be used internally during calibrate()
const std::vector< Calibration::ExpRun > findPayloadBoundaries(std::vector< Calibration::ExpRun > runs, int iteration=0)
Used to discover the ExpRun boundaries that you want the Python CAF to execute on....
void setPrefix(const std::string &prefix)
Set the prefix used to identify datastore objects.
nlohmann::json m_jsonExecutionOutput
Optional output JSON object that can be set during the execution by the underlying algorithm code.
virtual bool isBoundaryRequired(const Calibration::ExpRun &)
Given the current collector data, make a decision about whether or not this run should be the start o...
std::map< Calibration::ExpRun, std::vector< std::string > > m_runsToInputFiles
Map of Runs to input files. Gets filled when you call getRunRangeFromAllData, gets cleared when setti...
bool loadInputJson(const std::string &jsonString)
Load the m_inputJson variable from a string (useful from Python interface). The return bool indicates...
virtual ~CalibrationAlgorithm()
Virtual destructor (base class)
const nlohmann::json & getInputJsonObject() const
Get the entire top level JSON object. We explicitly say this must be of object type so that we might ...
std::string getGranularityFromData() const
Get the granularity of collected data.
void fillRunToInputFilesMap()
Fill the mapping of ExpRun -> Files.
std::vector< std::string > getVecInputFileNames() const
Get the input file names used for this algorithm as a STL vector.
bool commit()
Submit constants from last calibration into database.
std::string m_description
Description of the algorithm.
const std::string dumpOutputJson() const
Dump the JSON string of the output JSON object.
virtual EResult calibrate()=0
Run algo on data - pure virtual: needs to be implemented.
Calibration::ExpRun convertPyExpRun(PyObject *pyObj)
Performs the conversion of PyObject to ExpRun.
void resetOutputJson()
Clears the m_outputJson member variable.
std::string getGranularity() const
Get the granularity of collected data.
CalibrationAlgorithm(const std::string &collectorModuleName)
Constructor - sets the prefix for collected objects (won't be accessed until execute(....
void clearCalibrationData()
Clear calibration data.
ExecutionData m_data
Data specific to a SINGLE execution of the algorithm. Gets reset at the beginning of execution.
A class that describes the interval of experiments/runs for which an object in the database is valid.
Mergeable object holding (unique) set of (exp,run) pairs.
Definition: RunRange.h:25
const std::set< Calibration::ExpRun > & getExpRunSet()
Get access to the stored set.
Definition: RunRange.h:64
std::shared_ptr< T > getObjectPtr(const std::string &name, const std::vector< Calibration::ExpRun > &requestedRuns)
Get calibration data object by name and list of runs, the Merge function will be called to generate t...
Abstract base class for different kinds of events.