Belle II Software  release-08-01-10
CalibrationAlgorithm.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 #include <Python.h>
11 #include <memory>
12 #include <string>
13 #include <vector>
14 #include <map>
15 #include <any>
16 #include <utility>
17 #include <list>
18 #include <nlohmann/json.hpp>
19 #include <TClonesArray.h>
20 #include <TDirectory.h>
21 #include <TFile.h>
22 #include <TTree.h>
23 #include <framework/database/Database.h>
24 #include <framework/database/IntervalOfValidity.h>
25 #include <framework/logging/Logger.h>
26 #include <calibration/Utilities.h>
27 #include <calibration/dataobjects/RunRange.h>
28 
29 namespace Belle2 {
38  public:
40  enum EResult {
41  c_OK,
46  };
47 
53  class ExecutionData {
54  public:
55  ExecutionData() {};
56  ~ExecutionData() {};
58  void reset()
59  {
60  B2DEBUG(100, "Resetting ExecutionData of algorithm");
61  m_requestedRuns.clear();
62  m_iteration = -1;
64  m_payloads.clear();
67  }
70  {
71  m_mapCalibData.clear();
72  }
74  const std::vector<Calibration::ExpRun>& getRequestedRuns() const {return m_requestedRuns;}
76  void setRequestedRuns(const std::vector<Calibration::ExpRun>& requestedRuns) {m_requestedRuns = requestedRuns;}
78  int getIteration() const {return m_iteration;}
80  void setIteration(int iteration)
81  {
82  B2DEBUG(29, "Setting Iteration of Algorithm to " << iteration);
83  m_iteration = iteration;
84  }
86  EResult getResult() const {return m_result;}
88  void setResult(EResult result) {m_result = result;}
90  void setRequestedIov(const IntervalOfValidity& iov = IntervalOfValidity(0, 0, -1, -1)) {m_iov = iov;}
92  const IntervalOfValidity& getRequestedIov() const {return m_iov;}
94  std::list<Database::DBImportQuery>& getPayloads() {return m_payloads;}
96  std::list<Database::DBImportQuery> getPayloadValues() {return m_payloads;}
98  std::shared_ptr<TNamed> getCalibObj(const std::string& name, const RunRange& runRange) const
99  {
100  auto it = m_mapCalibData.find(std::make_pair(name, runRange));
101  if (it == m_mapCalibData.end()) {
102  return nullptr;
103  }
104  return it->second;
105  }
107  void setCalibObj(const std::string& name, const RunRange& runRange, const std::shared_ptr<TNamed>& objectPtr)
108  {
109  m_mapCalibData[std::make_pair(name, runRange)] = objectPtr;
110  }
111 
112  private:
114  std::vector<Calibration::ExpRun> m_requestedRuns{};
116  int m_iteration{ -1};
122  std::list<Database::DBImportQuery> m_payloads{};
131  std::map<std::pair<std::string, RunRange>, std::shared_ptr<TNamed>> m_mapCalibData;
132  };
133 
140  explicit CalibrationAlgorithm(const std::string& collectorModuleName) : m_prefix(collectorModuleName) {}
141 
144 
146  std::string getPrefix() const {return m_prefix;}
147 
149  bool checkPyExpRun(PyObject* pyObj);
150 
152  Calibration::ExpRun convertPyExpRun(PyObject* pyObj);
153 
164  std::string getCollectorName() const {return getPrefix();}
165 
167  void setPrefix(const std::string& prefix) {m_prefix = prefix;}
168 
170  void setInputFileNames(PyObject* inputFileNames);
171 
173  PyObject* getInputFileNames();
174 
176  std::vector<Calibration::ExpRun> getRunListFromAllData() const;
177 
180 
183 
185  void fillRunToInputFilesMap();
186 
188  std::string getGranularity() const {return m_granularityOfData;};
189 
198  EResult execute(std::vector<Calibration::ExpRun> runs = {}, int iteration = 0, IntervalOfValidity iov = IntervalOfValidity());
199 
201  EResult execute(PyObject* runs, int iteration = 0, IntervalOfValidity iov = IntervalOfValidity());
202 
204  std::list<Database::DBImportQuery>& getPayloads() {return m_data.getPayloads();}
205 
207  std::list<Database::DBImportQuery> getPayloadValues() {return m_data.getPayloadValues();}
208 
210  bool commit();
211 
213  bool commit(std::list<Database::DBImportQuery> payloads);
214 
216  const std::string& getDescription() const {return m_description;}
217 
219  // of JSON object creation. Failure probably means that your JSON string was badly formatted.
220  bool loadInputJson(const std::string& jsonString);
221 
223  const std::string dumpOutputJson() const {return m_jsonExecutionOutput.dump();}
224 
226  // areas of the CAF. Basically you search for features in the data that you want to find and make sure that the CAF knows
227  // there is a boundary where payloads should probably start/end. The output boundaries should be the starting ExpRun
228  // of the new boundary.
229  const std::vector<Calibration::ExpRun> findPayloadBoundaries(std::vector<Calibration::ExpRun> runs, int iteration = 0);
230 
231  protected:
232  // Developers implement this function ------------
233 
235  virtual EResult calibrate() = 0;
236 
238  // Implementing this is optional because most people will never call findPayloadBoundaries in their CAF job.
239  // It returns false by default so that the boundaries vector is empty if you forgot to implement this.
240  //
241  // We omit the names of arguments here so that we don't generate lots of compiler warnings in algorithms that don't
242  // implement this function.
243  virtual bool isBoundaryRequired(const Calibration::ExpRun& /*currentRun*/)
244  {
245  B2ERROR("You didn't implement a isBoundaryRequired() member function in your CalibrationAlgorithm but you are calling it!");
246  return false;
247  }
248 
250  // We omit the names of arguments here so that we don't generate lots of compiler warnings in algorithms that don't
251  // implement this function.
252  virtual void boundaryFindingSetup(std::vector<Calibration::ExpRun> /*runs*/, int /*iteration = 0*/) {};
253 
255  // This runs right after 'findPayloadBoundaries' and is supposed to correct any changes you made in 'boundaryFindingSetup'
256  // or 'isBoundaryRequired'.
257  virtual void boundaryFindingTearDown() {};
258 
260  // calling findPayloadBoundaries, before boundaryFindingSetup is called.
261  std::vector<Calibration::ExpRun> m_boundaries;
262 
263  // Helpers ---------------- Data retrieval -------
264 
266  const std::vector<Calibration::ExpRun>& getRunList() const {return m_data.getRequestedRuns();}
267 
269  int getIteration() const { return m_data.getIteration(); }
270 
272  void setInputFileNames(std::vector<std::string> inputFileNames);
273 
275  std::vector<std::string> getVecInputFileNames() const {return m_inputFileNames;}
276 
278  template<class T>
279  std::shared_ptr<T> getObjectPtr(const std::string& name, const std::vector<Calibration::ExpRun>& requestedRuns);
280 
284  template<class T>
285  std::shared_ptr<T> getObjectPtr(std::string name)
286  {
287  if (m_runsToInputFiles.size() == 0)
289  return getObjectPtr<T>(name, m_data.getRequestedRuns());
290  }
291 
292  // Helpers ---------------- Database storage -----
293 
295  std::string getGranularityFromData() const;
296 
298  void saveCalibration(TClonesArray* data, const std::string& name);
299 
301  void saveCalibration(TClonesArray* data, const std::string& name, const IntervalOfValidity& iov);
302 
304  void saveCalibration(TObject* data);
305 
307  void saveCalibration(TObject* data, const IntervalOfValidity& iov);
308 
310  void saveCalibration(TObject* data, const std::string& name);
311 
313  void saveCalibration(TObject* data, const std::string& name, const IntervalOfValidity& iov);
314 
316  void updateDBObjPtrs(const unsigned int event, const int run, const int experiment);
317 
318  // -----------------------------------------------
319 
321  void setDescription(const std::string& description) {m_description = description;}
322 
325 
327  Calibration::ExpRun getAllGranularityExpRun() const {return m_allExpRun;}
328 
331 
334 
336  template<class T>
337  void setOutputJsonValue(const std::string& key, const T& value) {m_jsonExecutionOutput[key] = value;}
338 
340  // No attempt to catch exceptions is made here.
341  template<class T>
342  const T getOutputJsonValue(const std::string& key) const
343  {
344  return m_jsonExecutionOutput.at(key);
345  }
346 
348  // No attempt to catch them is made here.
349  template<class T>
350  const T getInputJsonValue(const std::string& key) const
351  {
352  return m_jsonExecutionInput.at(key);
353  }
354 
356  // up on weird errors where someone snuck an array into the member variable.
357  const nlohmann::json& getInputJsonObject() const {return m_jsonExecutionInput;}
358 
360  bool inputJsonKeyExists(const std::string& key) const {return m_jsonExecutionInput.count(key);}
361 
362  private:
363 
364  static const Calibration::ExpRun m_allExpRun;
367  std::string getExpRunString(Calibration::ExpRun& expRun) const;
368 
370  std::string getFullObjectPath(const std::string& name, Calibration::ExpRun expRun) const;
371 
373  std::vector<std::string> m_inputFileNames;
374 
376  std::map<Calibration::ExpRun, std::vector<std::string>> m_runsToInputFiles;
377 
379  std::string m_granularityOfData;
380 
383 
385  std::string m_description{""};
386 
388  std::string m_prefix{""};
389 
391  // We initialise to "{}" rather than allowing a JSON array/value as the top level type. This forces the user
392  // to use "key":value for storing data in this object. You should test for empty(), is_null() will always
393  // return false due to the empty top level object.
394  // Functionally similar to simple member variables for configuration of the algorithm.
395  // However these input values are easier to use from Python code without needing to know the details of the algorithm.
396  // These values are intended to be used for a single execution, not reused
397  nlohmann::json m_jsonExecutionInput = nlohmann::json::object();
398 
400  // As for input we initialise to an empty "{}" JSON object. Testing for empty() returns true, but is_null() does not.
401  // Nothing is done with these by default, however a calling process may decide to capture these values and use them
402  // as input to a following execution.
403  nlohmann::json m_jsonExecutionOutput = nlohmann::json::object();
404 
405  }; // End of CalibrationAlgorithm definition
406 
407 
408  /**************************************
409  * *
410  * Implementation of larger templates *
411  * *
412  **************************************/
413  template<class T>
414  std::shared_ptr<T> CalibrationAlgorithm::getObjectPtr(const std::string& name,
415  const std::vector<Calibration::ExpRun>& requestedRuns)
416  {
417  // Check if this object already exists
418  RunRange runRangeRequested(requestedRuns);
419  std::shared_ptr<T> objOutputPtr = std::dynamic_pointer_cast<T>(m_data.getCalibObj(name, runRangeRequested));
420  if (objOutputPtr)
421  return objOutputPtr;
422 
423  std::shared_ptr<T> mergedObjPtr(nullptr);
424  bool mergedEmpty = true;
425  TDirectory* dir = gDirectory;
426 
427  // Technically we could grab all the objects from all files, add to list and then merge at the end.
428  // But I prefer the (maybe) more memory efficient way of merging with all objects
429  // in a file before moving on to the next one, just in case TDirectory stuff screws us.
430  TList list;
431  list.SetOwner(false);
432 
433  // Construct the TDirectory names where we expect our objects to be
434  std::string runRangeObjName(getPrefix() + "/" + Calibration::RUN_RANGE_OBJ_NAME);
435 
436  if (strcmp(getGranularity().c_str(), "run") == 0) {
437  // Loop over our runs requested for the right files
438  for (auto expRunRequested : requestedRuns) {
439  // Find the relevant files for this ExpRun
440  auto searchFiles = m_runsToInputFiles.find(expRunRequested);
441  if (searchFiles == m_runsToInputFiles.end()) {
442  B2WARNING("No input file found with data collected from run "
443  "(" << expRunRequested.first << "," << expRunRequested.second << ")");
444  continue;
445  } else {
446  auto files = searchFiles->second;
447  for (auto fileName : files) {
448  RunRange* runRangeData;
449  //Open TFile to get the objects
450  std::unique_ptr<TFile> f;
451  f.reset(TFile::Open(fileName.c_str(), "READ"));
452  runRangeData = dynamic_cast<RunRange*>(f->Get(runRangeObjName.c_str()));
453  // Check that nothing went wrong in the mapping and that this file definitely contains this run's data
454  auto runSet = runRangeData->getExpRunSet();
455  if (runSet.find(expRunRequested) == runSet.end()) {
456  B2WARNING("Something went wrong with the mapping of ExpRun -> Input Files. "
457  "(" << expRunRequested.first << "," << expRunRequested.second << ") not in " << fileName);
458  }
459  // Get the path/directory of the Exp,Run TDirectory that holds the object(s)
460  std::string objDirName = getFullObjectPath(name, expRunRequested);
461  TDirectory* objDir = f->GetDirectory(objDirName.c_str());
462  if (!objDir) {
463  B2ERROR("Directory for requested object " << name << " not found: " << objDirName);
464  return nullptr;
465  }
466  // Find all the objects inside, there may be more than one
467  for (auto key : * (objDir->GetListOfKeys())) {
468  std::string keyName = key->GetName();
469  B2DEBUG(100, "Adding found object " << keyName << " in the directory " << objDir->GetPath());
470  T* objOther = (T*)objDir->Get(keyName.c_str());
471  if (objOther) {
472  if (mergedEmpty) {
473  mergedObjPtr = std::shared_ptr<T>(dynamic_cast<T*>(objOther->Clone(name.c_str())));
474  mergedObjPtr->SetDirectory(0);
475  mergedEmpty = false;
476  } else {
477  list.Add(objOther);
478  }
479  }
480  }
481  if (!mergedEmpty)
482  mergedObjPtr->Merge(&list);
483  list.Clear();
484  }
485  }
486  }
487  } else {
488  for (auto fileName : m_inputFileNames) {
489  //Open TFile to get the objects
490  std::unique_ptr<TFile> f;
491  f.reset(TFile::Open(fileName.c_str(), "READ"));
492  Calibration::ExpRun allGranExpRun = getAllGranularityExpRun();
493  std::string objDirName = getFullObjectPath(name, allGranExpRun);
494  std::string objPath = objDirName + "/" + name + "_1";
495  T* objOther = (T*)f->Get(objPath.c_str()); // Only one index for granularity == all
496  B2DEBUG(100, "Adding " << objPath);
497  if (objOther) {
498  if (mergedEmpty) {
499  mergedObjPtr = std::shared_ptr<T>(dynamic_cast<T*>(objOther->Clone(name.c_str())));
500  mergedObjPtr->SetDirectory(0);
501  mergedEmpty = false;
502  } else {
503  list.Add(objOther);
504  }
505  }
506  if (!mergedEmpty)
507  mergedObjPtr->Merge(&list);
508  list.Clear();
509  }
510  }
511  dir->cd();
512  objOutputPtr = mergedObjPtr;
513  if (!objOutputPtr) {
514  B2ERROR("No data found for object " << name);
515  return nullptr;
516  }
517  objOutputPtr->SetDirectory(0);
518  // make a TNamed version to input to the map of previous calib objects
519  std::shared_ptr<TNamed> storedObjPtr = std::static_pointer_cast<TNamed>(objOutputPtr);
520  m_data.setCalibObj(name, runRangeRequested, storedObjPtr);
521  B2DEBUG(100, "Passing back merged data " << name);
522  return objOutputPtr;
523  }
524 
528  template<> std::shared_ptr<TTree>
530  const std::string& name,
531  const std::vector<Calibration::ExpRun>& requestedRuns);
532 
534 } // namespace Belle2
A class to hold all data that is needed ONLY for the most recent single execution of the algorithm.
void setResult(EResult result)
Setter for current result.
std::map< std::pair< std::string, RunRange >, std::shared_ptr< TNamed > > m_mapCalibData
Map of shared pointers to merged calibration objects created by getObjectPtr() calls.
IntervalOfValidity m_iov
Current IoV to be executed, default empty. Will be either set by user explicitly or generated from co...
void setRequestedIov(const IntervalOfValidity &iov=IntervalOfValidity(0, 0, -1, -1))
Sets the requested IoV for this execution.
int m_iteration
current iteration for execution
void setCalibObj(const std::string &name, const RunRange &runRange, const std::shared_ptr< TNamed > &objectPtr)
Insert a newly created object in m_mapCalibData. Overwrites a previous entry if one exists.
const IntervalOfValidity & getRequestedIov() const
Getter for requested IOV.
void setRequestedRuns(const std::vector< Calibration::ExpRun > &requestedRuns)
Sets the vector of ExpRuns.
const std::vector< Calibration::ExpRun > & getRequestedRuns() const
Returns the vector of ExpRuns.
int getIteration() const
Getter for current iteration.
void setIteration(int iteration)
Setter for current iteration.
std::shared_ptr< TNamed > getCalibObj(const std::string &name, const RunRange &runRange) const
Get a previously created object in m_mapCalibData if one exists, otherwise return shared_ptr(nullptr)
std::vector< Calibration::ExpRun > m_requestedRuns
Runs for which the calibration has been last requested, either requested explicitly or generated from...
std::list< Database::DBImportQuery > m_payloads
Payloads saved by execution.
std::list< Database::DBImportQuery > getPayloadValues()
Get constants (in TObjects) for database update from last calibration but passed by VALUE.
EResult getResult() const
Getter for current result.
EResult m_result
Result of execution, default undefined to indicate we haven't run yet.
void reset()
Resets this class back to what is needed at the beginning of an execution.
std::list< Database::DBImportQuery > & getPayloads()
Get constants (in TObjects) for database update from last calibration.
void clearCalibrationData()
Clear calibration data.
Base class for calibration algorithms.
std::string getExpRunString(Calibration::ExpRun &expRun) const
Gets the "exp.run" string repr. of (exp,run)
RunRange getRunRangeFromAllData() const
Get the complete RunRange from inspection of collected data.
IntervalOfValidity getIovFromAllData() const
Get the complete IoV from inspection of collected data.
const std::string & getDescription() const
Get the description of the algorithm (set by developers in constructor)
void saveCalibration(TClonesArray *data, const std::string &name)
Store DBArray payload with given name with default IOV.
bool checkPyExpRun(PyObject *pyObj)
Checks that a PyObject can be successfully converted to an ExpRun type.
static const Calibration::ExpRun m_allExpRun
allExpRun
bool inputJsonKeyExists(const std::string &key) const
Test for a key in the input JSON object.
void updateDBObjPtrs(const unsigned int event, const int run, const int experiment)
Updates any DBObjPtrs by calling update(event) for DBStore.
std::vector< Calibration::ExpRun > m_boundaries
When using the boundaries functionality from isBoundaryRequired, this is used to store the boundaries...
PyObject * getInputFileNames()
Get the input file names used for this algorithm and pass them out as a Python list of unicode string...
EResult execute(std::vector< Calibration::ExpRun > runs={}, int iteration=0, IntervalOfValidity iov=IntervalOfValidity())
Runs calibration over vector of runs for a given iteration.
std::vector< Calibration::ExpRun > getRunListFromAllData() const
Get the complete list of runs from inspection of collected data.
void setDescription(const std::string &description)
Set algorithm description (in constructor)
std::vector< std::string > getVecInputFileNames() const
Get the input file names used for this algorithm as a STL vector.
Calibration::ExpRun getAllGranularityExpRun() const
Returns the Exp,Run pair that means 'Everything'. Used by getObjectPtr() when the data granularity is not 'run'.
virtual void boundaryFindingTearDown()
Put your algorithm back into a state ready for normal execution if you need to.
std::string m_prefix
The name of the TDirectory the collector objects are contained within.
std::string getFullObjectPath(const std::string &name, Calibration::ExpRun expRun) const
constructs the full TDirectory + Key name of an object in a TFile based on its name and exprun
std::shared_ptr< T > getObjectPtr(std::string name)
Get calibration data object (for all runs the calibration is requested for) This function will only w...
virtual void boundaryFindingSetup(std::vector< Calibration::ExpRun >, int)
If you need to make some changes to your algorithm class before 'findPayloadBoundaries' is run,...
std::string getPrefix() const
Get the prefix used for getting calibration data.
const T getInputJsonValue(const std::string &key) const
Get an input JSON value using a key. The normal exceptions are raised when the key doesn't exist.
std::vector< std::string > m_inputFileNames
List of input files to the Algorithm, will initially be user defined but then gets the wildcards expa...
int getIteration() const
Get current iteration.
std::string getCollectorName() const
Alias for prefix.
EResult
The result of calibration.
@ c_OK
Finished successfully =0 in Python.
@ c_Iterate
Needs iteration =1 in Python.
@ c_NotEnoughData
Needs more data =2 in Python.
@ c_Failure
Failed =3 in Python.
@ c_Undefined
Not yet known (before execution) =4 in Python.
void resetInputJson()
Clears the m_jsonExecutionInput member variable.
std::string m_granularityOfData
Granularity of input data. This only changes when the input files change so it isn't specific to an e...
const T getOutputJsonValue(const std::string &key) const
Get a value using a key from the JSON output object, not sure why you would want to do this.
nlohmann::json m_jsonExecutionInput
Optional input JSON object used to make decisions about how to execute the algorithm code.
void setInputFileNames(PyObject *inputFileNames)
Set the input file names used for this algorithm from a Python list.
void setOutputJsonValue(const std::string &key, const T &value)
Set a key:value pair for the outputJson object, expected to be used internally during calibrate()
const std::vector< Calibration::ExpRun > findPayloadBoundaries(std::vector< Calibration::ExpRun > runs, int iteration=0)
Used to discover the ExpRun boundaries that you want the Python CAF to execute on....
void setPrefix(const std::string &prefix)
Set the prefix used to identify datastore objects.
const nlohmann::json & getInputJsonObject() const
Get the entire top level JSON object. We explicitly say this must be of object type so that we might ...
const std::vector< Calibration::ExpRun > & getRunList() const
Get the list of runs for which calibration is called.
nlohmann::json m_jsonExecutionOutput
Optional output JSON object that can be set during the execution by the underlying algorithm code.
virtual bool isBoundaryRequired(const Calibration::ExpRun &)
Given the current collector data, make a decision about whether or not this run should be the start o...
std::map< Calibration::ExpRun, std::vector< std::string > > m_runsToInputFiles
Map of Runs to input files. Gets filled when you call getRunRangeFromAllData, gets cleared when setti...
bool loadInputJson(const std::string &jsonString)
Load the m_jsonExecutionInput variable from a string (useful from Python interface). The return bool indicates ...
virtual ~CalibrationAlgorithm()
Virtual destructor (base class)
std::list< Database::DBImportQuery > getPayloadValues()
Get constants (in TObjects) for database update from last execution but passed by VALUE.
std::string getGranularityFromData() const
Get the granularity of collected data.
void fillRunToInputFilesMap()
Fill the mapping of ExpRun -> Files.
bool commit()
Submit constants from last calibration into database.
std::string m_description
Description of the algorithm.
const std::string dumpOutputJson() const
Dump the JSON string of the output JSON object.
virtual EResult calibrate()=0
Run algo on data - pure virtual: needs to be implemented.
Calibration::ExpRun convertPyExpRun(PyObject *pyObj)
Performs the conversion of PyObject to ExpRun.
void resetOutputJson()
Clears the m_jsonExecutionOutput member variable.
std::list< Database::DBImportQuery > & getPayloads()
Get constants (in TObjects) for database update from last execution.
std::string getGranularity() const
Get the granularity of collected data.
CalibrationAlgorithm(const std::string &collectorModuleName)
Constructor - sets the prefix for collected objects (won't be accessed until execute(....
void clearCalibrationData()
Clear calibration data.
ExecutionData m_data
Data specific to a SINGLE execution of the algorithm. Gets reset at the beginning of execution.
A class that describes the interval of experiments/runs for which an object in the database is valid.
Mergeable object holding (unique) set of (exp,run) pairs.
Definition: RunRange.h:25
const std::set< Calibration::ExpRun > & getExpRunSet()
Get access to the stored set.
Definition: RunRange.h:64
std::shared_ptr< T > getObjectPtr(const std::string &name, const std::vector< Calibration::ExpRun > &requestedRuns)
Get calibration data object by name and list of runs, the Merge function will be called to generate t...
Abstract base class for different kinds of events.