Belle II Software  release-08-01-10
Splitter.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 #include <map>
11 #include <vector>
12 #include <TF1.h>
13 
14 
15 //If compiled within BASF2
16 #ifdef _PACKAGE_
17 #include <framework/logging/Logger.h>
18 #else
19 #ifndef B2FATAL
20 #define B2FATAL(arg) { std::cout << arg << std::endl; std::exit(1);}
21 #define B2ASSERT(str, cond) { if((cond) == false) {std::cout << __FILE__ <<", line "<<__LINE__ << std::endl << "" << #cond << " failed" << std::endl; std::exit(1);} }
22 #endif
23 #define B2INFO(arg) { std::cout << arg << std::endl;}
24 #define B2WARNING(arg) { std::cout << "WARNING : "<< arg << std::endl;}
25 #endif
26 
27 
28 
29 
30 namespace Belle2 {
36  // The data are divided into many small atoms, where atom has some specific target time length (few mins)
37  // but cannot (by definition) span over two runs.
38  // The task of the Splitter class is to cluster atoms into the calibration intervals according to th lossFunction
39  // In the next step the atoms in each calibration interval are clustered into the calibration subintervals
40 
42  struct Atom {
43  double t1 = 0;
44  double t2 = 0;
45  int nEv = 0;
46  };
47 
48 
49 
51  struct ExpRun {
52  int exp;
53  int run;
55  ExpRun(int Exp, int Run) : exp(Exp), run(Run) {}
56  };
57 
58 
60  struct ExpRunEvt {
61  int exp;
62  int run;
63  int evt;
65  ExpRunEvt(int Exp, int Run, int Evt) : exp(Exp), run(Run), evt(Evt) {}
66  };
67 
68 
69 
71  inline bool operator!=(ExpRun a, ExpRun b) { return (a.exp != b.exp || a.run != b.run); }
72 
74  inline bool operator<(ExpRun a, ExpRun b) {return ((a.exp < b.exp) || (a.exp == b.exp && a.run < b.run));}
75 
77  std::map<ExpRun, std::pair<double, double>> filter(const std::map<ExpRun, std::pair<double, double>>& runs, double cut,
78  std::map<ExpRun, std::pair<double, double>>& runsRemoved);
79 
81  std::pair<int, int> getStartEndIndexes(int nIntervals, std::vector<int> breaks, int indx);
82 
83 
85  inline std::vector<Atom> slice(std::vector<Atom> vec, int s, int e)
86  {
87  return std::vector<Atom>(vec.begin() + s, vec.begin() + e + 1);
88  }
89 
90 
97  std::vector<std::map<ExpRun, std::pair<double, double>>> breaks2intervalsSep(const std::map<ExpRun,
98  std::pair<double, double>>& runsMap, const std::vector<Atom>& currVec, const std::vector<int>& breaks);
99 
100 
101 
108  class Splitter {
109  public:
110 
111  Splitter() : lossFun(nullptr) {}
112 
113 
114 
115 
120  static std::pair<double, double> getStartEnd(std::vector<std::map<ExpRun, std::pair<double, double>>> res)
121  {
122  return {res.front().begin()->second.first,
123  res.back().rbegin()->second.second};
124  }
125 
126 
131  static std::vector<double> getBreaks(std::vector<std::map<ExpRun, std::pair<double, double>>> res)
132  {
133  std::vector<double> breaks;
134  for (int k = 0; k < int(res.size()) - 1; ++k) {
135  double e = res.at(k).rbegin()->second.second; //end of the previous interval
136  double s = res.at(k + 1).begin()->second.first; //start of the next interval
137  breaks.push_back((e + s) / 2.); //the break time is in between
138  }
139  return breaks;
140  }
141 
142 
148  static std::map<ExpRun, std::pair<double, double>> mergeIntervals(std::map<ExpRun, std::pair<double, double>> I1,
149  std::map<ExpRun, std::pair<double, double>> I2);
150 
151 
162  template<typename Evt>
163  std::vector<std::vector<std::map<ExpRun, std::pair<double, double>>>> getIntervals(
164  const std::map<ExpRun, std::pair<double, double>>& runs, std::vector<Evt> evts,
165  TString lossFunctionOuter, TString lossFunctionInner, double atomSize = 3. / 60)
166  {
167  //sort events by time
168  std::sort(evts.begin(), evts.end(), [](const Evt & e1, const Evt & e2) { return (e1.t > e2.t); });
169 
170  // Divide into small intervals
171  std::vector<std::pair<double, double>> smallRuns = splitToSmall(runs, atomSize);
172 
173  std::vector<Atom> atoms = createAtoms(smallRuns, evts);
174 
175 
176  // Calculate breaks for the calib. intervals
177  lossFun = toTF1(lossFunctionOuter);
178  std::vector<int> breaksSize = dynamicBreaks(atoms);
179 
180  std::vector<std::vector<std::map<ExpRun, std::pair<double, double>>>> splitsRun; //split intervals with runNumber info
181 
182 
183  // Calculate breaks for the calib. subintervals
184  lossFun = toTF1(lossFunctionInner);
185  std::vector<int> breaksVtx;
186  for (int i = 0; i < int(breaksSize.size()) + 1; ++i) { //loop over all calib. intervals
187  int s, e;
188  std::tie(s, e) = getStartEndIndexes(atoms.size(), breaksSize, i);
189 
190  // Store only atoms lying in the current calib. interval
191  auto currVec = slice(atoms, s, e);
192 
193  // Get optimal breaks for particular calib. interval
194  std::vector<int> breaksSmall = dynamicBreaks(currVec);
195 
196  // Incorporate the runNumber information
197  auto splitSeg = breaks2intervalsSep(runs, currVec, breaksSmall);
198 
199  // Put the vector of subintervals (representing division of the interval)
200  // into the vector
201  splitsRun.push_back(splitSeg);
202 
203  if (s != 0) breaksVtx.push_back(s - 1);
204  for (auto b : breaksSmall)
205  breaksVtx.push_back(b + s);
206  if (e != int(breaksSize.size())) breaksVtx.push_back(e);
207  }
208 
209  return splitsRun;
210  }
211 
212 
213  private:
214 
219  std::vector<int> dynamicBreaks(const std::vector<Atom>& runs);
220 
221 
222 
231  double getMinLoss(const std::vector<Atom>& vec, int e, std::vector<int>& breaks);
232 
233 
241  double lossFunction(const std::vector<Atom>& vec, int s, int e) const;
242 
243 
251  static std::vector<std::pair<double, double>> splitToSmall(std::map<ExpRun, std::pair<double, double>> runs,
252  double intSize = 1. / 60);
253 
259  template<typename Evt>
260  inline std::vector<Atom> createAtoms(const std::vector<std::pair<double, double>>& atomsTimes, const std::vector<Evt>& evts)
261  {
262  std::vector<Atom> atoms(atomsTimes.size());
263 
264  // Store start/end times to atoms
265  for (unsigned a = 0; a < atoms.size(); ++a) {
266  atoms[a].t1 = atomsTimes[a].first;
267  atoms[a].t2 = atomsTimes[a].second;
268  }
269 
270 
271  // count the number of events in each atom
272  for (unsigned i = 0; i < evts.size(); ++i) {
273  for (unsigned a = 0; a < atoms.size(); ++a)
274  if (atoms[a].t1 <= evts[i].t && evts[i].t < atoms[a].t2)
275  ++atoms[a].nEv;
276 
277  }
278 
279 
280 
281  return atoms;
282  }
283 
284 
286  TF1* toTF1(TString LossString)
287  {
288  LossString.ReplaceAll("rawTime", "[0]");
289  LossString.ReplaceAll("netTime", "[1]");
290  LossString.ReplaceAll("maxGap", "[2]");
291  LossString.ReplaceAll("nEv", "[3]");
292 
293  return (new TF1("lossFun", LossString));
294  }
295 
296 
297  TF1* lossFun;
298 
299 
301  std::vector<std::pair<double, std::vector<int>>> cache;
302 
303 
304  };
305 
306 
311  template<typename Evt>
312  inline std::map<ExpRun, std::pair<double, double>> getRunInfo(const std::vector<Evt>& evts)
313  {
314  std::map<ExpRun, std::pair<double, double>> runsInfo;
315 
316  for (auto& evt : evts) {
317  int Exp = evt.exp;
318  int Run = evt.run;
319  double time = evt.t;
320  if (runsInfo.count(ExpRun(Exp, Run))) {
321  double tMin, tMax;
322  std::tie(tMin, tMax) = runsInfo.at(ExpRun(Exp, Run));
323  tMin = std::min(tMin, time);
324  tMax = std::max(tMax, time);
325  runsInfo.at(ExpRun(Exp, Run)) = {tMin, tMax};
326  } else {
327  runsInfo[ExpRun(Exp, Run)] = {time, time};
328  }
329 
330  }
331  return runsInfo;
332  }
333 
334 
340  template<typename Evt>
341  inline ExpRunEvt getPosition(const std::vector<Evt>& events, double tEdge)
342  {
343  ExpRunEvt evt(-1, -1, -1);
344  double tBreak = -1e10;
345  for (auto& e : events) {
346  if (e.t < tEdge) {
347  if (e.t > tBreak) {
348  tBreak = e.t;
349  evt = ExpRunEvt(e.exp, e.run, e.evtNo);
350  }
351  }
352  }
353  return evt;
354  }
355 
356 // convert splitPoints [in UTC time] to expRunEvt
362  template<typename Evt>
363  std::vector<ExpRunEvt> convertSplitPoints(const std::vector<Evt>& events, std::vector<double> splitPoints)
364  {
365 
366  std::vector<ExpRunEvt> breakPos;
367  for (auto p : splitPoints) {
368  auto pos = getPosition(events, p);
369  breakPos.push_back(pos);
370  }
371  return breakPos;
372  }
373 
374 
376 }
Class that allows to split runs into the intervals of intended properties given by the lossFunction.
Definition: Splitter.h:108
TF1 * toTF1(TString LossString)
Convert lossFunction from string to TF1.
Definition: Splitter.h:286
std::vector< std::vector< std::map< ExpRun, std::pair< double, double > > > > getIntervals(const std::map< ExpRun, std::pair< double, double >> &runs, std::vector< Evt > evts, TString lossFunctionOuter, TString lossFunctionInner, double atomSize=3./60)
Function to merge/divide runs into the calibration intervals of given characteristic length.
Definition: Splitter.h:163
std::vector< Atom > createAtoms(const std::vector< std::pair< double, double >> &atomsTimes, const std::vector< Evt > &evts)
Get the vector with parameters of the calibration Atoms.
Definition: Splitter.h:260
TF1 * lossFun
loss function used for clustering of Atoms
Definition: Splitter.h:297
std::vector< std::pair< double, std::vector< int > > > cache
cache used by the clustering algorithm (has to be reset every time)
Definition: Splitter.h:301
static std::vector< double > getBreaks(std::vector< std::map< ExpRun, std::pair< double, double >>> res)
Get vector with breaks of the calib.
Definition: Splitter.h:131
static std::pair< double, double > getStartEnd(std::vector< std::map< ExpRun, std::pair< double, double >>> res)
Get the start/end time of the calibration interval (vector of the calib.
Definition: Splitter.h:120
bool operator!=(const DecayNode &node1, const DecayNode &node2)
Not equal: See operator==.
Definition: DecayNode.cc:65
ExpRunEvt getPosition(const std::vector< Evt > &events, double tEdge)
Get the exp-run-evt number from the event time [hours].
Definition: Splitter.h:341
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)
Definition: Splitter.h:85
double getMinLoss(const std::vector< Atom > &vec, int e, std::vector< int > &breaks)
Recursive function to evaluate minimal sum of the lossFuctions for the optimal clustering.
Definition: Splitter.cc:204
std::pair< int, int > getStartEndIndexes(int nIntervals, std::vector< int > breaks, int indx)
get the range of interval with nIntervals and breaks stored in a vector
Definition: Splitter.cc:83
std::map< ExpRun, std::pair< double, double > > filter(const std::map< ExpRun, std::pair< double, double >> &runs, double cut, std::map< ExpRun, std::pair< double, double >> &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
Definition: Splitter.cc:38
static std::vector< std::pair< double, double > > splitToSmall(std::map< ExpRun, std::pair< double, double >> runs, double intSize=1./60)
Split the runs into small calibration intervals (atoms) of a specified size.
Definition: Splitter.cc:173
std::vector< std::map< ExpRun, std::pair< double, double > > > breaks2intervalsSep(const std::map< ExpRun, std::pair< double, double >> &runsMap, const std::vector< Atom > &currVec, const std::vector< int > &breaks)
Get calibration intervals according to the indexes of the breaks.
Definition: Splitter.cc:280
std::vector< int > dynamicBreaks(const std::vector< Atom > &runs)
Get optimal break points using algorithm based on dynamic programing.
Definition: Splitter.cc:241
std::map< ExpRun, std::pair< double, double > > getRunInfo(const std::vector< Evt > &evts)
Get the map of runs, where each run contains pair with start/end time [hours].
Definition: Splitter.h:312
std::vector< ExpRunEvt > convertSplitPoints(const std::vector< Evt > &events, std::vector< double > splitPoints)
Convert splitPoints [hours] to breakPoints in ExpRunEvt.
Definition: Splitter.h:363
double lossFunction(const std::vector< Atom > &vec, int s, int e) const
lossFunction of the calibration interval consisting of several "atoms" stored in vector vec The atoms...
Definition: Splitter.cc:135
bool operator<(ExpRun a, ExpRun b)
less than for ExpRun
Definition: Splitter.h:74
static std::map< ExpRun, std::pair< double, double > > mergeIntervals(std::map< ExpRun, std::pair< double, double >> I1, std::map< ExpRun, std::pair< double, double >> I2)
Merge two subintervals into one subinterval.
Definition: Splitter.cc:306
Abstract base class for different kinds of events.
Very small (few mins) calibration interval which cannot be further divided : Atom.
Definition: Splitter.h:42
double t1
Start time of the Atom.
Definition: Splitter.h:43
int nEv
Number of events in Atom.
Definition: Splitter.h:45
double t2
End time of the Atom.
Definition: Splitter.h:44
struct with expNum, runNum, evtNum
Definition: Splitter.h:60
int exp
experiment number
Definition: Splitter.h:61
int evt
event number
Definition: Splitter.h:63
ExpRunEvt(int Exp, int Run, int Evt)
simple constructor
Definition: Splitter.h:65
int run
run number
Definition: Splitter.h:62
Struct containing exp number and run number.
Definition: Splitter.h:51
int exp
experiment number
Definition: Splitter.h:52
ExpRun(int Exp, int Run)
Simple constructor.
Definition: Splitter.h:55
int run
run number
Definition: Splitter.h:53