Belle II Software  release-05-02-19
Splitter.h
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2020 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Radek Zlebcik *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #pragma once
12 #include <map>
13 #include <vector>
14 #include <utility>
15 #include <limits>
16 #include <TF1.h>
17 
18 
19 //If compiled within BASF2
20 #ifdef _PACKAGE_
21 #include <framework/logging/Logger.h>
22 #else
23 #ifndef B2FATAL
24 #define B2FATAL(arg) { std::cout << arg << std::endl; std::exit(1);}
25 #define B2ASSERT(str, cond) { if((cond) == false) {std::cout << __FILE__ <<", line "<<__LINE__ << std::endl << "" << #cond << " failed" << std::endl; std::exit(1);} }
26 #endif
27 #define B2INFO(arg) { std::cout << arg << std::endl;}
28 #define B2WARNING(arg) { std::cout << "WARNING : "<< arg << std::endl;}
29 #endif
30 
31 
32 
33 
34 namespace Belle2 {
40  // The data are divided into many small atoms, where atom has some specific target time length (few mins)
41  // but cannot (by definition) span over two runs.
42  // The task of the Splitter class is to cluster atoms into the calibration intervals according to th lossFunction
43  // In the next step the atoms in each calibration interval are clustered into the calibration subintervals
44 
46  struct Atom {
47  double t1 = 0;
48  double t2 = 0;
49  int nEv = 0;
50  };
51 
52 
53 
55  struct ExpRun {
56  int exp;
57  int run;
58 
59  ExpRun(int Exp, int Run) : exp(Exp), run(Run) {}
60  };
61 
62 
64  struct ExpRunEvt {
65  int exp;
66  int run;
67  int evt;
68 
69  ExpRunEvt(int Exp, int Run, int Evt) : exp(Exp), run(Run), evt(Evt) {}
70  };
71 
72 
73 
75  inline bool operator!=(ExpRun a, ExpRun b) { return (a.exp != b.exp || a.run != b.run); }
76 
78  inline bool operator<(ExpRun a, ExpRun b) {return ((a.exp < b.exp) || (a.exp == b.exp && a.run < b.run));}
79 
81  std::map<ExpRun, std::pair<double, double>> filter(const std::map<ExpRun, std::pair<double, double>>& runs, double cut,
82  std::map<ExpRun, std::pair<double, double>>& runsRemoved);
83 
85  std::pair<int, int> getStartEndIndexes(int nIntervals, std::vector<int> breaks, int indx);
86 
87 
89  inline std::vector<Atom> slice(std::vector<Atom> vec, int s, int e)
90  {
91  return std::vector<Atom>(vec.begin() + s, vec.begin() + e + 1);
92  }
93 
94 
101  std::vector<std::map<ExpRun, std::pair<double, double>>> breaks2intervalsSep(const std::map<ExpRun,
102  std::pair<double, double>>& runsMap, const std::vector<Atom>& currVec, const std::vector<int>& breaks);
103 
104 
105 
112  class Splitter {
113  public:
114 
115  Splitter() : lossFun(nullptr) {}
116 
117 
118 
119 
124  static std::pair<double, double> getStartEnd(std::vector<std::map<ExpRun, std::pair<double, double>>> res)
125  {
126  return {res.front().begin()->second.first,
127  res.back().rbegin()->second.second};
128  }
129 
130 
135  static std::vector<double> getBreaks(std::vector<std::map<ExpRun, std::pair<double, double>>> res)
136  {
137  std::vector<double> breaks;
138  for (int k = 0; k < int(res.size()) - 1; ++k) {
139  double e = res.at(k).rbegin()->second.second; //end of the previous interval
140  double s = res.at(k + 1).begin()->second.first; //start of the next interval
141  breaks.push_back((e + s) / 2.); //the break time is in between
142  }
143  return breaks;
144  }
145 
146 
152  static std::map<ExpRun, std::pair<double, double>> mergeIntervals(std::map<ExpRun, std::pair<double, double>> I1,
153  std::map<ExpRun, std::pair<double, double>> I2);
154 
155 
165  template<typename Evt>
166  std::vector<std::vector<std::map<ExpRun, std::pair<double, double>>>> getIntervals(
167  const std::map<ExpRun, std::pair<double, double>>& runs, std::vector<Evt> evts,
168  TString lossFunctionOuter, TString lossFunctionInner)
169  {
170  //sort events by time
171  std::sort(evts.begin(), evts.end(), [](const Evt & e1, const Evt & e2) { return (e1.t > e2.t); });
172 
173  // Divide into small intervals
174  std::vector<std::pair<double, double>> smallRuns = splitToSmall(runs, 0.1);
175 
176  std::vector<Atom> atoms = createAtoms(smallRuns, evts);
177 
178 
179  // Calculate breaks for the calib. intervals
180  lossFun = toTF1(lossFunctionOuter);
181  std::vector<int> breaksSize = dynamicBreaks(atoms);
182 
183  std::vector<std::vector<std::map<ExpRun, std::pair<double, double>>>> splitsRun; //split intervals with runNumber info
184 
185 
186  // Calculate breaks for the calib. subintervals
187  lossFun = toTF1(lossFunctionInner);
188  std::vector<int> breaksVtx;
189  for (int i = 0; i < int(breaksSize.size()) + 1; ++i) { //loop over all calib. intervals
190  int s, e;
191  std::tie(s, e) = getStartEndIndexes(atoms.size(), breaksSize, i);
192 
193  // Store only atoms lying in the current calib. interval
194  auto currVec = slice(atoms, s, e);
195 
196  // Get optimal breaks for particular calib. interval
197  std::vector<int> breaksSmall = dynamicBreaks(currVec);
198 
199  // Incorporate the runNumber information
200  auto splitSeg = breaks2intervalsSep(runs, currVec, breaksSmall);
201 
202  // Put the vector of subintervals (representing division of the interval)
203  // into the vector
204  splitsRun.push_back(splitSeg);
205 
206  if (s != 0) breaksVtx.push_back(s - 1);
207  for (auto b : breaksSmall)
208  breaksVtx.push_back(b + s);
209  if (e != int(breaksSize.size())) breaksVtx.push_back(e);
210  }
211 
212  return splitsRun;
213  }
214 
215 
216  private:
217 
222  std::vector<int> dynamicBreaks(const std::vector<Atom>& runs);
223 
224 
225 
234  double getMinLoss(const std::vector<Atom>& vec, int e, std::vector<int>& breaks);
235 
236 
244  double lossFunction(const std::vector<Atom>& vec, int s, int e) const;
245 
246 
254  static std::vector<std::pair<double, double>> splitToSmall(std::map<ExpRun, std::pair<double, double>> runs,
255  double intSize = 1. / 60);
256 
262  template<typename Evt>
263  inline std::vector<Atom> createAtoms(const std::vector<std::pair<double, double>>& atomsTimes, const std::vector<Evt>& evts)
264  {
265  std::vector<Atom> atoms(atomsTimes.size());
266 
267  // Store start/end times to atoms
268  for (unsigned a = 0; a < atoms.size(); ++a) {
269  atoms[a].t1 = atomsTimes[a].first;
270  atoms[a].t2 = atomsTimes[a].second;
271  }
272 
273 
274  // count the number of events in each atom
275  for (unsigned i = 0; i < evts.size(); ++i) {
276  for (unsigned a = 0; a < atoms.size(); ++a)
277  if (atoms[a].t1 <= evts[i].t && evts[i].t < atoms[a].t2)
278  ++atoms[a].nEv;
279 
280  }
281 
282 
283 
284  return atoms;
285  }
286 
287 
289  TF1* toTF1(TString LossString)
290  {
291  LossString.ReplaceAll("rawTime", "[0]");
292  LossString.ReplaceAll("netTime", "[1]");
293  LossString.ReplaceAll("maxGap", "[2]");
294  LossString.ReplaceAll("nEv", "[3]");
295 
296  return (new TF1("lossFun", LossString));
297  }
298 
299 
300  TF1* lossFun;
301 
302 
304  std::vector<std::pair<double, std::vector<int>>> cache;
305 
306 
307  };
308 
309 
314  template<typename Evt>
315  inline std::map<ExpRun, std::pair<double, double>> getRunInfo(const std::vector<Evt>& evts)
316  {
317  std::map<ExpRun, std::pair<double, double>> runsInfo;
318 
319  for (auto& evt : evts) {
320  int Exp = evt.exp;
321  int Run = evt.run;
322  double time = evt.t;
323  if (runsInfo.count(ExpRun(Exp, Run))) {
324  double tMin, tMax;
325  std::tie(tMin, tMax) = runsInfo.at(ExpRun(Exp, Run));
326  tMin = std::min(tMin, time);
327  tMax = std::max(tMax, time);
328  runsInfo.at(ExpRun(Exp, Run)) = {tMin, tMax};
329  } else {
330  runsInfo[ExpRun(Exp, Run)] = {time, time};
331  }
332 
333  }
334  return runsInfo;
335  }
336 
337 
343  template<typename Evt>
344  inline ExpRunEvt getPosition(const std::vector<Evt>& events, double tEdge)
345  {
346  ExpRunEvt evt(-1, -1, -1);
347  double tBreak = -1e10;
348  for (auto& e : events) {
349  if (e.t < tEdge) {
350  if (e.t > tBreak) {
351  tBreak = e.t;
352  evt = ExpRunEvt(e.exp, e.run, e.evtNo);
353  }
354  }
355  }
356  return evt;
357  }
358 
359 // convert splitPoints [in UTC time] to expRunEvt
365  template<typename Evt>
366  std::vector<ExpRunEvt> convertSplitPoints(const std::vector<Evt>& events, std::vector<double> splitPoints)
367  {
368 
369  std::vector<ExpRunEvt> breakPos;
370  for (auto p : splitPoints) {
371  auto pos = getPosition(events, p);
372  breakPos.push_back(pos);
373  }
374  return breakPos;
375  }
376 
377 
379 }
Belle2::getRunInfo
std::map< ExpRun, std::pair< double, double > > getRunInfo(const std::vector< Evt > &evts)
Get the map of runs, where each run contains pair with start/end time [hours].
Definition: Splitter.h:315
Belle2::Splitter
Class that allows to split runs into the intervals of intended properties given by the lossFunction.
Definition: Splitter.h:112
Belle2::Splitter::getStartEnd
static std::pair< double, double > getStartEnd(std::vector< std::map< ExpRun, std::pair< double, double >>> res)
Get the start/end time of the calibration interval (vector of the calib.
Definition: Splitter.h:124
Belle2::Splitter::getIntervals
std::vector< std::vector< std::map< ExpRun, std::pair< double, double > > > > getIntervals(const std::map< ExpRun, std::pair< double, double >> &runs, std::vector< Evt > evts, TString lossFunctionOuter, TString lossFunctionInner)
Function to merge/divide runs into the calibration intervals of given characteristic length.
Definition: Splitter.h:166
Belle2::Splitter::getBreaks
static std::vector< double > getBreaks(std::vector< std::map< ExpRun, std::pair< double, double >>> res)
Get vector with breaks of the calib.
Definition: Splitter.h:135
Belle2::Atom::t2
double t2
End time of the Atom.
Definition: Splitter.h:48
Belle2::ExpRunEvt::ExpRunEvt
ExpRunEvt(int Exp, int Run, int Evt)
simple constructor
Definition: Splitter.h:69
Belle2::operator!=
bool operator!=(const DecayNode &node1, const DecayNode &node2)
Not equal: See operator==.
Definition: DecayNode.cc:67
Belle2::convertSplitPoints
std::vector< ExpRunEvt > convertSplitPoints(const std::vector< Evt > &events, std::vector< double > splitPoints)
Convert splitPoints [hours] to breakPoints in ExpRunEvt.
Definition: Splitter.h:366
Belle2::Splitter::toTF1
TF1 * toTF1(TString LossString)
Convert lossFunction from string to TF1.
Definition: Splitter.h:289
Belle2::Splitter::lossFun
TF1 * lossFun
loss function used for clustering of Atoms
Definition: Splitter.h:300
Belle2::breaks2intervalsSep
std::vector< std::map< ExpRun, std::pair< double, double > > > breaks2intervalsSep(const std::map< ExpRun, std::pair< double, double >> &runsMap, const std::vector< Atom > &currVec, const std::vector< int > &breaks)
Get calibration intervals according to the indexes of the breaks.
Definition: Splitter.cc:280
Belle2::Splitter::createAtoms
std::vector< Atom > createAtoms(const std::vector< std::pair< double, double >> &atomsTimes, const std::vector< Evt > &evts)
Get the vector with parameters of the calibration Atoms.
Definition: Splitter.h:263
Belle2::ExpRunEvt::evt
int evt
event number
Definition: Splitter.h:67
Belle2::filter
std::map< ExpRun, std::pair< double, double > > filter(const std::map< ExpRun, std::pair< double, double >> &runs, double cut, std::map< ExpRun, std::pair< double, double >> &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
Definition: Splitter.cc:43
Belle2::Atom
Very small (few mins) calibration interval which cannot be further divided : Atom.
Definition: Splitter.h:46
Belle2::ExpRun::exp
int exp
experiment number
Definition: Splitter.h:56
Belle2::ExpRunEvt::run
int run
run number
Definition: Splitter.h:66
Belle2::operator<
bool operator<(ExpRun a, ExpRun b)
less than for ExpRun
Definition: Splitter.h:78
Belle2::Splitter::dynamicBreaks
std::vector< int > dynamicBreaks(const std::vector< Atom > &runs)
Get optimal break points using algorithm based on dynamic programing.
Definition: Splitter.cc:241
Belle2::Atom::nEv
int nEv
Number of events in Atom.
Definition: Splitter.h:49
Belle2::Atom::t1
double t1
Start time of the Atom.
Definition: Splitter.h:47
Belle2::getStartEndIndexes
std::pair< int, int > getStartEndIndexes(int nIntervals, std::vector< int > breaks, int indx)
get the range of interval with nIntervals and breaks stored in a vector
Definition: Splitter.cc:88
Belle2::ExpRun::run
int run
run number
Definition: Splitter.h:57
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::Splitter::mergeIntervals
static std::map< ExpRun, std::pair< double, double > > mergeIntervals(std::map< ExpRun, std::pair< double, double >> I1, std::map< ExpRun, std::pair< double, double >> I2)
Merge two subintervals into one subinterval.
Definition: Splitter.cc:305
Belle2::Splitter::lossFunction
double lossFunction(const std::vector< Atom > &vec, int s, int e) const
lossFunction of the calibration interval consisting of several "atoms" stored in vector vec The atoms...
Definition: Splitter.cc:140
Belle2::ExpRun
Struct containing exp number and run number.
Definition: Splitter.h:55
Belle2::ExpRunEvt
struct with expNum, runNum, evtNum
Definition: Splitter.h:64
Belle2::Splitter::cache
std::vector< std::pair< double, std::vector< int > > > cache
cache used by the clustering algorithm (has to be reset every time)
Definition: Splitter.h:304
Belle2::slice
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)
Definition: Splitter.h:89
Belle2::Splitter::getMinLoss
double getMinLoss(const std::vector< Atom > &vec, int e, std::vector< int > &breaks)
Recursive function to evaluate minimal sum of the lossFuctions for the optimal clustering.
Definition: Splitter.cc:204
Belle2::ExpRun::ExpRun
ExpRun(int Exp, int Run)
Simple constructor.
Definition: Splitter.h:59
Belle2::Splitter::splitToSmall
static std::vector< std::pair< double, double > > splitToSmall(std::map< ExpRun, std::pair< double, double >> runs, double intSize=1./60)
Split the runs into small calibration intervals (atoms) of a specified size.
Definition: Splitter.cc:178
Belle2::getPosition
ExpRunEvt getPosition(const std::vector< Evt > &events, double tEdge)
Get the exp-run-evt number from the event time [hours].
Definition: Splitter.h:344
Belle2::ExpRunEvt::exp
int exp
experiment number
Definition: Splitter.h:65