Belle II Software development
Splitter.h
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#pragma once
10#include <map>
11#include <vector>
12#include <TF1.h>
13
14
15//If compiled within BASF2
16#ifdef _PACKAGE_
17#include <framework/logging/Logger.h>
18#else
19#ifndef B2FATAL
20#define B2FATAL(arg) { std::cout << arg << std::endl; std::exit(1);}
21#define B2ASSERT(str, cond) { if((cond) == false) {std::cout << __FILE__ <<", line "<<__LINE__ << std::endl << "" << #cond << " failed" << std::endl; std::exit(1);} }
22#endif
23#define B2INFO(arg) { std::cout << arg << std::endl;}
24#define B2WARNING(arg) { std::cout << "WARNING : "<< arg << std::endl;}
25#endif
26
27
28
29
30namespace Belle2 {
36 // The data are divided into many small atoms, where atom has some specific target time length (few mins)
37 // but cannot (by definition) span over two runs.
38 // The task of the Splitter class is to cluster atoms into the calibration intervals according to th lossFunction
39 // In the next step the atoms in each calibration interval are clustered into the calibration subintervals
40
42 struct Atom {
43 double t1 = 0;
44 double t2 = 0;
45 int nEv = 0;
46 };
47
48
49
51 struct ExpRun {
52 int exp;
53 int run;
55 ExpRun(int Exp, int Run) : exp(Exp), run(Run) {}
56 };
57
58
60 struct ExpRunEvt {
61 int exp;
62 int run;
63 int evt;
65 ExpRunEvt(int Exp, int Run, int Evt) : exp(Exp), run(Run), evt(Evt) {}
66 };
67
68
69
71 inline bool operator!=(ExpRun a, ExpRun b) { return (a.exp != b.exp || a.run != b.run); }
72
74 inline bool operator<(ExpRun a, ExpRun b) {return ((a.exp < b.exp) || (a.exp == b.exp && a.run < b.run));}
75
77 std::map<ExpRun, std::pair<double, double>> filter(const std::map<ExpRun, std::pair<double, double>>& runs, double cut,
78 std::map<ExpRun, std::pair<double, double>>& runsRemoved);
79
81 std::pair<int, int> getStartEndIndexes(int nIntervals, std::vector<int> breaks, int indx);
82
83
85 inline std::vector<Atom> slice(std::vector<Atom> vec, int s, int e)
86 {
87 return std::vector<Atom>(vec.begin() + s, vec.begin() + e + 1);
88 }
89
90
97 std::vector<std::map<ExpRun, std::pair<double, double>>> breaks2intervalsSep(const std::map<ExpRun,
98 std::pair<double, double>>& runsMap, const std::vector<Atom>& currVec, const std::vector<int>& breaks);
99
100
101
108 class Splitter {
109 public:
110
111 Splitter() : lossFun(nullptr) {}
112
113
114
115
120 static std::pair<double, double> getStartEnd(std::vector<std::map<ExpRun, std::pair<double, double>>> res)
121 {
122 return {res.front().begin()->second.first,
123 res.back().rbegin()->second.second};
124 }
125
126
131 static std::vector<double> getBreaks(std::vector<std::map<ExpRun, std::pair<double, double>>> res)
132 {
133 std::vector<double> breaks;
134 for (int k = 0; k < int(res.size()) - 1; ++k) {
135 double e = res.at(k).rbegin()->second.second; //end of the previous interval
136 double s = res.at(k + 1).begin()->second.first; //start of the next interval
137 breaks.push_back((e + s) / 2.); //the break time is in between
138 }
139 return breaks;
140 }
141
142
148 static std::map<ExpRun, std::pair<double, double>> mergeIntervals(std::map<ExpRun, std::pair<double, double>> I1,
149 std::map<ExpRun, std::pair<double, double>> I2);
150
151
162 template<typename Evt>
163 std::vector<std::vector<std::map<ExpRun, std::pair<double, double>>>> getIntervals(
164 const std::map<ExpRun, std::pair<double, double>>& runs, std::vector<Evt> evts,
165 TString lossFunctionOuter, TString lossFunctionInner, double atomSize = 3. / 60)
166 {
167 //sort events by time
168 std::sort(evts.begin(), evts.end(), [](const Evt & e1, const Evt & e2) { return (e1.t > e2.t); });
169
170 // Divide into small intervals
171 std::vector<std::pair<double, double>> smallRuns = splitToSmall(runs, atomSize);
172
173 std::vector<Atom> atoms = createAtoms(smallRuns, evts);
174
175
176 // Calculate breaks for the calib. intervals
177 lossFun = toTF1(lossFunctionOuter);
178 std::vector<int> breaksSize = dynamicBreaks(atoms);
179
180 std::vector<std::vector<std::map<ExpRun, std::pair<double, double>>>> splitsRun; //split intervals with runNumber info
181
182
183 // Calculate breaks for the calib. subintervals
184 lossFun = toTF1(lossFunctionInner);
185 std::vector<int> breaksVtx;
186 for (int i = 0; i < int(breaksSize.size()) + 1; ++i) { //loop over all calib. intervals
187 int s, e;
188 std::tie(s, e) = getStartEndIndexes(atoms.size(), breaksSize, i);
189
190 // Store only atoms lying in the current calib. interval
191 auto currVec = slice(atoms, s, e);
192
193 // Get optimal breaks for particular calib. interval
194 std::vector<int> breaksSmall = dynamicBreaks(currVec);
195
196 // Incorporate the runNumber information
197 auto splitSeg = breaks2intervalsSep(runs, currVec, breaksSmall);
198
199 // Put the vector of subintervals (representing division of the interval)
200 // into the vector
201 splitsRun.push_back(splitSeg);
202
203 if (s != 0) breaksVtx.push_back(s - 1);
204 for (auto b : breaksSmall)
205 breaksVtx.push_back(b + s);
206 if (e != int(breaksSize.size())) breaksVtx.push_back(e);
207 }
208
209 return splitsRun;
210 }
211
212
213 private:
214
219 std::vector<int> dynamicBreaks(const std::vector<Atom>& runs);
220
221
222
231 double getMinLoss(const std::vector<Atom>& vec, int e, std::vector<int>& breaks);
232
233
241 double lossFunction(const std::vector<Atom>& vec, int s, int e) const;
242
243
251 static std::vector<std::pair<double, double>> splitToSmall(std::map<ExpRun, std::pair<double, double>> runs,
252 double intSize = 1. / 60);
253
259 template<typename Evt>
260 inline std::vector<Atom> createAtoms(const std::vector<std::pair<double, double>>& atomsTimes, const std::vector<Evt>& evts)
261 {
262 std::vector<Atom> atoms(atomsTimes.size());
263
264 // Store start/end times to atoms
265 for (unsigned a = 0; a < atoms.size(); ++a) {
266 atoms[a].t1 = atomsTimes[a].first;
267 atoms[a].t2 = atomsTimes[a].second;
268 }
269
270
271 // count the number of events in each atom
272 for (unsigned i = 0; i < evts.size(); ++i) {
273 for (unsigned a = 0; a < atoms.size(); ++a)
274 if (atoms[a].t1 <= evts[i].t && evts[i].t < atoms[a].t2)
275 ++atoms[a].nEv;
276
277 }
278
279
280
281 return atoms;
282 }
283
284
286 TF1* toTF1(TString LossString)
287 {
288 LossString.ReplaceAll("rawTime", "[0]");
289 LossString.ReplaceAll("netTime", "[1]");
290 LossString.ReplaceAll("maxGap", "[2]");
291 LossString.ReplaceAll("nEv", "[3]");
292
293 return (new TF1("lossFun", LossString));
294 }
295
296
297 TF1* lossFun;
298
299
301 std::vector<std::pair<double, std::vector<int>>> cache;
302
303
304 };
305
306
311 template<typename Evt>
312 inline std::map<ExpRun, std::pair<double, double>> getRunInfo(const std::vector<Evt>& evts)
313 {
314 std::map<ExpRun, std::pair<double, double>> runsInfo;
315
316 for (auto& evt : evts) {
317 int Exp = evt.exp;
318 int Run = evt.run;
319 double time = evt.t;
320 if (runsInfo.count(ExpRun(Exp, Run))) {
321 double tMin, tMax;
322 std::tie(tMin, tMax) = runsInfo.at(ExpRun(Exp, Run));
323 tMin = std::min(tMin, time);
324 tMax = std::max(tMax, time);
325 runsInfo.at(ExpRun(Exp, Run)) = {tMin, tMax};
326 } else {
327 runsInfo[ExpRun(Exp, Run)] = {time, time};
328 }
329
330 }
331 return runsInfo;
332 }
333
334
340 template<typename Evt>
341 inline ExpRunEvt getPosition(const std::vector<Evt>& events, double tEdge)
342 {
343 ExpRunEvt evt(-1, -1, -1);
344 double tBreak = -1e10;
345 for (auto& e : events) {
346 if (e.t < tEdge) {
347 if (e.t > tBreak) {
348 tBreak = e.t;
349 evt = ExpRunEvt(e.exp, e.run, e.evtNo);
350 }
351 }
352 }
353 return evt;
354 }
355
356// convert splitPoints [in UTC time] to expRunEvt
362 template<typename Evt>
363 std::vector<ExpRunEvt> convertSplitPoints(const std::vector<Evt>& events, std::vector<double> splitPoints)
364 {
365
366 std::vector<ExpRunEvt> breakPos;
367 for (auto p : splitPoints) {
368 auto pos = getPosition(events, p);
369 breakPos.push_back(pos);
370 }
371 return breakPos;
372 }
373
374
376}
Class that allows to split runs into the intervals of intended properties given by the lossFunction.
Definition: Splitter.h:108
std::vector< Atom > createAtoms(const std::vector< std::pair< double, double > > &atomsTimes, const std::vector< Evt > &evts)
Get the vector with parameters of the calibration Atoms.
Definition: Splitter.h:260
TF1 * toTF1(TString LossString)
Convert lossFunction from string to TF1.
Definition: Splitter.h:286
static std::vector< double > getBreaks(std::vector< std::map< ExpRun, std::pair< double, double > > > res)
Get vector with breaks of the calib.
Definition: Splitter.h:131
TF1 * lossFun
loss function used for clustering of Atoms
Definition: Splitter.h:297
std::vector< std::pair< double, std::vector< int > > > cache
cache used by the clustering algorithm (has to be reset every time)
Definition: Splitter.h:301
static std::pair< double, double > getStartEnd(std::vector< std::map< ExpRun, std::pair< double, double > > > res)
Get the start/end time of the calibration interval (vector of the calib.
Definition: Splitter.h:120
std::vector< std::vector< std::map< ExpRun, std::pair< double, double > > > > getIntervals(const std::map< ExpRun, std::pair< double, double > > &runs, std::vector< Evt > evts, TString lossFunctionOuter, TString lossFunctionInner, double atomSize=3./60)
Function to merge/divide runs into the calibration intervals of given characteristic length.
Definition: Splitter.h:163
bool operator!=(const DecayNode &node1, const DecayNode &node2)
Not equal: See operator==.
Definition: DecayNode.cc:65
static std::map< ExpRun, std::pair< double, double > > mergeIntervals(std::map< ExpRun, std::pair< double, double > > I1, std::map< ExpRun, std::pair< double, double > > I2)
Merge two subintervals into one subinterval.
Definition: Splitter.cc:306
static std::vector< std::pair< double, double > > splitToSmall(std::map< ExpRun, std::pair< double, double > > runs, double intSize=1./60)
Split the runs into small calibration intervals (atoms) of a specified size.
Definition: Splitter.cc:173
bool operator<(ExpRun a, ExpRun b)
less than for ExpRun
Definition: Splitter.h:74
std::vector< Atom > slice(std::vector< Atom > vec, int s, int e)
Slice the vector to contain only elements with indexes s .. e (included)
Definition: Splitter.h:85
double getMinLoss(const std::vector< Atom > &vec, int e, std::vector< int > &breaks)
Recursive function to evaluate minimal sum of the lossFuctions for the optimal clustering.
Definition: Splitter.cc:204
std::vector< std::map< ExpRun, std::pair< double, double > > > breaks2intervalsSep(const std::map< ExpRun, std::pair< double, double > > &runsMap, const std::vector< Atom > &currVec, const std::vector< int > &breaks)
Get calibration intervals according to the indexes of the breaks.
Definition: Splitter.cc:280
std::pair< int, int > getStartEndIndexes(int nIntervals, std::vector< int > breaks, int indx)
get the range of interval with nIntervals and breaks stored in a vector
Definition: Splitter.cc:83
std::map< ExpRun, std::pair< double, double > > filter(const std::map< ExpRun, std::pair< double, double > > &runs, double cut, std::map< ExpRun, std::pair< double, double > > &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
Definition: Splitter.cc:38
std::vector< int > dynamicBreaks(const std::vector< Atom > &runs)
Get optimal break points using algorithm based on dynamic programming.
Definition: Splitter.cc:241
double lossFunction(const std::vector< Atom > &vec, int s, int e) const
lossFunction of the calibration interval consisting of several "atoms" stored in vector vec The atoms...
Definition: Splitter.cc:135
std::vector< ExpRunEvt > convertSplitPoints(const std::vector< Evt > &events, std::vector< double > splitPoints)
Convert splitPoints [hours] to breakPoints in ExpRunEvt.
Definition: Splitter.h:363
std::map< ExpRun, std::pair< double, double > > getRunInfo(const std::vector< Evt > &evts)
Get the map of runs, where each run contains pair with start/end time [hours].
Definition: Splitter.h:312
ExpRunEvt getPosition(const std::vector< Evt > &events, double tEdge)
Get the exp-run-evt number from the event time [hours].
Definition: Splitter.h:341
Abstract base class for different kinds of events.
Very small (few mins) calibration interval which cannot be further divided : Atom.
Definition: Splitter.h:42
double t1
Start time of the Atom.
Definition: Splitter.h:43
int nEv
Number of events in Atom.
Definition: Splitter.h:45
double t2
End time of the Atom.
Definition: Splitter.h:44
struct with expNum, runNum, evtNum
Definition: Splitter.h:60
int exp
experiment number
Definition: Splitter.h:61
int evt
event number
Definition: Splitter.h:63
ExpRunEvt(int Exp, int Run, int Evt)
simple constructor
Definition: Splitter.h:65
int run
run number
Definition: Splitter.h:62
Struct containing exp number and run number.
Definition: Splitter.h:51
int exp
experiment number
Definition: Splitter.h:52
ExpRun(int Exp, int Run)
Simple constructor.
Definition: Splitter.h:55
int run
run number
Definition: Splitter.h:53