Belle II Software development
Splitter.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9
10#include <vector>
11#include <map>
12#include <algorithm>
13#include <iostream>
14#include <utility>
15#include <cmath>
16#include <numeric>
17#include <TGraphErrors.h>
18#include <TStyle.h>
19#include <TAxis.h>
20
21
22//If compiled within BASF2
23#ifdef _PACKAGE_
24#include <tracking/calibration/Splitter.h>
25#else
26#include <Splitter.h>
27#endif
28
29
30namespace Belle2 {
37 //return filtered runs intervals (remove very short runs)
38 std::map<ExpRun, std::pair<double, double>> filter(const std::map<ExpRun, std::pair<double, double>>& runs, double cut,
39 std::map<ExpRun, std::pair<double, double>>& runsRemoved)
40 {
41 std::map<ExpRun, std::pair<double, double>> runsCopy;
42
43 for (auto r : runs) {
44 auto& I = r.second;
45 double d = I.second - I.first;
46 if (d > cut)
47 runsCopy[r.first] = r.second;
48 else
49 runsRemoved[r.first] = r.second;
50 }
51
52 return runsCopy;
53 }
54
55
57 void plotRuns(std::vector<std::pair<double, double>> runs)
58 {
59 TGraphErrors* gr = new TGraphErrors();
60
61
62 for (auto r : runs) {
63 double m = (r.first + r.second) / 2;
64 double e = (r.second - r.first) / 2;
65
66 gr->SetPoint(gr->GetN(), m, 1);
67 gr->SetPointError(gr->GetN() - 1, e, 0);
68 }
69
70 gStyle->SetEndErrorSize(6);
71
72 gr->SetLineWidth(1);
73 gr->SetMarkerSize(40);
74 gr->Draw("ape");
75 gr->GetYaxis()->SetRangeUser(-10, 10);
76 gr->GetXaxis()->SetRangeUser(0, 256);
77
78 gr->GetXaxis()->SetTitle("time [hours]");
79
80 }
81
83 std::pair<int, int> getStartEndIndexes(int nIntervals, std::vector<int> breaks, int indx)
84 {
85 B2ASSERT("There must be at least one interval", nIntervals >= 1);
86 B2ASSERT("Interval index must be positive", indx >= 0);
87 B2ASSERT("Interval index must be smaller than #breaks", indx < int(breaks.size()) + 1); //There is one more interval than #breaks
88 int s = (indx == 0) ? 0 : breaks[indx - 1] + 1;
89 int e = (indx == int(breaks.size())) ? nIntervals - 1 : breaks[indx];
90 return {s, e};
91 }
92
94 void plotSRuns(std::vector<std::pair<double, double>> runs, std::vector<int> breaks, int offset = 2)
95 {
96 TGraphErrors* gr = new TGraphErrors();
97
98 for (int i = 0; i < int(breaks.size()) + 1; ++i) {
99 int s, e;
100 std::tie(s, e) = getStartEndIndexes(runs.size(), breaks, i);
101 double a = runs[s].first;
102 double b = runs[e].second;
103
104 double m = (a + b) / 2;
105 double err = (b - a) / 2;
106
107 gr->SetPoint(gr->GetN(), m, offset);
108 gr->SetPointError(gr->GetN() - 1, err, 0);
109
110 }
111
112 gr->SetLineColor(kRed);
113 gr->SetMarkerColor(kRed);
114 gr->Draw("pe same");
115 }
116
117
119 void printBySize(std::vector<std::pair<double, double>> runs)
120 {
121 std::vector<double> dist;
122 for (auto r : runs) {
123 double d = r.second - r.first;
124 dist.push_back(d);
125 }
126
127 sort(dist.begin(), dist.end());
128
129 for (auto d : dist)
130 B2INFO(d);
131
132 }
133
135 double Splitter::lossFunction(const std::vector<Atom>& vec, int s, int e) const
136 {
137
138 //raw time
139 double rawTime = vec[e].t2 - vec[s].t1;
140
141 //max gap
142 double maxGap = 0;
143 for (int i = s; i <= e - 1; ++i) {
144 double d = vec[i + 1].t1 - vec[i].t2;
145 maxGap = std::max(maxGap, d);
146 }
147
148 //net time
149 double netTime = 0;
150 for (int i = s; i <= e; ++i) {
151 netTime += vec[i].t2 - vec[i].t1;
152 }
153
154 // Number of events
155 double nEv = 0;
156 for (int i = s; i <= e; ++i) {
157 nEv += vec[i].nEv;
158 }
159 if (nEv == 0) nEv = 0.1;
160
161 //double loss = pow(rawTime - tBest, 2) + gapPenalty * pow(maxGap, 2);
162 //double loss = 1./nEv + timePenalty * pow(rawTime, 2);
163
164 lossFun->SetParameters(rawTime, netTime, maxGap, nEv);
165 double lossNew = lossFun->Eval(0);
166
167 return lossNew;
168 }
169
170
171
172 // split to many small intervals (atoms)
173 std::vector<std::pair<double, double>> Splitter::splitToSmall(std::map<ExpRun, std::pair<double, double>> runs, double intSize)
174 {
175 // split into small intervals
176 std::vector<std::pair<double, double>> smallRuns;
177
178 for (auto r : runs) {
179 auto& I = r.second;
180 if (intSize < 0) {
181 smallRuns.push_back(I);
182 continue;
183 }
184
185 double runTime = I.second - I.first;
186 int nSplits = runTime / intSize; //1-m intervals
187 nSplits = std::max(1, nSplits); //at least 1 interval
188
189 for (int i = 0; i < nSplits; ++i) {
190 double L = I.first + i * (runTime / nSplits);
191 double H = I.first + (i + 1) * (runTime / nSplits);
192 smallRuns.push_back({L, H});
193 }
194 }
195 return smallRuns;
196 }
197
198
199
200
201
202
203 // Get the optimal clustering of the atoms with indeces 0 .. e (recursive function with cache)
204 double Splitter::getMinLoss(const std::vector<Atom>& vec, int e, std::vector<int>& breaks)
205 {
206 // If entry in cache (speed up)
207 if (cache[e].first >= 0) {
208 breaks = cache[e].second;
209 return cache[e].first;
210 }
211
212
213 std::vector<int> breaksOpt;
214 double minVal = 1e30;
215 int iMin = -10;
216 for (int i = -1; i <= e - 1; ++i) {
217 auto breaksNow = breaks;
218 double r1 = 0;
219 if (i != -1)
220 r1 = getMinLoss(vec, i, breaksNow);
221 double r2 = lossFunction(vec, i + 1, e);
222 double tot = r1 + r2;
223
224 if (tot < minVal) { //store minimum
225 minVal = tot;
226 iMin = i;
227 breaksOpt = breaksNow;
228 }
229 }
230
231 if (iMin != -1)
232 breaksOpt.push_back(iMin);
233
234
235 breaks = breaksOpt;
236 cache[e] = std::make_pair(minVal, breaks); //store solution to cache
237 return minVal;
238 }
239
240 //Get the indexes of the break-points
241 std::vector<int> Splitter::dynamicBreaks(const std::vector<Atom>& runs)
242 {
243 //reset cache
244 cache.resize(runs.size());
245 for (auto& c : cache)
246 c = std::make_pair(-1.0, std::vector<int>({}));
247
248
249 std::vector<int> breaks;
250 getMinLoss(runs, runs.size() - 1, breaks); //the minLoss (output) currently not used, only breaks
251
252 return breaks;
253 }
254
255
256
262 static ExpRun getRun(std::map<ExpRun, std::pair<double, double>> runs, double t)
263 {
264 ExpRun rFound(-1, -1);
265 int nFound = 0;
266 for (auto r : runs) { //Linear search over runs
267 if (r.second.first <= t && t < r.second.second) {
268 rFound = r.first;
269 ++nFound;
270 }
271 }
272
273 B2ASSERT("Exactly one interval should be found", nFound == 1);
274 B2ASSERT("Assert that something was found", rFound != ExpRun(-1, -1));
275 return rFound;
276 }
277
278
279
280 std::vector<std::map<ExpRun, std::pair<double, double>>> breaks2intervalsSep(const std::map<ExpRun, std::pair<double, double>>&
281 runsMap,
282 const std::vector<Atom>& currVec, const std::vector<int>& breaks)
283 {
284 std::vector<std::map<ExpRun, std::pair<double, double>>> splitsNow(breaks.size() + 1);
285 for (int i = 0; i < int(breaks.size()) + 1; ++i) {
286 int s, e;
287 std::tie(s, e) = getStartEndIndexes(currVec.size(), breaks, i);
288
289 // loop over atoms in single calib interval
290 for (int k = s; k <= e; ++k) {
291 ExpRun r = getRun(runsMap, (currVec[k].t1 + currVec[k].t2) / 2.); //runexp of the atom
292 if (splitsNow[i].count(r)) { //if already there
293 splitsNow[i].at(r).first = std::min(splitsNow[i].at(r).first, currVec[k].t1);
294 splitsNow[i].at(r).second = std::max(splitsNow[i].at(r).second, currVec[k].t2);
295 } else { //if new
296 splitsNow[i][r].first = currVec[k].t1;
297 splitsNow[i][r].second = currVec[k].t2;
298 }
299 }
300 }
301
302 return splitsNow;
303 }
304
305 //Merge two intervals together
306 std::map<ExpRun, std::pair<double, double>> Splitter::mergeIntervals(std::map<ExpRun, std::pair<double, double>> I1,
307 std::map<ExpRun, std::pair<double, double>> I2)
308 {
309 std::map<ExpRun, std::pair<double, double>> I = I1;
310 for (auto r : I2) {
311 ExpRun run = r.first;
312 if (I.count(run) == 0)
313 I[run] = r.second;
314 else {
315 I.at(run) = std::make_pair(std::min(I1.at(run).first, I2.at(run).first), std::max(I1.at(run).second, I2.at(run).second));
316 }
317 }
318 return I;
319 }
320
321
323}
TF1 * lossFun
loss function used for clustering of Atoms
Definition: Splitter.h:297
std::vector< std::pair< double, std::vector< int > > > cache
cache used by the clustering algorithm (has to be reset every time)
Definition: Splitter.h:301
static std::map< ExpRun, std::pair< double, double > > mergeIntervals(std::map< ExpRun, std::pair< double, double > > I1, std::map< ExpRun, std::pair< double, double > > I2)
Merge two subintervals into one subinterval.
Definition: Splitter.cc:306
void plotRuns(std::vector< std::pair< double, double > > runs)
plot runs on time axis
Definition: Splitter.cc:57
static std::vector< std::pair< double, double > > splitToSmall(std::map< ExpRun, std::pair< double, double > > runs, double intSize=1./60)
Split the runs into small calibration intervals (atoms) of a specified size.
Definition: Splitter.cc:173
double getMinLoss(const std::vector< Atom > &vec, int e, std::vector< int > &breaks)
Recursive function to evaluate minimal sum of the lossFuctions for the optimal clustering.
Definition: Splitter.cc:204
static ExpRun getRun(std::map< ExpRun, std::pair< double, double > > runs, double t)
Get exp number + run number from time.
Definition: Splitter.cc:262
std::vector< std::map< ExpRun, std::pair< double, double > > > breaks2intervalsSep(const std::map< ExpRun, std::pair< double, double > > &runsMap, const std::vector< Atom > &currVec, const std::vector< int > &breaks)
Get calibration intervals according to the indexes of the breaks.
Definition: Splitter.cc:280
std::pair< int, int > getStartEndIndexes(int nIntervals, std::vector< int > breaks, int indx)
get the range of interval with nIntervals and breaks stored in a vector
Definition: Splitter.cc:83
std::map< ExpRun, std::pair< double, double > > filter(const std::map< ExpRun, std::pair< double, double > > &runs, double cut, std::map< ExpRun, std::pair< double, double > > &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
Definition: Splitter.cc:38
std::vector< int > dynamicBreaks(const std::vector< Atom > &runs)
Get optimal break points using algorithm based on dynamic programing.
Definition: Splitter.cc:241
void plotSRuns(std::vector< std::pair< double, double > > runs, std::vector< int > breaks, int offset=2)
plot clusters or runs on time axis
Definition: Splitter.cc:94
double lossFunction(const std::vector< Atom > &vec, int s, int e) const
lossFunction of the calibration interval consisting of several "atoms" stored in vector vec The atoms...
Definition: Splitter.cc:135
void printBySize(std::vector< std::pair< double, double > > runs)
print sorted lenghts of the runs
Definition: Splitter.cc:119
Abstract base class for different kinds of events.
Struct containing exp number and run number.
Definition: Splitter.h:51