Belle II Software  release-08-01-10
Splitter.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 
10 #include <vector>
11 #include <map>
12 #include <algorithm>
13 #include <iostream>
14 #include <utility>
15 #include <cmath>
16 #include <numeric>
17 #include <TGraphErrors.h>
18 #include <TStyle.h>
19 #include <TAxis.h>
20 
21 
22 //If compiled within BASF2
23 #ifdef _PACKAGE_
24 #include <tracking/calibration/Splitter.h>
25 #else
26 #include <Splitter.h>
27 #endif
28 
29 
30 namespace Belle2 {
37  //return filtered runs intervals (remove very short runs)
38  std::map<ExpRun, std::pair<double, double>> filter(const std::map<ExpRun, std::pair<double, double>>& runs, double cut,
39  std::map<ExpRun, std::pair<double, double>>& runsRemoved)
40  {
41  std::map<ExpRun, std::pair<double, double>> runsCopy;
42 
43  for (auto r : runs) {
44  auto& I = r.second;
45  double d = I.second - I.first;
46  if (d > cut)
47  runsCopy[r.first] = r.second;
48  else
49  runsRemoved[r.first] = r.second;
50  }
51 
52  return runsCopy;
53  }
54 
55 
57  void plotRuns(std::vector<std::pair<double, double>> runs)
58  {
59  TGraphErrors* gr = new TGraphErrors();
60 
61 
62  for (auto r : runs) {
63  double m = (r.first + r.second) / 2;
64  double e = (r.second - r.first) / 2;
65 
66  gr->SetPoint(gr->GetN(), m, 1);
67  gr->SetPointError(gr->GetN() - 1, e, 0);
68  }
69 
70  gStyle->SetEndErrorSize(6);
71 
72  gr->SetLineWidth(1);
73  gr->SetMarkerSize(40);
74  gr->Draw("ape");
75  gr->GetYaxis()->SetRangeUser(-10, 10);
76  gr->GetXaxis()->SetRangeUser(0, 256);
77 
78  gr->GetXaxis()->SetTitle("time [hours]");
79 
80  }
81 
83  std::pair<int, int> getStartEndIndexes(int nIntervals, std::vector<int> breaks, int indx)
84  {
85  B2ASSERT("There must be at least one interval", nIntervals >= 1);
86  B2ASSERT("Interval index must be positive", indx >= 0);
87  B2ASSERT("Interval index must be smaller than #breaks", indx < int(breaks.size()) + 1); //There is one more interval than #breaks
88  int s = (indx == 0) ? 0 : breaks[indx - 1] + 1;
89  int e = (indx == int(breaks.size())) ? nIntervals - 1 : breaks[indx];
90  return {s, e};
91  }
92 
94  void plotSRuns(std::vector<std::pair<double, double>> runs, std::vector<int> breaks, int offset = 2)
95  {
96  TGraphErrors* gr = new TGraphErrors();
97 
98  for (int i = 0; i < int(breaks.size()) + 1; ++i) {
99  int s, e;
100  std::tie(s, e) = getStartEndIndexes(runs.size(), breaks, i);
101  double a = runs[s].first;
102  double b = runs[e].second;
103 
104  double m = (a + b) / 2;
105  double err = (b - a) / 2;
106 
107  gr->SetPoint(gr->GetN(), m, offset);
108  gr->SetPointError(gr->GetN() - 1, err, 0);
109 
110  }
111 
112  gr->SetLineColor(kRed);
113  gr->SetMarkerColor(kRed);
114  gr->Draw("pe same");
115  }
116 
117 
119  void printBySize(std::vector<std::pair<double, double>> runs)
120  {
121  std::vector<double> dist;
122  for (auto r : runs) {
123  double d = r.second - r.first;
124  dist.push_back(d);
125  }
126 
127  sort(dist.begin(), dist.end());
128 
129  for (auto d : dist)
130  B2INFO(d);
131 
132  }
133 
135  double Splitter::lossFunction(const std::vector<Atom>& vec, int s, int e) const
136  {
137 
138  //raw time
139  double rawTime = vec[e].t2 - vec[s].t1;
140 
141  //max gap
142  double maxGap = 0;
143  for (int i = s; i <= e - 1; ++i) {
144  double d = vec[i + 1].t1 - vec[i].t2;
145  maxGap = std::max(maxGap, d);
146  }
147 
148  //net time
149  double netTime = 0;
150  for (int i = s; i <= e; ++i) {
151  netTime += vec[i].t2 - vec[i].t1;
152  }
153 
154  // Number of events
155  double nEv = 0;
156  for (int i = s; i <= e; ++i) {
157  nEv += vec[i].nEv;
158  }
159  if (nEv == 0) nEv = 0.1;
160 
161  //double loss = pow(rawTime - tBest, 2) + gapPenalty * pow(maxGap, 2);
162  //double loss = 1./nEv + timePenalty * pow(rawTime, 2);
163 
164  lossFun->SetParameters(rawTime, netTime, maxGap, nEv);
165  double lossNew = lossFun->Eval(0);
166 
167  return lossNew;
168  }
169 
170 
171 
172  // split to many small intervals (atoms)
173  std::vector<std::pair<double, double>> Splitter::splitToSmall(std::map<ExpRun, std::pair<double, double>> runs, double intSize)
174  {
175  // split into small intervals
176  std::vector<std::pair<double, double>> smallRuns;
177 
178  for (auto r : runs) {
179  auto& I = r.second;
180  if (intSize < 0) {
181  smallRuns.push_back(I);
182  continue;
183  }
184 
185  double runTime = I.second - I.first;
186  int nSplits = runTime / intSize; //1-m intervals
187  nSplits = std::max(1, nSplits); //at least 1 interval
188 
189  for (int i = 0; i < nSplits; ++i) {
190  double L = I.first + i * (runTime / nSplits);
191  double H = I.first + (i + 1) * (runTime / nSplits);
192  smallRuns.push_back({L, H});
193  }
194  }
195  return smallRuns;
196  }
197 
198 
199 
200 
201 
202 
203  // Get the optimal clustering of the atoms with indeces 0 .. e (recursive function with cache)
204  double Splitter::getMinLoss(const std::vector<Atom>& vec, int e, std::vector<int>& breaks)
205  {
206  // If entry in cache (speed up)
207  if (cache[e].first >= 0) {
208  breaks = cache[e].second;
209  return cache[e].first;
210  }
211 
212 
213  std::vector<int> breaksOpt;
214  double minVal = 1e30;
215  int iMin = -10;
216  for (int i = -1; i <= e - 1; ++i) {
217  auto breaksNow = breaks;
218  double r1 = 0;
219  if (i != -1)
220  r1 = getMinLoss(vec, i, breaksNow);
221  double r2 = lossFunction(vec, i + 1, e);
222  double tot = r1 + r2;
223 
224  if (tot < minVal) { //store minimum
225  minVal = tot;
226  iMin = i;
227  breaksOpt = breaksNow;
228  }
229  }
230 
231  if (iMin != -1)
232  breaksOpt.push_back(iMin);
233 
234 
235  breaks = breaksOpt;
236  cache[e] = std::make_pair(minVal, breaks); //store solution to cache
237  return minVal;
238  }
239 
240  //Get the indexes of the break-points
241  std::vector<int> Splitter::dynamicBreaks(const std::vector<Atom>& runs)
242  {
243  //reset cache
244  cache.resize(runs.size());
245  for (auto& c : cache)
246  c = std::make_pair(-1.0, std::vector<int>({}));
247 
248 
249  std::vector<int> breaks;
250  getMinLoss(runs, runs.size() - 1, breaks); //the minLoss (output) currently not used, only breaks
251 
252  return breaks;
253  }
254 
255 
256 
262  static ExpRun getRun(std::map<ExpRun, std::pair<double, double>> runs, double t)
263  {
264  ExpRun rFound(-1, -1);
265  int nFound = 0;
266  for (auto r : runs) { //Linear search over runs
267  if (r.second.first <= t && t < r.second.second) {
268  rFound = r.first;
269  ++nFound;
270  }
271  }
272 
273  B2ASSERT("Exactly one interval should be found", nFound == 1);
274  B2ASSERT("Assert that something was found", rFound != ExpRun(-1, -1));
275  return rFound;
276  }
277 
278 
279 
280  std::vector<std::map<ExpRun, std::pair<double, double>>> breaks2intervalsSep(const std::map<ExpRun, std::pair<double, double>>&
281  runsMap,
282  const std::vector<Atom>& currVec, const std::vector<int>& breaks)
283  {
284  std::vector<std::map<ExpRun, std::pair<double, double>>> splitsNow(breaks.size() + 1);
285  for (int i = 0; i < int(breaks.size()) + 1; ++i) {
286  int s, e;
287  std::tie(s, e) = getStartEndIndexes(currVec.size(), breaks, i);
288 
289  // loop over atoms in single calib interval
290  for (int k = s; k <= e; ++k) {
291  ExpRun r = getRun(runsMap, (currVec[k].t1 + currVec[k].t2) / 2.); //runexp of the atom
292  if (splitsNow[i].count(r)) { //if already there
293  splitsNow[i].at(r).first = std::min(splitsNow[i].at(r).first, currVec[k].t1);
294  splitsNow[i].at(r).second = std::max(splitsNow[i].at(r).second, currVec[k].t2);
295  } else { //if new
296  splitsNow[i][r].first = currVec[k].t1;
297  splitsNow[i][r].second = currVec[k].t2;
298  }
299  }
300  }
301 
302  return splitsNow;
303  }
304 
305  //Merge two intervals together
306  std::map<ExpRun, std::pair<double, double>> Splitter::mergeIntervals(std::map<ExpRun, std::pair<double, double>> I1,
307  std::map<ExpRun, std::pair<double, double>> I2)
308  {
309  std::map<ExpRun, std::pair<double, double>> I = I1;
310  for (auto r : I2) {
311  ExpRun run = r.first;
312  if (I.count(run) == 0)
313  I[run] = r.second;
314  else {
315  I.at(run) = std::make_pair(std::min(I1.at(run).first, I2.at(run).first), std::max(I1.at(run).second, I2.at(run).second));
316  }
317  }
318  return I;
319  }
320 
321 
323 }
TF1 * lossFun
loss function used for clustering of Atoms
Definition: Splitter.h:297
std::vector< std::pair< double, std::vector< int > > > cache
cache used by the clustering algorithm (has to be reset every time)
Definition: Splitter.h:301
static ExpRun getRun(std::map< ExpRun, std::pair< double, double >> runs, double t)
Get exp number + run number from time.
Definition: Splitter.cc:262
void plotSRuns(std::vector< std::pair< double, double >> runs, std::vector< int > breaks, int offset=2)
plot clusters or runs on time axis
Definition: Splitter.cc:94
void printBySize(std::vector< std::pair< double, double >> runs)
print sorted lenghts of the runs
Definition: Splitter.cc:119
double getMinLoss(const std::vector< Atom > &vec, int e, std::vector< int > &breaks)
Recursive function to evaluate minimal sum of the lossFuctions for the optimal clustering.
Definition: Splitter.cc:204
std::pair< int, int > getStartEndIndexes(int nIntervals, std::vector< int > breaks, int indx)
get the range of interval with nIntervals and breaks stored in a vector
Definition: Splitter.cc:83
std::map< ExpRun, std::pair< double, double > > filter(const std::map< ExpRun, std::pair< double, double >> &runs, double cut, std::map< ExpRun, std::pair< double, double >> &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
Definition: Splitter.cc:38
static std::vector< std::pair< double, double > > splitToSmall(std::map< ExpRun, std::pair< double, double >> runs, double intSize=1./60)
Split the runs into small calibration intervals (atoms) of a specified size.
Definition: Splitter.cc:173
std::vector< std::map< ExpRun, std::pair< double, double > > > breaks2intervalsSep(const std::map< ExpRun, std::pair< double, double >> &runsMap, const std::vector< Atom > &currVec, const std::vector< int > &breaks)
Get calibration intervals according to the indexes of the breaks.
Definition: Splitter.cc:280
std::vector< int > dynamicBreaks(const std::vector< Atom > &runs)
Get optimal break points using algorithm based on dynamic programing.
Definition: Splitter.cc:241
double lossFunction(const std::vector< Atom > &vec, int s, int e) const
lossFunction of the calibration interval consisting of several "atoms" stored in vector vec The atoms...
Definition: Splitter.cc:135
void plotRuns(std::vector< std::pair< double, double >> runs)
plot runs on time axis
Definition: Splitter.cc:57
static std::map< ExpRun, std::pair< double, double > > mergeIntervals(std::map< ExpRun, std::pair< double, double >> I1, std::map< ExpRun, std::pair< double, double >> I2)
Merge two subintervals into one subinterval.
Definition: Splitter.cc:306
Abstract base class for different kinds of events.
Struct containing exp number and run number.
Definition: Splitter.h:51