Belle II Software  release-05-01-25
Splitter.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2020 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Radek Zlebcik *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #include <vector>
12 #include <map>
13 #include <algorithm>
14 #include <iostream>
15 #include <fstream>
16 #include <utility>
17 #include <cmath>
18 #include <numeric>
19 #include <TGraphErrors.h>
20 #include <TCanvas.h>
21 #include <TStyle.h>
22 #include <TAxis.h>
23 
24 
25 //If compiled within BASF2
26 #ifdef _PACKAGE_
27 #include <tracking/calibration/Splitter.h>
28 #else
29 #include <Splitter.h>
30 #endif
31 
32 
33 using namespace std;
34 
35 namespace Belle2 {
42  //return filtered runs intervals (remove very short runs)
43  map<ExpRun, pair<double, double>> filter(const map<ExpRun, pair<double, double>>& runs, double cut,
44  map<ExpRun, pair<double, double>>& runsRemoved)
45  {
46  map<ExpRun, pair<double, double>> runsCopy;
47 
48  for (auto r : runs) {
49  auto& I = r.second;
50  double d = I.second - I.first;
51  if (d > cut)
52  runsCopy[r.first] = r.second;
53  else
54  runsRemoved[r.first] = r.second;
55  }
56 
57  return runsCopy;
58  }
59 
60 
62  void plotRuns(vector<pair<double, double>> runs)
63  {
64  TGraphErrors* gr = new TGraphErrors();
65 
66 
67  for (auto r : runs) {
68  double m = (r.first + r.second) / 2;
69  double e = (r.second - r.first) / 2;
70 
71  gr->SetPoint(gr->GetN(), m, 1);
72  gr->SetPointError(gr->GetN() - 1, e, 0);
73  }
74 
75  gStyle->SetEndErrorSize(6);
76 
77  gr->SetLineWidth(1);
78  gr->SetMarkerSize(40);
79  gr->Draw("ape");
80  gr->GetYaxis()->SetRangeUser(-10, 10);
81  gr->GetXaxis()->SetRangeUser(0, 256);
82 
83  gr->GetXaxis()->SetTitle("time [hours]");
84 
85  }
86 
88  pair<int, int> getStartEndIndexes(int nIntervals, vector<int> breaks, int indx)
89  {
90  B2ASSERT("There must be at least one interval", nIntervals >= 1);
91  B2ASSERT("Interval index must be positive", indx >= 0);
92  B2ASSERT("Interval index must be smaller than #breaks", indx < int(breaks.size()) + 1); //There is one more interval than #breaks
93  int s = (indx == 0) ? 0 : breaks[indx - 1] + 1;
94  int e = (indx == int(breaks.size())) ? nIntervals - 1 : breaks[indx];
95  return {s, e};
96  }
97 
99  void plotSRuns(vector<pair<double, double>> runs, vector<int> breaks, int offset = 2)
100  {
101  TGraphErrors* gr = new TGraphErrors();
102 
103  for (int i = 0; i < int(breaks.size()) + 1; ++i) {
104  int s, e;
105  tie(s, e) = getStartEndIndexes(runs.size(), breaks, i);
106  double a = runs[s].first;
107  double b = runs[e].second;
108 
109  double m = (a + b) / 2;
110  double err = (b - a) / 2;
111 
112  gr->SetPoint(gr->GetN(), m, offset);
113  gr->SetPointError(gr->GetN() - 1, err, 0);
114 
115  }
116 
117  gr->SetLineColor(kRed);
118  gr->SetMarkerColor(kRed);
119  gr->Draw("pe same");
120  }
121 
122 
124  void printBySize(vector<pair<double, double>> runs)
125  {
126  vector<double> dist;
127  for (auto r : runs) {
128  double d = r.second - r.first;
129  dist.push_back(d);
130  }
131 
132  sort(dist.begin(), dist.end());
133 
134  for (auto d : dist)
135  B2INFO(d);
136 
137  }
138 
140  double Splitter::lossFunction(const vector<Atom>& vec, int s, int e) const
141  {
142 
143  //raw time
144  double rawTime = vec[e].t2 - vec[s].t1;
145 
146  //max gap
147  double maxGap = 0;
148  for (int i = s; i <= e - 1; ++i) {
149  double d = vec[i + 1].t1 - vec[i].t2;
150  maxGap = max(maxGap, d);
151  }
152 
153  //net time
154  double netTime = 0;
155  for (int i = s; i <= e; ++i) {
156  netTime += vec[i].t2 - vec[i].t1;
157  }
158 
159  // Number of events
160  double nEv = 0;
161  for (int i = s; i <= e; ++i) {
162  nEv += vec[i].nEv;
163  }
164  if (nEv == 0) nEv = 0.1;
165 
166  //double loss = pow(rawTime - tBest, 2) + gapPenalty * pow(maxGap, 2);
167  //double loss = 1./nEv + timePenalty * pow(rawTime, 2);
168 
169  lossFun->SetParameters(rawTime, netTime, maxGap, nEv);
170  double lossNew = lossFun->Eval(0);
171 
172  return lossNew;
173  }
174 
175 
176 
177  // split to many small intervals (atoms)
178  vector<pair<double, double>> Splitter::splitToSmall(map<ExpRun, pair<double, double>> runs, double intSize)
179  {
180  // split into ~1m intervals
181  vector<pair<double, double>> smallRuns;
182 
183  for (auto r : runs) {
184  auto& I = r.second;
185  double runTime = I.second - I.first;
186  int nSplits = runTime / intSize; //1-m intervals
187  nSplits = max(1, nSplits); //at least 1 interval
188 
189  for (int i = 0; i < nSplits; ++i) {
190  double L = I.first + i * (runTime / nSplits);
191  double H = I.first + (i + 1) * (runTime / nSplits);
192  smallRuns.push_back({L, H});
193  }
194  }
195  return smallRuns;
196  }
197 
198 
199 
200 
201 
202 
203  // Get the optimal clustering of the atoms with indeces 0 .. e (recursive function with cache)
204  double Splitter::getMinLoss(const vector<Atom>& vec, int e, vector<int>& breaks)
205  {
206  // If entry in cache (speed up)
207  if (cache[e].first >= 0) {
208  breaks = cache[e].second;
209  return cache[e].first;
210  }
211 
212 
213  vector<int> breaksOpt;
214  double minVal = 1e30;
215  int iMin = -10;
216  for (int i = -1; i <= e - 1; ++i) {
217  auto breaksNow = breaks;
218  double r1 = 0;
219  if (i != -1)
220  r1 = getMinLoss(vec, i, breaksNow);
221  double r2 = lossFunction(vec, i + 1, e);
222  double tot = r1 + r2;
223 
224  if (tot < minVal) { //store minimum
225  minVal = tot;
226  iMin = i;
227  breaksOpt = breaksNow;
228  }
229  }
230 
231  if (iMin != -1)
232  breaksOpt.push_back(iMin);
233 
234 
235  breaks = breaksOpt;
236  cache[e] = make_pair(minVal, breaks); //store solution to cache
237  return minVal;
238  }
239 
240  //Get the indexes of the break-points
241  vector<int> Splitter::dynamicBreaks(const vector<Atom>& runs)
242  {
243  //reset cache
244  cache.resize(runs.size());
245  for (auto& c : cache)
246  c = make_pair(-1.0, vector<int>({}));
247 
248 
249  vector<int> breaks;
250  getMinLoss(runs, runs.size() - 1, breaks); //the minLoss (output) currently not used, only breaks
251 
252  return breaks;
253  }
254 
255 
256 
262  static ExpRun getRun(map<ExpRun, pair<double, double>> runs, double t)
263  {
264  ExpRun rFound(-1, -1);
265  int nFound = 0;
266  for (auto r : runs) { //Linear search over runs
267  if (r.second.first <= t && t < r.second.second) {
268  rFound = r.first;
269  ++nFound;
270  }
271  }
272 
273  B2ASSERT("Exactly one interval should be found", nFound == 1);
274  B2ASSERT("Assert that something was found", rFound != ExpRun(-1, -1));
275  return rFound;
276  }
277 
278 
279 
280  vector<map<ExpRun, pair<double, double>>> breaks2intervalsSep(const map<ExpRun, pair<double, double>>& runsMap,
281  const vector<Atom>& currVec, const vector<int>& breaks)
282  {
283  vector<map<ExpRun, pair<double, double>>> splitsNow(breaks.size() + 1);
284  for (int i = 0; i < int(breaks.size()) + 1; ++i) {
285  int s, e;
286  tie(s, e) = getStartEndIndexes(currVec.size(), breaks, i);
287 
288  // loop over atoms in single calib interval
289  for (int k = s; k <= e; ++k) {
290  ExpRun r = getRun(runsMap, (currVec[k].t1 + currVec[k].t2) / 2.); //runexp of the atom
291  if (splitsNow[i].count(r)) { //if already there
292  splitsNow[i].at(r).first = min(splitsNow[i].at(r).first, currVec[k].t1);
293  splitsNow[i].at(r).second = max(splitsNow[i].at(r).second, currVec[k].t2);
294  } else { //if new
295  splitsNow[i][r].first = currVec[k].t1;
296  splitsNow[i][r].second = currVec[k].t2;
297  }
298  }
299  }
300 
301  return splitsNow;
302  }
303 
304  //Merge two intervals together
305  map<ExpRun, pair<double, double>> Splitter::mergeIntervals(map<ExpRun, pair<double, double>> I1,
306  map<ExpRun, pair<double, double>> I2)
307  {
308  map<ExpRun, pair<double, double>> I = I1;
309  for (auto r : I2) {
310  ExpRun run = r.first;
311  if (I.count(run) == 0)
312  I[run] = r.second;
313  else {
314  I.at(run) = make_pair(min(I1.at(run).first, I2.at(run).first), max(I1.at(run).second, I2.at(run).second));
315  }
316  }
317  return I;
318  }
319 
320 
322 }
Belle2::printBySize
void printBySize(vector< pair< double, double >> runs)
print sorted lenghts of the runs
Definition: Splitter.cc:124
Belle2::filter
map< ExpRun, pair< double, double > > filter(const map< ExpRun, pair< double, double >> &runs, double cut, map< ExpRun, pair< double, double >> &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
Definition: Splitter.cc:43
Belle2::plotSRuns
void plotSRuns(vector< pair< double, double >> runs, vector< int > breaks, int offset=2)
plot clusters or runs on time axis
Definition: Splitter.cc:99
Belle2::getStartEndIndexes
std::pair< int, int > getStartEndIndexes(int nIntervals, std::vector< int > breaks, int indx)
get the range of interval with nIntervals and breaks stored in a vector
Definition: Splitter.cc:88
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::getRun
static ExpRun getRun(map< ExpRun, pair< double, double >> runs, double t)
Get exp number + run number from time.
Definition: Splitter.cc:262
Belle2::ExpRun
Struct containing exp number and run number.
Definition: Splitter.h:55
Belle2::breaks2intervalsSep
vector< map< ExpRun, pair< double, double > > > breaks2intervalsSep(const map< ExpRun, pair< double, double >> &runsMap, const vector< Atom > &currVec, const vector< int > &breaks)
Get calibration intervals according to the indexes of the breaks.
Definition: Splitter.cc:280
Belle2::plotRuns
void plotRuns(vector< pair< double, double >> runs)
plot runs on time axis
Definition: Splitter.cc:62