Belle II Software  release-08-01-10
RawDataCollectedMinMax.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 
11 #include <tracking/trackFindingVXD/sectorMapTools/MinMaxCollector.h>
12 #include <framework/logging/Logger.h>
13 
14 #include <vector>
15 #include <utility> // std::pair
16 #include <limits> // std::numeric_limits
17 #include <algorithm> // std::sort
18 
19 
20 namespace Belle2 {
28  protected:
29  unsigned m_currentSize;
31  std::pair<double, double> m_minMaxQuantiles;
32  std::vector<std::pair<double, double>> m_intermediateValues;
34  public:
35 
37  RawDataCollectedMinMax(unsigned expectedSize,
38  std::pair<double, double> quantiles,
39  unsigned maxSizeThreshold = 100000) :
40  m_currentSize(0),
41  m_fillIntermediateThreshold(std::numeric_limits<unsigned>::max()),
42  m_minMaxQuantiles(quantiles),
43  m_collector((quantiles.first > (1. - quantiles.second) ? quantiles.first * 2. : (1. - quantiles.second) * 2.))
44  {
45  if (double(expectedSize) / (double(maxSizeThreshold) * 0.05) > double(maxSizeThreshold))
46  { B2FATAL("RawDataCollectedMinMax: expected data to big, can not execute!"); }
47 
48  if (maxSizeThreshold < expectedSize) {
49  m_fillIntermediateThreshold = maxSizeThreshold / 10;
50  }
51  }
52 
54  void add(double newVal)
55  {
56  m_collector.append(newVal);
57  m_currentSize++;
58 
59  // if threshold reached, collect results and fill into intermediate value-container:
61  std::pair<double, double> results = m_collector.getMinMax(m_minMaxQuantiles.first, m_minMaxQuantiles.second);
62  m_intermediateValues.push_back(std::move(results));
64  }
65  }
66 
68  unsigned getSampleSize() const { return m_currentSize; }
69 
71  std::pair<double, double> getMinMax()
72  {
73  if (m_intermediateValues.empty()) {
75  }
76 
77  // issue: m_collector-sample could be too small and therefore distort results for small intermediateValue-samples. Therefore neglect m_collector for that case.
78  if (m_intermediateValues.size() == 1) {
79  return { m_intermediateValues.at(0).first, m_intermediateValues.at(0).second};
80  }
81  if (m_intermediateValues.size() == 2) {
82  return {
83  0.5 * (m_intermediateValues.at(0).first + m_intermediateValues.at(1).first),
84  0.5 * (m_intermediateValues.at(0).second + m_intermediateValues.at(1).second) };
85  }
86 
87  if (!m_collector.empty()) {
88  std::pair<double, double> results = m_collector.getMinMax(m_minMaxQuantiles.first, m_minMaxQuantiles.second);
89  m_intermediateValues.push_back(results);
90  }
91 
92  unsigned index = std::floor(double(m_intermediateValues.size()) * 0.5);
93  double min, max;
94 
95  std::sort(m_intermediateValues.begin(), m_intermediateValues.end(),
96  [](const std::pair<double, double>& a, const std::pair<double, double>& b) -> bool { return a.first < b.first; });
97  min = m_intermediateValues.at(index).first;
98 
99  std::sort(m_intermediateValues.begin(), m_intermediateValues.end(),
100  [](const std::pair<double, double>& a, const std::pair<double, double>& b) -> bool { return a.second < b.second; });
101  max = m_intermediateValues.at(index).second;
102 
103  return {min, max};
104  }
105  };
107 }
108 
unsigned totalSize() const
returns the combined size of the containers storing the values
void append(DataType newVal)
append new value
bool empty() const
returns if internal containers are empty
std::pair< DataType, DataType > getMinMax(DataType minQuantile=0., DataType maxQuantile=1.) const
for given pair of quantiles, the according cuts (min, max) will be returned.
void clear()
deletes all values collected so far and resets to constructor-settings.
takes care of collecting raw data and staying below RAM-threshold.
unsigned m_fillIntermediateThreshold
an internal threshold taking care of collecting intermediate results during sample collection
MinMaxCollector< double > m_collector
collects raw data in a RAM-saving way.
unsigned getSampleSize() const
returns current sample size (which is not the actual size of the container).
unsigned m_currentSize
the current size of the data sample.
std::vector< std::pair< double, double > > m_intermediateValues
collects intermediate threshold if expected size is too big.
std::pair< double, double > m_minMaxQuantiles
the quantiles to be collected in the end (defined in [0;1])
RawDataCollectedMinMax(unsigned expectedSize, std::pair< double, double > quantiles, unsigned maxSizeThreshold=100000)
Constructor. Pass the quantiles as a pair [min, max] within the range 0-1, with min close to 0 and max close to 1.
std::pair< double, double > getMinMax()
returns current best estimates for min and max cuts.
void add(double newVal)
adds value to collector.
Abstract base class for different kinds of events.