Belle II Software development
RawDataCollectedMinMax.h
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#pragma once
10
11#include <tracking/trackFindingVXD/sectorMapTools/MinMaxCollector.h>
12#include <framework/logging/Logger.h>
13
14#include <vector>
15#include <utility> // std::pair
16#include <limits> // std::numeric_limits
17#include <algorithm> // std::sort
18
19
20namespace Belle2 {
25
28 protected:
30 std::pair<double, double> m_minMaxQuantiles; ///< the quantiles to be collected in the end (defined in [0;1]).
31 std::vector<std::pair<double, double>> m_intermediateValues; ///< intermediate (min, max) results, filled when the fill threshold is exceeded.
32 unsigned m_currentSize; ///< the current size of the data sample.
34 public:
35
37 RawDataCollectedMinMax(unsigned expectedSize,
38 std::pair<double, double> quantiles,
39 unsigned maxSizeThreshold = 100000) :
40 m_collector((quantiles.first > (1. - quantiles.second) ? quantiles.first * 2. : (1. - quantiles.second) * 2.)),
41 m_minMaxQuantiles(quantiles),
43 m_fillIntermediateThreshold(std::numeric_limits<unsigned>::max())
44 {
45 if (double(expectedSize) / (double(maxSizeThreshold) * 0.05) > double(maxSizeThreshold))
46 { B2FATAL("RawDataCollectedMinMax: expected data to big, can not execute!"); }
47
48 if (maxSizeThreshold < expectedSize) {
49 m_fillIntermediateThreshold = maxSizeThreshold / 10;
50 }
51 }
52
54 void add(double newVal)
55 {
56 m_collector.append(newVal);
58
59 // if threshold reached, collect results and fill into intermediate value-container:
60 if (m_collector.totalSize() > m_fillIntermediateThreshold) {
61 std::pair<double, double> results = m_collector.getMinMax(m_minMaxQuantiles.first, m_minMaxQuantiles.second);
62 m_intermediateValues.push_back(std::move(results));
63 m_collector.clear();
64 }
65 }
66
68 unsigned getSampleSize() const { return m_currentSize; }
69
71 std::pair<double, double> getMinMax()
72 {
73 if (m_intermediateValues.empty()) {
74 return m_collector.getMinMax(m_minMaxQuantiles.first, m_minMaxQuantiles.second);
75 }
76
77 // issue: m_collector-sample could be too small and therefore distort results for small intermediateValue-samples. Therefore neglect m_collector for that case.
78 if (m_intermediateValues.size() == 1) {
79 return { m_intermediateValues.at(0).first, m_intermediateValues.at(0).second};
80 }
81 if (m_intermediateValues.size() == 2) {
82 return {
83 0.5 * (m_intermediateValues.at(0).first + m_intermediateValues.at(1).first),
84 0.5 * (m_intermediateValues.at(0).second + m_intermediateValues.at(1).second) };
85 }
86
87 if (!m_collector.empty()) {
88 std::pair<double, double> results = m_collector.getMinMax(m_minMaxQuantiles.first, m_minMaxQuantiles.second);
89 m_intermediateValues.push_back(results);
90 }
91
92 unsigned index = std::floor(double(m_intermediateValues.size()) * 0.5);
93 double min, max;
94
95 std::sort(m_intermediateValues.begin(), m_intermediateValues.end(),
96 [](const std::pair<double, double>& a, const std::pair<double, double>& b) -> bool { return a.first < b.first; });
97 min = m_intermediateValues.at(index).first;
98
99 std::sort(m_intermediateValues.begin(), m_intermediateValues.end(),
100 [](const std::pair<double, double>& a, const std::pair<double, double>& b) -> bool { return a.second < b.second; });
101 max = m_intermediateValues.at(index).second;
102
103 return {min, max};
104 }
105 };
106
107}
108
A container for collecting data, where min- and max-quantiles near q(0) and q(1) are to be found.
unsigned m_fillIntermediateThreshold
an internal threshold taking care of collecting intermediate results during sample collection
MinMaxCollector< double > m_collector
collects raw data in a RAM-saving way.
unsigned getSampleSize() const
returns current sample size (which is not the actual size of the container).
std::pair< double, double > getMinMax()
returns current best estimates for min and max cuts.
unsigned m_currentSize
the current size of the data sample.
std::vector< std::pair< double, double > > m_intermediateValues
collects intermediate results if the expected size is too big.
std::pair< double, double > m_minMaxQuantiles
the quantiles to be collected in the end (defined in [0;1])
RawDataCollectedMinMax(unsigned expectedSize, std::pair< double, double > quantiles, unsigned maxSizeThreshold=100000)
Constructor. Please pass the quantiles as [min, max] with min close to 0 and max close to 1 (both within the range 0-1).
void add(double newVal)
adds value to collector.
Abstract base class for different kinds of events.
STL namespace.