Belle II Software development
RawDataCollectedMinMax.h
/**************************************************************************
 * basf2 (Belle II Analysis Software Framework)                           *
 * Author: The Belle II Collaboration                                     *
 *                                                                        *
 * See git log for contributors and copyright holders.                    *
 * This file is licensed under LGPL-3.0, see LICENSE.md.                  *
 **************************************************************************/
8
9#pragma once
10
11#include <tracking/trackFindingVXD/sectorMapTools/MinMaxCollector.h>
12#include <framework/logging/Logger.h>
13
14#include <vector>
15#include <utility> // std::pair
16#include <limits> // std::numeric_limits
17#include <algorithm> // std::sort
18
19
20namespace Belle2 {
28 protected:
29 unsigned m_currentSize;
31 std::pair<double, double> m_minMaxQuantiles;
32 std::vector<std::pair<double, double>> m_intermediateValues;
34 public:
35
37 RawDataCollectedMinMax(unsigned expectedSize,
38 std::pair<double, double> quantiles,
39 unsigned maxSizeThreshold = 100000) :
41 m_fillIntermediateThreshold(std::numeric_limits<unsigned>::max()),
42 m_minMaxQuantiles(quantiles),
43 m_collector((quantiles.first > (1. - quantiles.second) ? quantiles.first * 2. : (1. - quantiles.second) * 2.))
44 {
45 if (double(expectedSize) / (double(maxSizeThreshold) * 0.05) > double(maxSizeThreshold))
46 { B2FATAL("RawDataCollectedMinMax: expected data to big, can not execute!"); }
47
48 if (maxSizeThreshold < expectedSize) {
49 m_fillIntermediateThreshold = maxSizeThreshold / 10;
50 }
51 }
52
54 void add(double newVal)
55 {
56 m_collector.append(newVal);
58
59 // if threshold reached, collect results and fill into intermediate value-container:
61 std::pair<double, double> results = m_collector.getMinMax(m_minMaxQuantiles.first, m_minMaxQuantiles.second);
62 m_intermediateValues.push_back(std::move(results));
64 }
65 }
66
68 unsigned getSampleSize() const { return m_currentSize; }
69
71 std::pair<double, double> getMinMax()
72 {
73 if (m_intermediateValues.empty()) {
75 }
76
77 // issue: m_collector-sample could be too small and therefore distort results for small intermediateValue-samples. Therefore neglect m_collector for that case.
78 if (m_intermediateValues.size() == 1) {
79 return { m_intermediateValues.at(0).first, m_intermediateValues.at(0).second};
80 }
81 if (m_intermediateValues.size() == 2) {
82 return {
83 0.5 * (m_intermediateValues.at(0).first + m_intermediateValues.at(1).first),
84 0.5 * (m_intermediateValues.at(0).second + m_intermediateValues.at(1).second) };
85 }
86
87 if (!m_collector.empty()) {
88 std::pair<double, double> results = m_collector.getMinMax(m_minMaxQuantiles.first, m_minMaxQuantiles.second);
89 m_intermediateValues.push_back(results);
90 }
91
92 unsigned index = std::floor(double(m_intermediateValues.size()) * 0.5);
93 double min, max;
94
95 std::sort(m_intermediateValues.begin(), m_intermediateValues.end(),
96 [](const std::pair<double, double>& a, const std::pair<double, double>& b) -> bool { return a.first < b.first; });
97 min = m_intermediateValues.at(index).first;
98
99 std::sort(m_intermediateValues.begin(), m_intermediateValues.end(),
100 [](const std::pair<double, double>& a, const std::pair<double, double>& b) -> bool { return a.second < b.second; });
101 max = m_intermediateValues.at(index).second;
102
103 return {min, max};
104 }
105 };
107}
108
A container for collecting data, where min- and max-quantiles near q(0) and q(1) are to be found.
std::pair< DataType, DataType > getMinMax(DataType minQuantile=0., DataType maxQuantile=1.) const
For a given pair of quantiles, the corresponding cuts (min, max) are returned.
unsigned totalSize() const
returns the combined size of the containers storing the values
void append(DataType newVal)
append new value
bool empty() const
returns if internal containers are empty
void clear()
deletes all values collected so far and resets to constructor-settings.
takes care of collecting raw data and staying below RAM-threshold.
unsigned m_fillIntermediateThreshold
an internal threshold taking care of collecting intermediate results during sample collection
MinMaxCollector< double > m_collector
collects raw data in a RAM-saving way.
unsigned getSampleSize() const
returns current sample size (which is not the actual size of the container).
std::pair< double, double > getMinMax()
returns current best estimates for min and max cuts.
unsigned m_currentSize
the current size of the data sample.
std::vector< std::pair< double, double > > m_intermediateValues
collects intermediate (min, max) results if the expected size is too big.
std::pair< double, double > m_minMaxQuantiles
the quantiles to be collected in the end (defined in [0;1])
RawDataCollectedMinMax(unsigned expectedSize, std::pair< double, double > quantiles, unsigned maxSizeThreshold=100000)
Constructor. Please use quantiles [min, max] with min ~0 and max ~1 (range 0-1).
void add(double newVal)
adds value to collector.
Abstract base class for different kinds of events.
STL namespace.