Belle II Software development
MinMaxCollector.h
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#pragma once
10
11
12// includes - stl:
13#include <deque>
14#include <string>
15#include <utility> // std::pair, std::move
16
17#include <math.h> /* ceil */
18
19// includes - tf-related stuff:
20// includes - general fw stuff:
21#include <framework/core/FrameworkExceptions.h>
22#include <framework/logging/Logger.h>
23
24
25namespace Belle2 {
41 template<class DataType>
43 protected:
45 std::deque<DataType> m_smallestValues;
46
48 std::deque<DataType> m_biggestValues;
49
51 unsigned m_sampleSize;
52
53
55 DataType m_quantileCut;
56
57
60
61
62
64 bool addMin(DataType newVal, bool allow2Grow) { return addEntry(newVal, true, allow2Grow); }
65
66
67
69 bool addMax(DataType newVal, bool allow2Grow) { return addEntry(newVal, false, allow2Grow); }
70
71
72
74 bool addEntry(DataType newVal, bool isMinContainer, bool allow2Grow)
75 {
76 if (isMinContainer) {
77 if (m_smallestValues.empty() or allow2Grow) { sortIn(newVal, true); return true; }
78 if (m_smallestValues.back() > newVal) {
79 m_smallestValues.pop_back();
80 sortIn(newVal, true);
81 return true;
82 }
83 } else {
84 if (m_biggestValues.empty() or allow2Grow) { sortIn(newVal, false); return true; }
85 if (m_biggestValues.front() < newVal) {
86 m_biggestValues.pop_front();
87 sortIn(newVal, false);
88 return true;
89 }
90 }
91 return false;
92 }
93
94
95
97 void sortIn(DataType newVal, bool isMinContainer)
98 {
103 if (isMinContainer) {
104 m_smallestValues.push_back(newVal);
105 std::sort(m_smallestValues.begin(), m_smallestValues.end());
106 return;
107 }
108 m_biggestValues.push_back(newVal);
109 std::sort(m_biggestValues.begin(), m_biggestValues.end());
110 return;
111 }
112
113
114
116 unsigned getIndex(DataType aQuantile) const
117// { return floor(double(vecSize-1) * quantile + 0.5); };
118 { return unsigned(double(m_sampleSize/*-1*/) * double(aQuantile) + 0.5); }
119
120
121
123 bool checkVectorSize(const std::deque<DataType>& container)
124 {
125 // want to allow growing with some extra margin to prevent issues
126 unsigned newCalcThreshold = unsigned(ceil(float(m_sampleSize) * float(m_quantileCut) + ceil(0.01 * float(m_sampleSize))));
127
128 if (newCalcThreshold > container.size() /*+1*/) { return true; }
129 return false;
130 }
131
133// bool checkResize()
134// {
135// float sampleSize = float(m_sampleSize);
136// float nSmallestValues = float(m_smallestValues.size());
137// float nBiggestValues = float(m_biggestValues.size());
138// float quantileCut = float(m_quantileCut);
139//
140// float threshold = sampleSize*quantileCut + ceil(0.01*sampleSize);
141// if (threshold < nSmallestValues or threshold < nBiggestValues) return false;
142//
143// unsigned newSize = ceil(threshold*1.1 + 0.5); // stretch by further 10%
144// // fill up with dummy values
145// m_smallestValues.resize(newSize, std::numeric_limits<DataType>::max());
146// m_biggestValues.resize(newSize, std::numeric_limits<DataType>::min());
147// return true;
148// }
149 public:
150
152 BELLE2_DEFINE_EXCEPTION(Quantile_out_of_bounds, "The quantileCut (%1%) you gave is illegal (only allowed between 0-1)");
153
154
156 BELLE2_DEFINE_EXCEPTION(Illegal_quantile,
157 "The quantiles you asked for (%1% and %2%) are not within the collected range of data (0-%3% and %4%-1) to prevent this happening, you have to pass a bigger value for QuantileCut when constructing a MinMaxCollector-Instance!");
158
159
161 BELLE2_DEFINE_EXCEPTION(Request_in_empty_Container, "Data of an empty container was requested!");
162
163
164
166 MinMaxCollector(DataType quantileCut = 0.025, unsigned warmUpThreshold = 10) :
167 m_sampleSize(0),
168 m_quantileCut(quantileCut),
169 m_warmUpThreshold(warmUpThreshold)
170 {
171 if (0 > quantileCut or 0.5 < quantileCut) { throw (Quantile_out_of_bounds() << quantileCut); }
172 }
173
174
175
177 friend std::ostream& operator<< (std::ostream& out, const MinMaxCollector& mmCol) { out << mmCol.getName(); return out; }
178
179
180
182 std::pair<DataType, DataType> getMinMax(DataType minQuantile = 0., DataType maxQuantile = 1.) const
183 {
184 if (m_biggestValues.empty() or m_smallestValues.empty()) { throw (Request_in_empty_Container()); }
185 if (0 > minQuantile or 1 < minQuantile) { throw (Quantile_out_of_bounds() << minQuantile); }
186 if (0 > maxQuantile or 1 < maxQuantile) { throw (Quantile_out_of_bounds() << maxQuantile); }
187 if (minQuantile > m_quantileCut or maxQuantile < (1. - m_quantileCut)) {
188 throw (Illegal_quantile() << minQuantile << maxQuantile << m_quantileCut << 1. - m_quantileCut);
189 }
190
191 unsigned minIndex = getIndex(minQuantile);
192// B2INFO("minIndex: " << minIndex);
193 unsigned maxIndex = getIndex(1. - maxQuantile);
194// B2INFO("maxIndex: " << maxIndex);
195 unsigned finalMaxIndex = ((int(m_biggestValues.size()) - int(maxIndex) - 1) < 0) ? 0 : m_biggestValues.size() - 1 - maxIndex;
196
197 if (minIndex > (m_smallestValues.size() - 1)) { B2ERROR("minIndex " << minIndex << " calculated for minQuantile " << minQuantile << " bigger than nSmallestValues " << m_smallestValues.size() << "!"); }
198 if (finalMaxIndex > (m_biggestValues.size() - 1)) { B2ERROR("maxIndex " << maxIndex << " calculated for maxQuantile " << maxQuantile << " bigger than nBiggestValues " << m_biggestValues.size() << "!"); }
199 return {m_smallestValues.at(minIndex), m_biggestValues.at(finalMaxIndex)};
200 }
201
202
203
205 void insert(DataType newVal)
206 { append(std::move(newVal));}
207
208
209
211 void push_back(DataType newVal)
212 { append(std::move(newVal));}
213
214
215
217 void append(DataType newVal)
218 {
219 m_sampleSize++;
220
221 if (m_sampleSize < m_warmUpThreshold) { // to shorten warm-up-phase
222 m_smallestValues.push_back(newVal);
223 std::sort(m_smallestValues.begin(), m_smallestValues.end());
224 m_biggestValues.push_back(newVal);
225 std::sort(m_biggestValues.begin(), m_biggestValues.end());
226 return;
227 }
228
229 /*bool wasAdded =*/ addMin(newVal, checkVectorSize(m_smallestValues));
230 /*if (wasAdded == false)*/ addMax(newVal, checkVectorSize(m_biggestValues));
231 }
232
233
234
237 {
238 if (other.m_quantileCut != m_quantileCut) {
239 B2WARNING("MinMaxCollector::merge: other collector has differing size in quantileCut. If this is not the purpose, this could indicate unintended behavior!");
240 }
241 if (other.m_quantileCut > m_quantileCut) {
243 }
244 m_smallestValues.insert(m_smallestValues.end(), other.m_smallestValues.begin(), other.m_smallestValues.end());
245 std::sort(m_smallestValues.begin(), m_smallestValues.end());
246 m_biggestValues.insert(m_biggestValues.end(), other.m_biggestValues.begin(), other.m_biggestValues.end());
247 std::sort(m_biggestValues.begin(), m_biggestValues.end());
248 m_sampleSize += other.m_sampleSize;
249 }
250
251
252
254 unsigned totalSize() const
255 { return m_smallestValues.size() + m_biggestValues.size(); }
256
257
258
260 unsigned size() const
261 { return m_smallestValues.size() > m_biggestValues.size() ? m_smallestValues.size() : m_biggestValues.size(); }
262
263
264
266 bool empty() const { return (m_smallestValues.empty() and m_biggestValues.empty()); }
267
268
269
271 void clear()
272 {
273 m_sampleSize = 0;
274 m_smallestValues.clear();
275 m_biggestValues.clear();
276 }
277
279 unsigned sampleSize() const { return m_sampleSize; }
280
281
282
284 void print(bool printFull = false) const
285 { B2INFO(getName(printFull)); }
286
287
288
290 std::string getName(bool printFull = false) const
291 {
292 unsigned nSmallest = m_smallestValues.size();
293 unsigned nBiggest = m_biggestValues.size();
294 using namespace std;
295 string out = "MinMaxCollector with sampleSize " + to_string(m_sampleSize) +
296 " and quantileCut " + to_string(m_quantileCut) +
297 " has nSmallestValues/nBiggestValues: " + to_string(nSmallest) +
298 "/" + to_string(nBiggest) + "\n";
299
300 if (!printFull) out += "The 5 values each describe for the valueContainer pos[0], pos[1], pos[mean], pos[max-1], pos[max]\n";
301 out += "SmallestValues: ";
302 // only want to print full vector if there are not many entries in it:
303 if (printFull or size() < 6) {
304 for (DataType entry : m_smallestValues) { out += to_string(entry) + ", "; }
305 out += "\n" + string("BiggestValues: ");
306 for (DataType entry : m_biggestValues) { out += to_string(entry) + ", "; }
307 out += "\n";
308 } else {
309 DataType smallestTotal = 0, biggestTotal = 0, smallestMean, biggestMean;
310 for (DataType entry : m_smallestValues) { smallestTotal += entry; }
311 smallestMean = smallestTotal / DataType(nSmallest);
312 out += to_string(m_smallestValues.at(0))
313 + ", " + to_string(m_smallestValues.at(1))
314 + ", mean: " + to_string(smallestMean)
315 + ", " + to_string(m_smallestValues.at(nSmallest - 2))
316 + ", " + to_string(m_smallestValues.at(nSmallest - 1));
317 for (DataType entry : m_biggestValues) { biggestTotal += entry; }
318 biggestMean = biggestTotal / DataType(nBiggest);
319 out += "\n" + string("BiggestValues: ");
320 out += to_string(m_biggestValues.at(0))
321 + ", " + to_string(m_biggestValues.at(1))
322 + ", mean: " + to_string(biggestMean)
323 + ", " + to_string(m_biggestValues.at(nBiggest - 2))
324 + ", " + to_string(m_biggestValues.at(nBiggest - 1));
325 }
326 return out;
327 }
328
329 };
331} //Belle2 namespace
A container for collecting data, where min- and max-quantiles near q(0) and q(1) are to be found.
bool checkVectorSize(const std::deque< DataType > &container)
returns true, if vector is allowed to grow
std::pair< DataType, DataType > getMinMax(DataType minQuantile=0., DataType maxQuantile=1.) const
for given pair of quantiles, the according cuts (min, max) will be returned.
unsigned size() const
returns the size (in a sense of roughly collected data)
void print(bool printFull=false) const
print an overview of the entries collected.
unsigned sampleSize() const
returns actual sampleSize
std::string getName(bool printFull=false) const
return a string of an overview of the entries collected.
BELLE2_DEFINE_EXCEPTION(Illegal_quantile, "The quantiles you asked for (%1% and %2%) are not within the collected range of data (0-%3% and %4%-1) to prevent this happening, you have to pass a bigger value for QuantileCut when constructing a MinMaxCollector-Instance!")
exception shall be thrown if the requested quantiles are not within the ranges collected
DataType m_quantileCut
sets the threshold for storing data.
unsigned totalSize() const
returns the combined size of the containers storing the values
void append(DataType newVal)
append new value
MinMaxCollector(DataType quantileCut=0.025, unsigned warmUpThreshold=10)
constructor.
bool empty() const
returns if internal containers are empty
unsigned m_warmUpThreshold
sets a threshold for warm-up.
bool addMax(DataType newVal, bool allow2Grow)
add entry to maxContainer if it fits in
unsigned m_sampleSize
counts numbre of values added so far
BELLE2_DEFINE_EXCEPTION(Request_in_empty_Container, "Data of an empty container was requested!")
exception shall be thrown if value is not between 0-1 and therefore not normalized
friend std::ostream & operator<<(std::ostream &out, const MinMaxCollector &mmCol)
overloaded '<<' stream operator.
void merge(const MinMaxCollector< DataType > &other)
fill the stuff of the other one with this one
unsigned getIndex(DataType aQuantile) const
the correct access-index for given quantile will be determined
void push_back(DataType newVal)
for convenience reasons, pipe to append.
bool addEntry(DataType newVal, bool isMinContainer, bool allow2Grow)
add entry to container if it fits in
std::deque< DataType > m_biggestValues
collects biggest values occured so far
BELLE2_DEFINE_EXCEPTION(Quantile_out_of_bounds, "The quantileCut (%1%) you gave is illegal (only allowed between 0-1)")
returns true, if the valueContainers were increased in size.
void clear()
deletes all values collected so far and resets to constructor-settings.
void insert(DataType newVal)
for convenience reasons, pipe to append.
std::deque< DataType > m_smallestValues
collects smallest values occured so far
void sortIn(DataType newVal, bool isMinContainer)
add newVal to appropriate container
bool addMin(DataType newVal, bool allow2Grow)
add entry to minContainer if it fits in
Abstract base class for different kinds of events.
STL namespace.