Belle II Software  release-08-02-04
MinMaxCollector.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 
11 
12 // includes - stl:
13 #include <deque>
14 #include <string>
15 #include <utility> // std::pair, std::move
16 
17 #include <math.h> /* ceil */
18 
19 // includes - tf-related stuff:
20 // includes - general fw stuff:
21 #include <framework/core/FrameworkExceptions.h>
22 #include <framework/logging/Logger.h>
23 
24 
25 namespace Belle2 {
41  template<class DataType>
43  protected:
45  std::deque<DataType> m_smallestValues;
46 
48  std::deque<DataType> m_biggestValues;
49 
51  unsigned m_sampleSize;
52 
53 
55  DataType m_quantileCut;
56 
57 
60 
61 
62 
64  bool addMin(DataType newVal, bool allow2Grow) { return addEntry(newVal, true, allow2Grow); }
65 
66 
67 
69  bool addMax(DataType newVal, bool allow2Grow) { return addEntry(newVal, false, allow2Grow); }
70 
71 
72 
74  bool addEntry(DataType newVal, bool isMinContainer, bool allow2Grow)
75  {
76  if (isMinContainer) {
77  if (m_smallestValues.empty() or allow2Grow) { sortIn(newVal, true); return true; }
78  if (m_smallestValues.back() > newVal) {
79  m_smallestValues.pop_back();
80  sortIn(newVal, true);
81  return true;
82  }
83  } else {
84  if (m_biggestValues.empty() or allow2Grow) { sortIn(newVal, false); return true; }
85  if (m_biggestValues.front() < newVal) {
86  m_biggestValues.pop_front();
87  sortIn(newVal, false);
88  return true;
89  }
90  }
91  return false;
92  }
93 
94 
95 
97  void sortIn(DataType newVal, bool isMinContainer)
98  {
103  if (isMinContainer) {
104  m_smallestValues.push_back(newVal);
105  std::sort(m_smallestValues.begin(), m_smallestValues.end());
106  return;
107  }
108  m_biggestValues.push_back(newVal);
109  std::sort(m_biggestValues.begin(), m_biggestValues.end());
110  return;
111  }
112 
113 
114 
116  unsigned getIndex(DataType aQuantile) const
117 // { return floor(double(vecSize-1) * quantile + 0.5); };
118  { return unsigned(double(m_sampleSize/*-1*/) * double(aQuantile) + 0.5); }
119 
120 
121 
123  bool checkVectorSize(const std::deque<DataType>& container)
124  {
125  // want to allow growing with some extra margin to prevent issues
126  unsigned newCalcThreshold = unsigned(ceil(float(m_sampleSize) * float(m_quantileCut) + ceil(0.01 * float(m_sampleSize))));
127 
128  if (newCalcThreshold > container.size() /*+1*/) { return true; }
129  return false;
130  }
131 
133 // bool checkResize()
134 // {
135 // float sampleSize = float(m_sampleSize);
136 // float nSmallestValues = float(m_smallestValues.size());
137 // float nBiggestValues = float(m_biggestValues.size());
138 // float quantileCut = float(m_quantileCut);
139 //
140 // float threshold = sampleSize*quantileCut + ceil(0.01*sampleSize);
141 // if (threshold < nSmallestValues or threshold < nBiggestValues) return false;
142 //
143 // unsigned newSize = ceil(threshold*1.1 + 0.5); // stretch by further 10%
144 // // fill up with dummy values
145 // m_smallestValues.resize(newSize, std::numeric_limits<DataType>::max());
146 // m_biggestValues.resize(newSize, std::numeric_limits<DataType>::min());
147 // return true;
148 // }
149  public:
150 
152  BELLE2_DEFINE_EXCEPTION(Quantile_out_of_bounds, "The quantileCut (%1%) you gave is illegal (only allowed between 0-1)");
153 
154 
156  BELLE2_DEFINE_EXCEPTION(Illegal_quantile,
157  "The quantiles you asked for (%1% and %2%) are not within the collected range of data (0-%3% and %4%-1) to prevent this happening, you have to pass a bigger value for QuantileCut when constructing a MinMaxCollector-Instance!");
158 
159 
161  BELLE2_DEFINE_EXCEPTION(Request_in_empty_Container, "Data of an empty container was requested!");
162 
163 
164 
166  MinMaxCollector(DataType quantileCut = 0.025, unsigned warmUpThreshold = 10) :
167  m_sampleSize(0),
168  m_quantileCut(quantileCut),
169  m_warmUpThreshold(warmUpThreshold)
170  {
171  if (0 > quantileCut or 0.5 < quantileCut) { throw (Quantile_out_of_bounds() << quantileCut); }
172  }
173 
174 
175 
177  friend std::ostream& operator<< (std::ostream& out, const MinMaxCollector& mmCol) { out << mmCol.getName(); return out; }
178 
179 
180 
182  std::pair<DataType, DataType> getMinMax(DataType minQuantile = 0., DataType maxQuantile = 1.) const
183  {
184  if (m_biggestValues.empty() or m_smallestValues.empty()) { throw (Request_in_empty_Container()); }
185  if (0 > minQuantile or 1 < minQuantile) { throw (Quantile_out_of_bounds() << minQuantile); }
186  if (0 > maxQuantile or 1 < maxQuantile) { throw (Quantile_out_of_bounds() << maxQuantile); }
187  if (minQuantile > m_quantileCut or maxQuantile < (1. - m_quantileCut)) {
188  throw (Illegal_quantile() << minQuantile << maxQuantile << m_quantileCut << 1. - m_quantileCut);
189  }
190 
191  unsigned minIndex = getIndex(minQuantile);
192 // B2INFO("minIndex: " << minIndex);
193  unsigned maxIndex = getIndex(1. - maxQuantile);
194 // B2INFO("maxIndex: " << maxIndex);
195  unsigned finalMaxIndex = ((int(m_biggestValues.size()) - int(maxIndex) - 1) < 0) ? 0 : m_biggestValues.size() - 1 - maxIndex;
196 
197  if (minIndex > (m_smallestValues.size() - 1)) { B2ERROR("minIndex " << minIndex << " calculated for minQuantile " << minQuantile << " bigger than nSmallestValues " << m_smallestValues.size() << "!"); }
198  if (finalMaxIndex > (m_biggestValues.size() - 1)) { B2ERROR("maxIndex " << maxIndex << " calculated for maxQuantile " << maxQuantile << " bigger than nBiggestValues " << m_biggestValues.size() << "!"); }
199  return {m_smallestValues.at(minIndex), m_biggestValues.at(finalMaxIndex)};
200  }
201 
202 
203 
205  void insert(DataType newVal)
206  { append(std::move(newVal));}
207 
208 
209 
211  void push_back(DataType newVal)
212  { append(std::move(newVal));}
213 
214 
215 
217  void append(DataType newVal)
218  {
219  m_sampleSize++;
220 
221  if (m_sampleSize < m_warmUpThreshold) { // to shorten warm-up-phase
222  m_smallestValues.push_back(newVal);
223  std::sort(m_smallestValues.begin(), m_smallestValues.end());
224  m_biggestValues.push_back(newVal);
225  std::sort(m_biggestValues.begin(), m_biggestValues.end());
226  return;
227  }
228 
229  /*bool wasAdded =*/ addMin(newVal, checkVectorSize(m_smallestValues));
230  /*if (wasAdded == false)*/ addMax(newVal, checkVectorSize(m_biggestValues));
231  }
232 
233 
234 
236  void merge(const MinMaxCollector<DataType>& other)
237  {
238  if (other.m_quantileCut != m_quantileCut) {
239  B2WARNING("MinMaxCollector::merge: other collector has differing size in quantileCut. If this is not the purpose, this could indicate unintended behavior!");
240  }
241  if (other.m_quantileCut > m_quantileCut) {
243  }
244  m_smallestValues.insert(m_smallestValues.end(), other.m_smallestValues.begin(), other.m_smallestValues.end());
245  std::sort(m_smallestValues.begin(), m_smallestValues.end());
246  m_biggestValues.insert(m_biggestValues.end(), other.m_biggestValues.begin(), other.m_biggestValues.end());
247  std::sort(m_biggestValues.begin(), m_biggestValues.end());
248  m_sampleSize += other.m_sampleSize;
249  }
250 
251 
252 
254  unsigned totalSize() const
255  { return m_smallestValues.size() + m_biggestValues.size(); }
256 
257 
258 
260  unsigned size() const
261  { return m_smallestValues.size() > m_biggestValues.size() ? m_smallestValues.size() : m_biggestValues.size(); }
262 
263 
264 
266  bool empty() const { return (m_smallestValues.empty() and m_biggestValues.empty()); }
267 
268 
269 
271  void clear()
272  {
273  m_sampleSize = 0;
274  m_smallestValues.clear();
275  m_biggestValues.clear();
276  }
277 
279  unsigned sampleSize() const { return m_sampleSize; }
280 
281 
282 
284  void print(bool printFull = false) const
285  { B2INFO(getName(printFull)); }
286 
287 
288 
290  std::string getName(bool printFull = false) const
291  {
292  unsigned nSmallest = m_smallestValues.size();
293  unsigned nBiggest = m_biggestValues.size();
294  using namespace std;
295  string out = "MinMaxCollector with sampleSize " + to_string(m_sampleSize) +
296  " and quantileCut " + to_string(m_quantileCut) +
297  " has nSmallestValues/nBiggestValues: " + to_string(nSmallest) +
298  "/" + to_string(nBiggest) + "\n";
299 
300  if (!printFull) out += "The 5 values each describe for the valueContainer pos[0], pos[1], pos[mean], pos[max-1], pos[max]\n";
301  out += "SmallestValues: ";
302  // only want to print full vector if there are not many entries in it:
303  if (printFull or size() < 6) {
304  for (DataType entry : m_smallestValues) { out += to_string(entry) + ", "; }
305  out += "\n" + string("BiggestValues: ");
306  for (DataType entry : m_biggestValues) { out += to_string(entry) + ", "; }
307  out += "\n";
308  } else {
309  DataType smallestTotal = 0, biggestTotal = 0, smallestMean, biggestMean;
310  for (DataType entry : m_smallestValues) { smallestTotal += entry; }
311  smallestMean = smallestTotal / DataType(nSmallest);
312  out += to_string(m_smallestValues.at(0))
313  + ", " + to_string(m_smallestValues.at(1))
314  + ", mean: " + to_string(smallestMean)
315  + ", " + to_string(m_smallestValues.at(nSmallest - 2))
316  + ", " + to_string(m_smallestValues.at(nSmallest - 1));
317  for (DataType entry : m_biggestValues) { biggestTotal += entry; }
318  biggestMean = biggestTotal / DataType(nBiggest);
319  out += "\n" + string("BiggestValues: ");
320  out += to_string(m_biggestValues.at(0))
321  + ", " + to_string(m_biggestValues.at(1))
322  + ", mean: " + to_string(biggestMean)
323  + ", " + to_string(m_biggestValues.at(nBiggest - 2))
324  + ", " + to_string(m_biggestValues.at(nBiggest - 1));
325  }
326  return out;
327  }
328 
329  };
331 } //Belle2 namespace
A container for collecting data, where min- and max-quantiles near q(0) and q(1) are to be found.
bool checkVectorSize(const std::deque< DataType > &container)
returns true, if vector is allowed to grow
unsigned size() const
returns the size (in a sense of roughly collected data)
void print(bool printFull=false) const
print an overview of the entries collected.
unsigned sampleSize() const
returns actual sampleSize
std::string getName(bool printFull=false) const
return a string of an overview of the entries collected.
BELLE2_DEFINE_EXCEPTION(Illegal_quantile, "The quantiles you asked for (%1% and %2%) are not within the collected range of data (0-%3% and %4%-1) to prevent this happening, you have to pass a bigger value for QuantileCut when constructing a MinMaxCollector-Instance!")
exception shall be thrown if the requested quantiles are not within the ranges collected
DataType m_quantileCut
sets the threshold for storing data.
unsigned totalSize() const
returns the combined size of the containers storing the values
void append(DataType newVal)
append new value
friend std::ostream & operator<<(std::ostream &out, const MinMaxCollector &mmCol)
overloaded '<<' stream operator.
MinMaxCollector(DataType quantileCut=0.025, unsigned warmUpThreshold=10)
constructor.
bool empty() const
returns if internal containers are empty
std::pair< DataType, DataType > getMinMax(DataType minQuantile=0., DataType maxQuantile=1.) const
for given pair of quantiles, the according cuts (min, max) will be returned.
unsigned m_warmUpThreshold
sets a threshold for warm-up.
bool addMax(DataType newVal, bool allow2Grow)
add entry to maxContainer if it fits in
unsigned m_sampleSize
counts numbre of values added so far
BELLE2_DEFINE_EXCEPTION(Request_in_empty_Container, "Data of an empty container was requested!")
exception shall be thrown if value is not between 0-1 and therefore not normalized
void merge(const MinMaxCollector< DataType > &other)
fill the stuff of the other one with this one
unsigned getIndex(DataType aQuantile) const
the correct access-index for given quantile will be determined
void push_back(DataType newVal)
for convenience reasons, pipe to append.
bool addEntry(DataType newVal, bool isMinContainer, bool allow2Grow)
add entry to container if it fits in
std::deque< DataType > m_biggestValues
collects biggest values occured so far
BELLE2_DEFINE_EXCEPTION(Quantile_out_of_bounds, "The quantileCut (%1%) you gave is illegal (only allowed between 0-1)")
returns true, if the valueContainers were increased in size.
void clear()
deletes all values collected so far and resets to constructor-settings.
void insert(DataType newVal)
for convenience reasons, pipe to append.
std::deque< DataType > m_smallestValues
collects smallest values occured so far
void sortIn(DataType newVal, bool isMinContainer)
add newVal to appropriate container
bool addMin(DataType newVal, bool allow2Grow)
add entry to minContainer if it fits in
Abstract base class for different kinds of events.