Belle II Software  release-05-01-25
MinMaxCollector.h
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2010 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Jakob Lettenbichler *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #pragma once
12 
13 
14 // includes - stl:
15 #include <deque>
16 #include <string>
17 #include <utility> // std::pair, std::move
18 
19 #include <math.h> /* ceil */
20 
21 // includes - tf-related stuff:
22 // includes - general fw stuff:
23 #include <framework/core/FrameworkExceptions.h>
24 #include <framework/logging/Logger.h>
25 
26 
27 namespace Belle2 {
43  template<class DataType>
44  class MinMaxCollector {
45  protected:
47  std::deque<DataType> m_smallestValues;
48 
50  std::deque<DataType> m_biggestValues;
51 
53  unsigned m_sampleSize;
54 
55 
57  DataType m_quantileCut;
58 
59 
62 
63 
64 
66  bool addMin(DataType newVal, bool allow2Grow) { return addEntry(newVal, true, allow2Grow); }
67 
68 
69 
71  bool addMax(DataType newVal, bool allow2Grow) { return addEntry(newVal, false, allow2Grow); }
72 
73 
74 
76  bool addEntry(DataType newVal, bool isMinContainer, bool allow2Grow)
77  {
78  if (isMinContainer) {
79  if (m_smallestValues.empty() or allow2Grow) { sortIn(newVal, true); return true; }
80  if (m_smallestValues.back() > newVal) {
81  m_smallestValues.pop_back();
82  sortIn(newVal, true);
83  return true;
84  }
85  } else {
86  if (m_biggestValues.empty() or allow2Grow) { sortIn(newVal, false); return true; }
87  if (m_biggestValues.front() < newVal) {
88  m_biggestValues.pop_front();
89  sortIn(newVal, false);
90  return true;
91  }
92  }
93  return false;
94  }
95 
96 
97 
99  void sortIn(DataType newVal, bool isMinContainer)
100  {
105  if (isMinContainer) {
106  m_smallestValues.push_back(newVal);
107  std::sort(m_smallestValues.begin(), m_smallestValues.end());
108  return;
109  }
110  m_biggestValues.push_back(newVal);
111  std::sort(m_biggestValues.begin(), m_biggestValues.end());
112  return;
113  }
114 
115 
116 
118  unsigned getIndex(DataType aQuantile) const
119 // { return floor(double(vecSize-1) * quantile + 0.5); };
120  { return unsigned(double(m_sampleSize/*-1*/) * double(aQuantile) + 0.5); }
121 
122 
123 
125  bool checkVectorSize(std::deque<DataType>& container)
126  {
127  // want to allow growing with some extra margin to prevent issues
128  unsigned newCalcThreshold = unsigned(ceil(float(m_sampleSize) * float(m_quantileCut) + ceil(0.01 * float(m_sampleSize))));
129 
130  if (newCalcThreshold > container.size() /*+1*/) { return true; }
131  return false;
132  }
133 
135 // bool checkResize()
136 // {
137 // float sampleSize = float(m_sampleSize);
138 // float nSmallestValues = float(m_smallestValues.size());
139 // float nBiggestValues = float(m_biggestValues.size());
140 // float quantileCut = float(m_quantileCut);
141 //
142 // float threshold = sampleSize*quantileCut + ceil(0.01*sampleSize);
143 // if (threshold < nSmallestValues or threshold < nBiggestValues) return false;
144 //
145 // unsigned newSize = ceil(threshold*1.1 + 0.5); // stretch by further 10%
146 // // fill up with dummy values
147 // m_smallestValues.resize(newSize, std::numeric_limits<DataType>::max());
148 // m_biggestValues.resize(newSize, std::numeric_limits<DataType>::min());
149 // return true;
150 // }
151  public:
152 
154  BELLE2_DEFINE_EXCEPTION(Quantile_out_of_bounds, "The quantileCut (%1%) you gave is illegal (only allowed between 0-1)");
155 
156 
158  BELLE2_DEFINE_EXCEPTION(Illegal_quantile,
159  "The quantiles you asked for (%1% and %2%) are not within the collected range of data (0-%3% and %4%-1) to prevent this happening, you have to pass a bigger value for QuantileCut when constructing a MinMaxCollector-Instance!");
160 
161 
163  BELLE2_DEFINE_EXCEPTION(Request_in_empty_Container, "Data of an empty container was requested!");
164 
165 
166 
168  MinMaxCollector(DataType quantileCut = 0.025, unsigned warmUpThreshold = 10) :
169  m_sampleSize(0),
170  m_quantileCut(quantileCut),
171  m_warmUpThreshold(warmUpThreshold)
172  {
173  if (0 > quantileCut or 0.5 < quantileCut) { throw (Quantile_out_of_bounds() << quantileCut); }
174  }
175 
176 
177 
179  friend std::ostream& operator<< (std::ostream& out, const MinMaxCollector& mmCol) { out << mmCol.getName(); return out; }
180 
181 
182 
184  std::pair<DataType, DataType> getMinMax(DataType minQuantile = 0., DataType maxQuantile = 1.) const
185  {
186  if (m_biggestValues.empty() or m_smallestValues.empty()) { throw (Request_in_empty_Container()); }
187  if (0 > minQuantile or 1 < minQuantile) { throw (Quantile_out_of_bounds() << minQuantile); }
188  if (0 > maxQuantile or 1 < maxQuantile) { throw (Quantile_out_of_bounds() << maxQuantile); }
189  if (minQuantile > m_quantileCut or maxQuantile < (1. - m_quantileCut)) {
190  throw (Illegal_quantile() << minQuantile << maxQuantile << m_quantileCut << 1. - m_quantileCut);
191  }
192 
193  unsigned minIndex = getIndex(minQuantile);
194 // B2INFO("minIndex: " << minIndex);
195  unsigned maxIndex = getIndex(1. - maxQuantile);
196 // B2INFO("maxIndex: " << maxIndex);
197  unsigned finalMaxIndex = ((int(m_biggestValues.size()) - int(maxIndex) - 1) < 0) ? 0 : m_biggestValues.size() - 1 - maxIndex;
198 
199  if (minIndex > (m_smallestValues.size() - 1)) { B2ERROR("minIndex " << minIndex << " calculated for minQuantile " << minQuantile << " bigger than nSmallestValues " << m_smallestValues.size() << "!"); }
200  if (finalMaxIndex > (m_biggestValues.size() - 1)) { B2ERROR("maxIndex " << maxIndex << " calculated for maxQuantile " << maxQuantile << " bigger than nBiggestValues " << m_biggestValues.size() << "!"); }
201  return {m_smallestValues.at(minIndex), m_biggestValues.at(finalMaxIndex)};
202  }
203 
204 
205 
207  void insert(DataType newVal)
208  { append(std::move(newVal));}
209 
210 
211 
213  void push_back(DataType newVal)
214  { append(std::move(newVal));}
215 
216 
217 
219  void append(DataType newVal)
220  {
222 
223  if (m_sampleSize < m_warmUpThreshold) { // to shorten warm-up-phase
224  m_smallestValues.push_back(newVal);
225  std::sort(m_smallestValues.begin(), m_smallestValues.end());
226  m_biggestValues.push_back(newVal);
227  std::sort(m_biggestValues.begin(), m_biggestValues.end());
228  return;
229  }
230 
231  /*bool wasAdded =*/ addMin(newVal, checkVectorSize(m_smallestValues));
232  /*if (wasAdded == false)*/ addMax(newVal, checkVectorSize(m_biggestValues));
233  }
234 
235 
236 
238  void merge(const MinMaxCollector<DataType>& other)
239  {
240  if (other.m_quantileCut != m_quantileCut) {
241  B2WARNING("MinMaxCollector::merge: other collector has differing size in quantileCut. If this is not the purpose, this could indicate unintended behavior!");
242  }
243  if (other.m_quantileCut > m_quantileCut) {
244  m_quantileCut = other.m_quantileCut;
245  }
246  m_smallestValues.insert(m_smallestValues.end(), other.m_smallestValues.begin(), other.m_smallestValues.end());
247  std::sort(m_smallestValues.begin(), m_smallestValues.end());
248  m_biggestValues.insert(m_biggestValues.end(), other.m_biggestValues.begin(), other.m_biggestValues.end());
249  std::sort(m_biggestValues.begin(), m_biggestValues.end());
250  m_sampleSize += other.m_sampleSize;
251  }
252 
253 
254 
256  unsigned totalSize() const
257  { return m_smallestValues.size() + m_biggestValues.size(); }
258 
259 
260 
262  unsigned size() const
263  { return m_smallestValues.size() > m_biggestValues.size() ? m_smallestValues.size() : m_biggestValues.size(); }
264 
265 
266 
268  bool empty() const { return (m_smallestValues.empty() and m_biggestValues.empty()); }
269 
270 
271 
273  void clear()
274  {
275  m_sampleSize = 0;
277  m_biggestValues.clear();
278  }
279 
281  unsigned sampleSize() const { return m_sampleSize; }
282 
283 
284 
286  void print(bool printFull = false) const
287  { B2INFO(getName(printFull)); }
288 
289 
290 
292  std::string getName(bool printFull = false) const
293  {
294  unsigned nSmallest = m_smallestValues.size();
295  unsigned nBiggest = m_biggestValues.size();
296  using namespace std;
297  string out = "MinMaxCollector with sampleSize " + to_string(m_sampleSize) +
298  " and quantileCut " + to_string(m_quantileCut) +
299  " has nSmallestValues/nBiggestValues: " + to_string(nSmallest) +
300  "/" + to_string(nBiggest) + "\n";
301 
302  if (!printFull) out += "The 5 values each describe for the valueContainer pos[0], pos[1], pos[mean], pos[max-1], pos[max]\n";
303  out += "SmallestValues: ";
304  // only want to print full vector if there are not many entries in it:
305  if (printFull or size() < 6) {
306  for (DataType entry : m_smallestValues) { out += to_string(entry) + ", "; }
307  out += "\n" + string("BiggestValues: ");
308  for (DataType entry : m_biggestValues) { out += to_string(entry) + ", "; }
309  out += "\n";
310  } else {
311  DataType smallestTotal = 0, biggestTotal = 0, smallestMean, biggestMean;
312  for (DataType entry : m_smallestValues) { smallestTotal += entry; }
313  smallestMean = smallestTotal / DataType(nSmallest);
314  out += to_string(m_smallestValues.at(0))
315  + ", " + to_string(m_smallestValues.at(1))
316  + ", mean: " + to_string(smallestMean)
317  + ", " + to_string(m_smallestValues.at(nSmallest - 2))
318  + ", " + to_string(m_smallestValues.at(nSmallest - 1));
319  for (DataType entry : m_biggestValues) { biggestTotal += entry; }
320  biggestMean = biggestTotal / DataType(nBiggest);
321  out += "\n" + string("BiggestValues: ");
322  out += to_string(m_biggestValues.at(0))
323  + ", " + to_string(m_biggestValues.at(1))
324  + ", mean: " + to_string(biggestMean)
325  + ", " + to_string(m_biggestValues.at(nBiggest - 2))
326  + ", " + to_string(m_biggestValues.at(nBiggest - 1));
327  }
328  return out;
329  }
330 
331  };
333 } //Belle2 namespace
Belle2::MinMaxCollector::getMinMax
std::pair< DataType, DataType > getMinMax(DataType minQuantile=0., DataType maxQuantile=1.) const
for given pair of quantiles, the according cuts (min, max) will be returned.
Definition: MinMaxCollector.h:192
Belle2::MinMaxCollector::addMax
bool addMax(DataType newVal, bool allow2Grow)
add entry to maxContainer if it fits in
Definition: MinMaxCollector.h:79
Belle2::MinMaxCollector::size
unsigned size() const
returns the size (in a sense of roughly collected data)
Definition: MinMaxCollector.h:270
Belle2::MinMaxCollector::push_back
void push_back(DataType newVal)
for convenience reasons, pipe to append.
Definition: MinMaxCollector.h:221
Belle2::MinMaxCollector::m_warmUpThreshold
unsigned m_warmUpThreshold
sets a threshold for warm-up.
Definition: MinMaxCollector.h:69
Belle2::MinMaxCollector::addEntry
bool addEntry(DataType newVal, bool isMinContainer, bool allow2Grow)
add entry to container if it fits in
Definition: MinMaxCollector.h:84
Belle2::MinMaxCollector::append
void append(DataType newVal)
append new value
Definition: MinMaxCollector.h:227
Belle2::MinMaxCollector::insert
void insert(DataType newVal)
for convenience reasons, pipe to append.
Definition: MinMaxCollector.h:215
Belle2::MinMaxCollector::m_quantileCut
DataType m_quantileCut
sets the threshold for storing data.
Definition: MinMaxCollector.h:65
Belle2::MinMaxCollector
A container for collecting data, where min- and max-quantiles near q(0) and q(1) are to be found.
Definition: MinMaxCollector.h:52
Belle2::MinMaxCollector::merge
void merge(const MinMaxCollector< DataType > &other)
fill the stuff of the other one with this one
Definition: MinMaxCollector.h:246
Belle2::MinMaxCollector::BELLE2_DEFINE_EXCEPTION
BELLE2_DEFINE_EXCEPTION(Quantile_out_of_bounds, "The quantileCut (%1%) you gave is illegal (only allowed between 0-1)")
returns true, if the valueContainers were increased in size.
Belle2::MinMaxCollector::MinMaxCollector
MinMaxCollector(DataType quantileCut=0.025, unsigned warmUpThreshold=10)
constructor.
Definition: MinMaxCollector.h:176
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::MinMaxCollector::empty
bool empty() const
returns if internal containers are empty
Definition: MinMaxCollector.h:276
Belle2::MinMaxCollector::checkVectorSize
bool checkVectorSize(std::deque< DataType > &container)
returns true, if vector is allowed to grow
Definition: MinMaxCollector.h:133
Belle2::MinMaxCollector::m_biggestValues
std::deque< DataType > m_biggestValues
collects biggest values occured so far
Definition: MinMaxCollector.h:58
Belle2::MinMaxCollector::sampleSize
unsigned sampleSize() const
returns actual sampleSize
Definition: MinMaxCollector.h:289
Belle2::MinMaxCollector::addMin
bool addMin(DataType newVal, bool allow2Grow)
add entry to minContainer if it fits in
Definition: MinMaxCollector.h:74
Belle2::MinMaxCollector::m_smallestValues
std::deque< DataType > m_smallestValues
collects smallest values occured so far
Definition: MinMaxCollector.h:55
Belle2::MinMaxCollector::print
void print(bool printFull=false) const
print an overview of the entries collected.
Definition: MinMaxCollector.h:294
Belle2::MinMaxCollector::sortIn
void sortIn(DataType newVal, bool isMinContainer)
add newVal to appropriate container
Definition: MinMaxCollector.h:107
Belle2::MinMaxCollector::totalSize
unsigned totalSize() const
returns the combined size of the containers storing the values
Definition: MinMaxCollector.h:264
Belle2::MinMaxCollector::getName
std::string getName(bool printFull=false) const
return a string of an overview of the entries collected.
Definition: MinMaxCollector.h:300
Belle2::MinMaxCollector::clear
void clear()
deletes all values collected so far and resets to constructor-settings.
Definition: MinMaxCollector.h:281
Belle2::MinMaxCollector::operator<<
friend std::ostream & operator<<(std::ostream &out, const MinMaxCollector &mmCol)
overloaded '<<' stream operator.
Definition: MinMaxCollector.h:187
Belle2::MinMaxCollector::m_sampleSize
unsigned m_sampleSize
counts numbre of values added so far
Definition: MinMaxCollector.h:61
Belle2::MinMaxCollector::getIndex
unsigned getIndex(DataType aQuantile) const
the correct access-index for given quantile will be determined
Definition: MinMaxCollector.h:126