Belle II Software  release-08-01-10
DecorrelationMatrixHelper.h
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #pragma once
10 
11 #include <Eigen/Core>
12 #include <framework/logging/Logger.h>
13 
14 #include <array>
15 #include <vector>
16 #include <numeric>
17 
18 namespace Belle2 {
36  template<size_t Ndims>
37  const Eigen::Matrix<double, Ndims, Ndims, Eigen::RowMajor> calculateCovMatrix(std::array<std::vector<double>, Ndims> inputData)
38  {
39  // typedefs used in function
40  using RVector = Eigen::Matrix<double, 1, Ndims, Eigen::RowMajor>; // typedef for row vector type of one measurement
41  using DMatrix = Eigen::Matrix<double, Eigen::Dynamic, Ndims, Eigen::ColMajor>; // typedef for dataMatrix type
42  using DVector = Eigen::Matrix<double, Eigen::Dynamic, 1>; // typedef for column vector in dataMatrix
43 
44  // check if the inputData represent a "full" data matrix
45  size_t nSamples = inputData[0].size();
46  for (size_t i = 1; i < Ndims; ++i) {
47  if (inputData[i].size() != nSamples) {
48  B2ERROR("The input data is no MxN matrix, cannot calculate covariance matrix! Returning identity");
49  return Eigen::Matrix<double, Ndims, Ndims, Eigen::RowMajor>::Identity();
50  }
51  }
52 
53  // calculate the mean of every column and store them in a row vector
54  RVector meanVector{};
55  for (size_t i = 0; i < Ndims; ++i) {
56  meanVector(i) = std::accumulate(inputData[i].begin(), inputData[i].end(), 0.0) / inputData[i].size();
57  }
58 
59  // map the data to a more easily accessible Eigen::Matrix where each row is one measurement (consisting of Ndims values)
60  DMatrix dataMatrix(nSamples, Ndims);
61  for (size_t i = 0; i < Ndims; ++i) {
62  dataMatrix.col(i) = Eigen::Map<const DVector>(inputData[i].data(), inputData[i].size());
63  }
64 
65  // define a matrix where each row holds the mean vector to be able to substract the mean value from each column directly
66  DMatrix meanMatrix(nSamples, Ndims);
67  for (size_t i = 0; i < Ndims; ++i) {
68  meanMatrix.col(i) = DVector::Ones(nSamples) * meanVector(i);
69  }
70 
71  // calculate the cov matrix as product of dataMatrix reduced by meanValues in order to (hopefully) utilize Eigens optimizations
72  // COULDDO: test if this can be accelerated by splitting the process in smaller matrices
73  return (dataMatrix - meanMatrix).transpose() * (dataMatrix - meanMatrix) / (nSamples);
74  }
75 
77 } // end namespace
const Eigen::Matrix< double, Ndims, Ndims, Eigen::RowMajor > calculateCovMatrix(std::array< std::vector< double >, Ndims > inputData)
calculates the empirical covariance matrix from the inputData.
Abstract base class for different kinds of events.