Belle II Software  release-05-01-25
PyEstimator.test.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2015 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Oliver Frost <oliver.frost@desy.de> *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 #include <gtest/gtest.h>
11 
12 #include <tracking/trackFindingCDC/mva/PyEstimator.h>
13 #include <tracking/trackFindingCDC/utilities/TimeIt.h>
14 
15 #include <framework/logging/Logger.h>
16 
17 #include <boost/python/import.hpp>
18 #include <boost/python/exec.hpp>
19 #include <boost/python/extract.hpp>
20 #include "Python.h"
21 #include <vector>
22 #include <cstdio>
23 
24 namespace {
25  using namespace Belle2::TrackFindingCDC;
26  TEST(TrackFindingCDCTest, PyEstimator_predict_regression)
27  {
28  Py_Initialize();
29  // Create dummy regession object
30  std::string first_var_regressor_code = R"code(
31 class FirstVarRegressor(object):
32  def predict(self, ys):
33  return ys[0, 0:1]
34 
35 first_var_regressor = FirstVarRegressor()
36 import pickle
37 with open("first_var_regressor.pickle", "wb") as first_var_regressor_file:
38  pickle.dump(first_var_regressor, first_var_regressor_file)
39 )code";
40 
41  // Retrieve the globals of the main module.
42  boost::python::object main = boost::python::import("__main__");
43  boost::python::object global = main.attr("__dict__");
44 
45  // Pickle the dummy regressor
46  boost::python::exec(first_var_regressor_code.c_str(), global);
47  PyEstimator pyRegressor("first_var_regressor.pickle");
48  std::vector<double> variables;
49  variables.resize(5);
50  const double expected = 42.;
51  variables[0] = expected;
52  try {
53  double actual = pyRegressor.predict(variables);
54  EXPECT_EQ(expected, actual);
55  } catch (...) {
56  PyErr_Print();
57  throw;
58  }
59  remove("first_var_regressor.pickle");
60  }
61 
62  TEST(TrackFindingCDCTest, PyEstimator_predict_classifier)
63  {
64  Py_Initialize();
65  // Create dummy regession object
66  std::string first_var_classifier_code = R"code(
67 import numpy
68 class FirstVarClassifier(object):
69  def predict_proba(self, ys):
70  signal_proba = ys[0:1, 0:1]
71  bkg_proba = 1.0 - signal_proba
72  result = numpy.hstack([bkg_proba, signal_proba])
73  return result
74 
75 first_var_classifier = FirstVarClassifier()
76 import pickle
77 with open("first_var_classifier.pickle", "wb") as first_var_classifier_file:
78  pickle.dump(first_var_classifier, first_var_classifier_file)
79 )code";
80 
81  // Retrieve the globals of the main module.
82  boost::python::object main = boost::python::import("__main__");
83  boost::python::object global = main.attr("__dict__");
84 
85  // Pickle the dummy classifier
86  boost::python::exec(first_var_classifier_code.c_str(), global);
87  PyEstimator pyClassifier("first_var_classifier.pickle");
88  std::vector<double> variables;
89  variables.resize(5);
90  const double expected = 0.7;
91  variables[0] = expected;
92  try {
93  double actual = pyClassifier.predict(variables);
94  EXPECT_EQ(expected, actual);
95  } catch (...) {
96  PyErr_Print();
97  throw;
98  }
99  remove("first_var_classifier.pickle");
100  }
101 
102 
103  TEST(DISABLED_TrackFindingCDCTest, PyEstimator_predict_sklearn_regressor)
104  {
105  Py_Initialize();
106  try {
107  boost::python::object main = boost::python::import("sklearn");
108  } catch (...) {
109  // This test only works for people who installed sklearn
110  // Do a pip3 install sklearn to enjoy it.
111  PyErr_Clear();
112  return;
113  }
114 
115  // Create dummy regession object
116  std::string bdt_regressor_code = R"code(
117 
118 from sklearn import ensemble
119 from sklearn import datasets
120 from sklearn.utils import shuffle
121 import numpy as np
122 
123 boston = datasets.load_boston()
124 x, y = shuffle(boston.data, boston.target, random_state=13)
125 x = x.astype(np.float64)
126 
127 offset = int(x.shape[0] * 0.9)
128 trainX, trainY = x[:offset], y[:offset]
129 testX, testY = x[offset:], y[offset:]
130 
131 params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
132  'learning_rate': 0.01, 'loss': 'ls'}
133 
134 clf = ensemble.GradientBoostingRegressor(**params)
135 clf.fit(trainX, trainY)
136 
137 import pickle
138 with open("bdt_regressor.pickle", "wb") as bdt_regressor_file:
139  pickle.dump(clf, bdt_regressor_file)
140 )code";
141 
142  // Retrieve the globals of the main module.
143  boost::python::object main = boost::python::import("__main__");
144  boost::python::object global = main.attr("__dict__");
145 
146  // Pickle the dummy classifier
147  try {
148  boost::python::exec(bdt_regressor_code.c_str(), global);
149  } catch (...) {
150  PyErr_Print();
151  throw;
152  }
153 
154  try {
155  PyEstimator pyClassifier("bdt_regressor.pickle");
156 
157  boost::python::object testX = global["testX"];
158  boost::python::object shape = testX.attr("shape");
159  size_t nRowsTestX = boost::python::extract<size_t>(shape[0]);
160  size_t nColsTestX = boost::python::extract<size_t>(shape[1]);
161 
162  std::vector<std::vector<double> > knowledgeX(nRowsTestX);
163  for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
164  boost::python::object testXRow = testX[iRowTestX];
165  std::vector<double>& variables = knowledgeX[iRowTestX];
166  for (size_t iColTestX = 0; iColTestX < nColsTestX; ++iColTestX) {
167  variables.push_back(boost::python::extract<double>(testXRow[iColTestX]));
168  }
169  }
170 
171  std::vector<double> predictions;
172  predictions.reserve(nRowsTestX);
173 
174  // Make predictions
175  auto timeItResult = timeIt(1, false, [&](){
176  predictions.clear();
177  for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
178  const std::vector<double>& variables = knowledgeX[iRowTestX];
179  double prediction = pyClassifier.predict(variables);
180  predictions.push_back(prediction);
181  }
182  });
183  remove("bdt_regressor.pickle");
184 
185  timeItResult.printSummary();
186  B2INFO("This might be to slow for serious stuff");
187 
188  // Compare prediciton
189  boost::python::object testY = global["testY"];
190  double squareSum = 0;
191  for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
192  double prediction = predictions[iRowTestX];
193  double target = boost::python::extract<double>(testY[iRowTestX]);
194  squareSum += (prediction - target) * (prediction - target);
195  }
196 
197  double mean_square_error = squareSum / nRowsTestX;
198  EXPECT_GT(7, mean_square_error);
199 
200  } catch (...) {
201  PyErr_Print();
202  throw;
203  }
204 
205  }
206 }
Belle2::TrackFindingCDC::PyEstimator
Class to invoke a pretrained python estimator that follows the sklearn interface.
Definition: PyEstimator.h:33
main
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:77
Belle2::TEST
TEST(TestgetDetectorRegion, TestgetDetectorRegion)
Test Constructors.
Definition: utilityFunctions.cc:18