Belle II Software  release-08-01-10
PyEstimator.test.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 #include <gtest/gtest.h>
9 
10 #include <tracking/trackFindingCDC/mva/PyEstimator.h>
11 #include <tracking/trackFindingCDC/utilities/TimeIt.h>
12 
13 #include <framework/logging/Logger.h>
14 
15 #include <boost/python/import.hpp>
16 #include <boost/python/exec.hpp>
17 #include <boost/python/extract.hpp>
18 #include "Python.h"
19 #include <vector>
20 #include <cstdio>
21 
22 namespace {
23  using namespace Belle2::TrackFindingCDC;
24  TEST(TrackFindingCDCTest, PyEstimator_predict_regression)
25  {
26  Py_Initialize();
27  // Create dummy regession object
28  std::string first_var_regressor_code = R"code(
29 class FirstVarRegressor(object):
30  def predict(self, ys):
31  return ys[0, 0:1]
32 
33 first_var_regressor = FirstVarRegressor()
34 import pickle
35 with open("first_var_regressor.pickle", "wb") as first_var_regressor_file:
36  pickle.dump(first_var_regressor, first_var_regressor_file)
37 )code";
38 
39  // Retrieve the globals of the main module.
40  boost::python::object main = boost::python::import("__main__");
41  boost::python::object global = main.attr("__dict__");
42 
43  // Pickle the dummy regressor
44  boost::python::exec(first_var_regressor_code.c_str(), global);
45  PyEstimator pyRegressor("first_var_regressor.pickle");
46  std::vector<double> variables;
47  variables.resize(5);
48  const double expected = 42.;
49  variables[0] = expected;
50  try {
51  double actual = pyRegressor.predict(variables);
52  EXPECT_EQ(expected, actual);
53  } catch (...) {
54  PyErr_Print();
55  throw;
56  }
57  remove("first_var_regressor.pickle");
58  }
59 
60  TEST(TrackFindingCDCTest, PyEstimator_predict_classifier)
61  {
62  Py_Initialize();
63  // Create dummy regession object
64  std::string first_var_classifier_code = R"code(
65 import numpy
66 class FirstVarClassifier(object):
67  def predict_proba(self, ys):
68  signal_proba = ys[0:1, 0:1]
69  bkg_proba = 1.0 - signal_proba
70  result = numpy.hstack([bkg_proba, signal_proba])
71  return result
72 
73 first_var_classifier = FirstVarClassifier()
74 import pickle
75 with open("first_var_classifier.pickle", "wb") as first_var_classifier_file:
76  pickle.dump(first_var_classifier, first_var_classifier_file)
77 )code";
78 
79  // Retrieve the globals of the main module.
80  boost::python::object main = boost::python::import("__main__");
81  boost::python::object global = main.attr("__dict__");
82 
83  // Pickle the dummy classifier
84  boost::python::exec(first_var_classifier_code.c_str(), global);
85  PyEstimator pyClassifier("first_var_classifier.pickle");
86  std::vector<double> variables;
87  variables.resize(5);
88  const double expected = 0.7;
89  variables[0] = expected;
90  try {
91  double actual = pyClassifier.predict(variables);
92  EXPECT_EQ(expected, actual);
93  } catch (...) {
94  PyErr_Print();
95  throw;
96  }
97  remove("first_var_classifier.pickle");
98  }
99 
100 
101  TEST(DISABLED_TrackFindingCDCTest, PyEstimator_predict_sklearn_regressor)
102  {
103  Py_Initialize();
104  try {
105  boost::python::object main = boost::python::import("sklearn");
106  } catch (...) {
107  // This test only works for people who installed sklearn
108  // Do a pip3 install sklearn to enjoy it.
109  PyErr_Clear();
110  return;
111  }
112 
113  // Create dummy regession object
114  std::string bdt_regressor_code = R"code(
115 
116 from sklearn import ensemble
117 from sklearn import datasets
118 from sklearn.utils import shuffle
119 import numpy as np
120 
121 boston = datasets.load_boston()
122 x, y = shuffle(boston.data, boston.target, random_state=13)
123 x = x.astype(np.float64)
124 
125 offset = int(x.shape[0] * 0.9)
126 trainX, trainY = x[:offset], y[:offset]
127 testX, testY = x[offset:], y[offset:]
128 
129 params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
130  'learning_rate': 0.01, 'loss': 'ls'}
131 
132 clf = ensemble.GradientBoostingRegressor(**params)
133 clf.fit(trainX, trainY)
134 
135 import pickle
136 with open("bdt_regressor.pickle", "wb") as bdt_regressor_file:
137  pickle.dump(clf, bdt_regressor_file)
138 )code";
139 
140  // Retrieve the globals of the main module.
141  boost::python::object main = boost::python::import("__main__");
142  boost::python::object global = main.attr("__dict__");
143 
144  // Pickle the dummy classifier
145  try {
146  boost::python::exec(bdt_regressor_code.c_str(), global);
147  } catch (...) {
148  PyErr_Print();
149  throw;
150  }
151 
152  try {
153  PyEstimator pyClassifier("bdt_regressor.pickle");
154 
155  boost::python::object testX = global["testX"];
156  boost::python::object shape = testX.attr("shape");
157  size_t nRowsTestX = boost::python::extract<size_t>(shape[0]);
158  size_t nColsTestX = boost::python::extract<size_t>(shape[1]);
159 
160  std::vector<std::vector<double> > knowledgeX(nRowsTestX);
161  for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
162  boost::python::object testXRow = testX[iRowTestX];
163  std::vector<double>& variables = knowledgeX[iRowTestX];
164  for (size_t iColTestX = 0; iColTestX < nColsTestX; ++iColTestX) {
165  variables.push_back(boost::python::extract<double>(testXRow[iColTestX]));
166  }
167  }
168 
169  std::vector<double> predictions;
170  predictions.reserve(nRowsTestX);
171 
172  // Make predictions
173  auto timeItResult = timeIt(1, false, [&](){
174  predictions.clear();
175  for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
176  const std::vector<double>& variables = knowledgeX[iRowTestX];
177  double prediction = pyClassifier.predict(variables);
178  predictions.push_back(prediction);
179  }
180  });
181  remove("bdt_regressor.pickle");
182 
183  timeItResult.printSummary();
184  B2INFO("This might be to slow for serious stuff");
185 
186  // Compare prediciton
187  boost::python::object testY = global["testY"];
188  double squareSum = 0;
189  for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
190  double prediction = predictions[iRowTestX];
191  double target = boost::python::extract<double>(testY[iRowTestX]);
192  squareSum += (prediction - target) * (prediction - target);
193  }
194 
195  double mean_square_error = squareSum / nRowsTestX;
196  EXPECT_GT(7, mean_square_error);
197 
198  } catch (...) {
199  PyErr_Print();
200  throw;
201  }
202 
203  }
204 }
Class to invoke a pretrained python estimator that follows the sklearn interface.
Definition: PyEstimator.h:23
TEST(TestgetDetectorRegion, TestgetDetectorRegion)
Test Constructors.
int main(int argc, char **argv)
Run all tests.
Definition: test_main.cc:91