Belle II Software development
PyEstimator.test.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8#include <gtest/gtest.h>
9
10#include <tracking/trackFindingCDC/mva/PyEstimator.h>
11#include <tracking/trackFindingCDC/utilities/TimeIt.h>
12
13#include <framework/logging/Logger.h>
14
15#include <boost/python/import.hpp>
16#include <boost/python/exec.hpp>
17#include <boost/python/extract.hpp>
18#include "Python.h"
19#include <vector>
20#include <cstdio>
21
22namespace {
23 using namespace Belle2::TrackFindingCDC;
24 TEST(TrackFindingCDCTest, PyEstimator_predict_regression)
25 {
26 Py_Initialize();
27 // Create dummy regression object
28 std::string first_var_regressor_code = R"code(
29class FirstVarRegressor(object):
30 def predict(self, ys):
31 return ys[0, 0:1]
32
33first_var_regressor = FirstVarRegressor()
34import pickle
35with open("first_var_regressor.pickle", "wb") as first_var_regressor_file:
36 pickle.dump(first_var_regressor, first_var_regressor_file)
37)code";
38
39 // Retrieve the globals of the main module.
40 boost::python::object main = boost::python::import("__main__");
41 boost::python::object global = main.attr("__dict__");
42
43 // Pickle the dummy regressor
44 boost::python::exec(first_var_regressor_code.c_str(), global);
45 PyEstimator pyRegressor("first_var_regressor.pickle");
46 std::vector<double> variables;
47 variables.resize(5);
48 const double expected = 42.;
49 variables[0] = expected;
50 try {
51 double actual = pyRegressor.predict(variables);
52 EXPECT_EQ(expected, actual);
53 } catch (...) {
54 PyErr_Print();
55 throw;
56 }
57 remove("first_var_regressor.pickle");
58 }
59
60 TEST(TrackFindingCDCTest, PyEstimator_predict_classifier)
61 {
62 Py_Initialize();
63 // Create dummy regression object
64 std::string first_var_classifier_code = R"code(
65import numpy
66class FirstVarClassifier(object):
67 def predict_proba(self, ys):
68 signal_proba = ys[0:1, 0:1]
69 bkg_proba = 1.0 - signal_proba
70 result = numpy.hstack([bkg_proba, signal_proba])
71 return result
72
73first_var_classifier = FirstVarClassifier()
74import pickle
75with open("first_var_classifier.pickle", "wb") as first_var_classifier_file:
76 pickle.dump(first_var_classifier, first_var_classifier_file)
77)code";
78
79 // Retrieve the globals of the main module.
80 boost::python::object main = boost::python::import("__main__");
81 boost::python::object global = main.attr("__dict__");
82
83 // Pickle the dummy classifier
84 boost::python::exec(first_var_classifier_code.c_str(), global);
85 PyEstimator pyClassifier("first_var_classifier.pickle");
86 std::vector<double> variables;
87 variables.resize(5);
88 const double expected = 0.7;
89 variables[0] = expected;
90 try {
91 double actual = pyClassifier.predict(variables);
92 EXPECT_EQ(expected, actual);
93 } catch (...) {
94 PyErr_Print();
95 throw;
96 }
97 remove("first_var_classifier.pickle");
98 }
99
100
101 TEST(TrackFindingCDCTest, PyEstimator_predict_sklearn_regressor)
102 {
103 Py_Initialize();
104 try {
105 boost::python::object main = boost::python::import("sklearn");
106 } catch (...) {
107 // This test only works for people who installed sklearn
108 // Do a pip3 install sklearn to enjoy it.
109 PyErr_Clear();
110 return;
111 }
112
113 // Create dummy regression object
114 std::string bdt_regressor_code = R"code(
115
116from sklearn import ensemble
117from sklearn import datasets
118from sklearn.utils import shuffle
119import numpy as np
120
121housing = datasets.fetch_california_housing()
122x, y = shuffle(housing.data, housing.target, random_state=13)
123x = x.astype(np.float64)
124
125max_samples = 1000
126train_fraction = 900
127trainX, trainY = x[:train_fraction], y[:train_fraction]
128testX, testY = x[train_fraction:max_samples], y[train_fraction:max_samples]
129
130params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 0.1,
131 'learning_rate': 0.01, 'loss': 'squared_error'}
132
133clf = ensemble.GradientBoostingRegressor(**params)
134clf.fit(trainX, trainY)
135
136import pickle
137with open("bdt_regressor.pickle", "wb") as bdt_regressor_file:
138 pickle.dump(clf, bdt_regressor_file)
139)code";
140
141 // Retrieve the globals of the main module.
142 boost::python::object main = boost::python::import("__main__");
143 boost::python::object global = main.attr("__dict__");
144
145 // Pickle the dummy classifier
146 try {
147 boost::python::exec(bdt_regressor_code.c_str(), global);
148 } catch (...) {
149 PyErr_Print();
150 throw;
151 }
152
153 try {
154 PyEstimator pyClassifier("bdt_regressor.pickle");
155
156 boost::python::object testX = global["testX"];
157 boost::python::object shape = testX.attr("shape");
158 size_t nRowsTestX = boost::python::extract<size_t>(shape[0]);
159 size_t nColsTestX = boost::python::extract<size_t>(shape[1]);
160
161 std::vector<std::vector<double> > knowledgeX(nRowsTestX);
162 for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
163 boost::python::object testXRow = testX[iRowTestX];
164 std::vector<double>& variables = knowledgeX[iRowTestX];
165 for (size_t iColTestX = 0; iColTestX < nColsTestX; ++iColTestX) {
166 variables.push_back(boost::python::extract<double>(testXRow[iColTestX]));
167 }
168 }
169
170 std::vector<double> predictions;
171 predictions.reserve(nRowsTestX);
172
173 // Make predictions
174 auto timeItResult = timeIt(1, false, [&](){
175 predictions.clear();
176 for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
177 const std::vector<double>& variables = knowledgeX[iRowTestX];
178 double prediction = pyClassifier.predict(variables);
179 predictions.push_back(prediction);
180 }
181 });
182 remove("bdt_regressor.pickle");
183
184 timeItResult.printSummary();
185 B2INFO("This might be to slow for serious stuff");
186
187 // Compare predictions
188 boost::python::object testY = global["testY"];
189 double squareSum = 0;
190 for (size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
191 double prediction = predictions[iRowTestX];
192 double target = boost::python::extract<double>(testY[iRowTestX]);
193 squareSum += (prediction - target) * (prediction - target);
194 }
195
196 double mean_square_error = squareSum / nRowsTestX;
197 EXPECT_GT(1, mean_square_error);
198
199 } catch (...) {
200 PyErr_Print();
201 throw;
202 }
203
204 }
205}
Class to invoke a pretrained python estimator that follows the sklearn interface.
Definition: PyEstimator.h:23