#include <gtest/gtest.h>

#include <tracking/trackFindingCDC/mva/PyEstimator.h>
#include <tracking/trackFindingCDC/utilities/TimeIt.h>

#include <framework/logging/Logger.h>

#include <boost/python/errors.hpp>
#include <boost/python/exec.hpp>
#include <boost/python/extract.hpp>
#include <boost/python/import.hpp>

#include <cstdio>
#include <string>
#include <vector>

using namespace Belle2::TrackFindingCDC;
24 TEST(TrackFindingCDCTest, PyEstimator_predict_regression)
28 std::string first_var_regressor_code = R
"code(
29class FirstVarRegressor(object):
30 def predict(self, ys):
33first_var_regressor = FirstVarRegressor()
35with open("first_var_regressor.pickle", "wb") as first_var_regressor_file:
36 pickle.dump(first_var_regressor, first_var_regressor_file)
40 boost::python::object main = boost::python::import(
"__main__");
41 boost::python::object global = main.attr(
"__dict__");
44 boost::python::exec(first_var_regressor_code.c_str(), global);
45 PyEstimator pyRegressor(
"first_var_regressor.pickle");
46 std::vector<double> variables;
48 const double expected = 42.;
49 variables[0] = expected;
51 double actual = pyRegressor.predict(variables);
52 EXPECT_EQ(expected, actual);
57 remove(
"first_var_regressor.pickle");
60 TEST(TrackFindingCDCTest, PyEstimator_predict_classifier)
64 std::string first_var_classifier_code = R
"code(
66class FirstVarClassifier(object):
67 def predict_proba(self, ys):
68 signal_proba = ys[0:1, 0:1]
69 bkg_proba = 1.0 - signal_proba
70 result = numpy.hstack([bkg_proba, signal_proba])
73first_var_classifier = FirstVarClassifier()
75with open("first_var_classifier.pickle", "wb") as first_var_classifier_file:
76 pickle.dump(first_var_classifier, first_var_classifier_file)
80 boost::python::object main = boost::python::import(
"__main__");
81 boost::python::object global = main.attr(
"__dict__");
84 boost::python::exec(first_var_classifier_code.c_str(), global);
85 PyEstimator pyClassifier(
"first_var_classifier.pickle");
86 std::vector<double> variables;
88 const double expected = 0.7;
89 variables[0] = expected;
91 double actual = pyClassifier.predict(variables);
92 EXPECT_EQ(expected, actual);
97 remove(
"first_var_classifier.pickle");
101 TEST(TrackFindingCDCTest, PyEstimator_predict_sklearn_regressor)
105 boost::python::object main = boost::python::import(
"sklearn");
114 std::string bdt_regressor_code = R
"code(
116from sklearn import ensemble
117from sklearn import datasets
118from sklearn.utils import shuffle
121housing = datasets.fetch_california_housing()
122x, y = shuffle(housing.data, housing.target, random_state=13)
123x = x.astype(np.float64)
127trainX, trainY = x[:train_fraction], y[:train_fraction]
128testX, testY = x[train_fraction:max_samples], y[train_fraction:max_samples]
130params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 0.1,
131 'learning_rate': 0.01, 'loss': 'squared_error'}
133clf = ensemble.GradientBoostingRegressor(**params)
134clf.fit(trainX, trainY)
137with open("bdt_regressor.pickle", "wb") as bdt_regressor_file:
138 pickle.dump(clf, bdt_regressor_file)
142 boost::python::object main = boost::python::import(
"__main__");
143 boost::python::object global = main.attr(
"__dict__");
147 boost::python::exec(bdt_regressor_code.c_str(), global);
156 boost::python::object testX = global[
"testX"];
157 boost::python::object shape = testX.attr(
"shape");
158 size_t nRowsTestX = boost::python::extract<size_t>(shape[0]);
159 size_t nColsTestX = boost::python::extract<size_t>(shape[1]);
161 std::vector<std::vector<double> > knowledgeX(nRowsTestX);
162 for (
size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
163 boost::python::object testXRow = testX[iRowTestX];
164 std::vector<double>& variables = knowledgeX[iRowTestX];
165 for (
size_t iColTestX = 0; iColTestX < nColsTestX; ++iColTestX) {
166 variables.push_back(boost::python::extract<double>(testXRow[iColTestX]));
170 std::vector<double> predictions;
171 predictions.reserve(nRowsTestX);
174 auto timeItResult = timeIt(1,
false, [&](){
176 for (
size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
177 const std::vector<double>& variables = knowledgeX[iRowTestX];
178 double prediction = pyClassifier.predict(variables);
179 predictions.push_back(prediction);
182 remove(
"bdt_regressor.pickle");
184 timeItResult.printSummary();
185 B2INFO(
"This might be to slow for serious stuff");
188 boost::python::object testY = global[
"testY"];
189 double squareSum = 0;
190 for (
size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
191 double prediction = predictions[iRowTestX];
192 double target = boost::python::extract<double>(testY[iRowTestX]);
193 squareSum += (prediction - target) * (prediction - target);
196 double mean_square_error = squareSum / nRowsTestX;
197 EXPECT_GT(1, mean_square_error);
Class to invoke a pretrained python estimator that follows the sklearn interface.