10 #include <gtest/gtest.h>
12 #include <tracking/trackFindingCDC/mva/PyEstimator.h>
13 #include <tracking/trackFindingCDC/utilities/TimeIt.h>
15 #include <framework/logging/Logger.h>
17 #include <boost/python/import.hpp>
18 #include <boost/python/exec.hpp>
19 #include <boost/python/extract.hpp>
25 using namespace Belle2::TrackFindingCDC;
26 TEST(TrackFindingCDCTest, PyEstimator_predict_regression)
30 std::string first_var_regressor_code = R
"code(
31 class FirstVarRegressor(object):
32 def predict(self, ys):
35 first_var_regressor = FirstVarRegressor()
37 with open("first_var_regressor.pickle", "wb") as first_var_regressor_file:
38 pickle.dump(first_var_regressor, first_var_regressor_file)
42 boost::python::object
main = boost::python::import(
"__main__");
43 boost::python::object global =
main.attr(
"__dict__");
46 boost::python::exec(first_var_regressor_code.c_str(), global);
47 PyEstimator pyRegressor(
"first_var_regressor.pickle");
48 std::vector<double> variables;
50 const double expected = 42.;
51 variables[0] = expected;
53 double actual = pyRegressor.predict(variables);
54 EXPECT_EQ(expected, actual);
59 remove(
"first_var_regressor.pickle");
62 TEST(TrackFindingCDCTest, PyEstimator_predict_classifier)
66 std::string first_var_classifier_code = R
"code(
68 class FirstVarClassifier(object):
69 def predict_proba(self, ys):
70 signal_proba = ys[0:1, 0:1]
71 bkg_proba = 1.0 - signal_proba
72 result = numpy.hstack([bkg_proba, signal_proba])
75 first_var_classifier = FirstVarClassifier()
77 with open("first_var_classifier.pickle", "wb") as first_var_classifier_file:
78 pickle.dump(first_var_classifier, first_var_classifier_file)
82 boost::python::object
main = boost::python::import(
"__main__");
83 boost::python::object global =
main.attr(
"__dict__");
86 boost::python::exec(first_var_classifier_code.c_str(), global);
87 PyEstimator pyClassifier(
"first_var_classifier.pickle");
88 std::vector<double> variables;
90 const double expected = 0.7;
91 variables[0] = expected;
93 double actual = pyClassifier.predict(variables);
94 EXPECT_EQ(expected, actual);
99 remove(
"first_var_classifier.pickle");
103 TEST(DISABLED_TrackFindingCDCTest, PyEstimator_predict_sklearn_regressor)
107 boost::python::object
main = boost::python::import(
"sklearn");
116 std::string bdt_regressor_code = R
"code(
118 from sklearn import ensemble
119 from sklearn import datasets
120 from sklearn.utils import shuffle
123 boston = datasets.load_boston()
124 x, y = shuffle(boston.data, boston.target, random_state=13)
125 x = x.astype(np.float64)
127 offset = int(x.shape[0] * 0.9)
128 trainX, trainY = x[:offset], y[:offset]
129 testX, testY = x[offset:], y[offset:]
131 params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
132 'learning_rate': 0.01, 'loss': 'ls'}
134 clf = ensemble.GradientBoostingRegressor(**params)
135 clf.fit(trainX, trainY)
138 with open("bdt_regressor.pickle", "wb") as bdt_regressor_file:
139 pickle.dump(clf, bdt_regressor_file)
143 boost::python::object
main = boost::python::import(
"__main__");
144 boost::python::object global =
main.attr(
"__dict__");
148 boost::python::exec(bdt_regressor_code.c_str(), global);
157 boost::python::object testX = global[
"testX"];
158 boost::python::object shape = testX.attr(
"shape");
159 size_t nRowsTestX = boost::python::extract<size_t>(shape[0]);
160 size_t nColsTestX = boost::python::extract<size_t>(shape[1]);
162 std::vector<std::vector<double> > knowledgeX(nRowsTestX);
163 for (
size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
164 boost::python::object testXRow = testX[iRowTestX];
165 std::vector<double>& variables = knowledgeX[iRowTestX];
166 for (
size_t iColTestX = 0; iColTestX < nColsTestX; ++iColTestX) {
167 variables.push_back(boost::python::extract<double>(testXRow[iColTestX]));
171 std::vector<double> predictions;
172 predictions.reserve(nRowsTestX);
175 auto timeItResult = timeIt(1,
false, [&](){
177 for (
size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
178 const std::vector<double>& variables = knowledgeX[iRowTestX];
179 double prediction = pyClassifier.predict(variables);
180 predictions.push_back(prediction);
183 remove(
"bdt_regressor.pickle");
185 timeItResult.printSummary();
186 B2INFO(
"This might be to slow for serious stuff");
189 boost::python::object testY = global[
"testY"];
190 double squareSum = 0;
191 for (
size_t iRowTestX = 0; iRowTestX < nRowsTestX; ++iRowTestX) {
192 double prediction = predictions[iRowTestX];
193 double target = boost::python::extract<double>(testY[iRowTestX]);
194 squareSum += (prediction - target) * (prediction - target);
197 double mean_square_error = squareSum / nRowsTestX;
198 EXPECT_GT(7, mean_square_error);