Belle II Software  release-05-01-25
PyEstimator.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2015 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Oliver Frost *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 #include <tracking/trackFindingCDC/mva/PyEstimator.h>
11 #include <framework/utilities/FileSystem.h>
12 
13 #include <boost/python/import.hpp>
14 #include <boost/python/extract.hpp>
15 #include <boost/python/list.hpp>
16 #include <boost/python/tuple.hpp>
17 
18 #include <framework/logging/Logger.h>
19 #include <cstdio>
20 
21 using namespace Belle2;
22 using namespace TrackFindingCDC;
23 
24 PyEstimator::PyEstimator(const std::string& pickleFileName) try
25 : m_pickleFileName(pickleFileName)
26 {
27  // Construct an array with one entry
28  // Expand it once the number of variables is known.
29  boost::python::object numpy = boost::python::import("numpy");
30  boost::python::list initValues;
31  initValues.append(0.0);
32  m_array = numpy.attr("array")(initValues);
33  m_nCurrent = boost::python::len(m_array);
34  // boost::python::object array = boost::python::import("array");
35  // m_array = array.attr("array")("d");
36  unpickleEstimator(pickleFileName);
37 } catch (const boost::python::error_already_set&)
38 {
39  PyErr_Print();
40  B2ERROR("Could not construct PyEstimator from " << pickleFileName);
41 }
42 
43 double PyEstimator::predict(const std::vector<double>& inputVariables)
44 {
45  size_t nVars = inputVariables.size();
46  expand(nVars);
47 
48  for (size_t iVar = 0; iVar < nVars; ++iVar) {
49  m_array[boost::python::make_tuple(0, iVar)] = inputVariables[iVar];
50  }
51  return predict(m_array);
52 }
53 
54 double PyEstimator::predict(const std::vector<NamedFloatTuple*>& floatTuples)
55 {
56  size_t nVars = 0;
57  for (NamedFloatTuple* floatTuple : floatTuples) {
58  nVars += floatTuple->size();
59  }
60  expand(nVars);
61  size_t iVar = 0;
62 
63  for (NamedFloatTuple* floatTuple : floatTuples) {
64  for (size_t iTuple = 0; iTuple < floatTuple->size(); ++iTuple) {
65  m_array[boost::python::make_tuple(0, iVar)] = floatTuple->get(iTuple);
66  ++iVar;
67  }
68  }
69  return predict(m_array);
70 }
71 
72 double PyEstimator::predict(boost::python::object& array)
73 {
74  boost::python::object predictions;
75  try {
76  predictions = m_predict(array);
78  // In case of a binary classification we take the signal probability
79  boost::python::object prediction = predictions[0];
80  return boost::python::extract<double>(prediction[1]);
81  } else {
82  // In case of regression we take the regression value
83  boost::python::object prediction = predictions[0];
84  return boost::python::extract<double>(prediction);
85  }
86  } catch (const boost::python::error_already_set&) {
87  PyErr_Print();
88  B2ERROR("Estimation failed in python object");
89  }
90  return NAN;
91 }
92 
93 void PyEstimator::unpickleEstimator(const std::string& pickleFileName)
94 {
95  try {
96  std::string absPickleFilePath = FileSystem::findFile(pickleFileName);
97  boost::python::object io = boost::python::import("io");
98  boost::python::object pickle = boost::python::import("pickle");
99  boost::python::object pickleFile = io.attr("open")(absPickleFilePath, "rb");
100  boost::python::object estimator = pickle.attr("load")(pickleFile);
101  m_estimator = estimator;
102  } catch (const boost::python::error_already_set&) {
103  PyErr_Print();
104  B2ERROR("Could not open pickle file " << pickleFileName);
105  }
106 
107  try {
108  m_predict = m_estimator.attr("predict_proba");
110  } catch (const boost::python::error_already_set&) {
111  // AttributeError occured, but this is allowed to fail
112  // Clear the exception and carry on.
113  PyErr_Clear();
114  B2INFO("Estimator in " << m_pickleFileName << " is not a binary classifier. Trying as regressor");
115  try {
116  m_predict = m_estimator.attr("predict");
118  } catch (const boost::python::error_already_set&) {
119  PyErr_Print();
120  B2ERROR("Could neither find 'predict' not 'predict_proba' in python estimator from file " << pickleFileName);
121  }
122  }
123 }
124 
125 void PyEstimator::expand(size_t nVars)
126 {
127  m_nCurrent = boost::python::len(m_array);
128  if (nVars == m_nCurrent) return;
129  try {
130  boost::python::object numpy = boost::python::import("numpy");
131  boost::python::object shape = boost::python::make_tuple(1, nVars); // one sample with nVars columns
132  m_array = numpy.attr("resize")(m_array, shape);
133  } catch (const boost::python::error_already_set&) {
134  PyErr_Print();
135  B2ERROR("Resize failed");
136  }
137 }
Belle2::TrackFindingCDC::PyEstimator::m_nCurrent
size_t m_nCurrent
Cache for the current length of the input array.
Definition: PyEstimator.h:70
Belle2::TrackFindingCDC::PyEstimator::m_pickleFileName
std::string m_pickleFileName
File name of the pickle file that contains the trained estimator.
Definition: PyEstimator.h:58
Belle2::TrackFindingCDC::PyEstimator::m_array
boost::python::object m_array
Array to be served to the estimator.
Definition: PyEstimator.h:67
Belle2::TrackFindingCDC::PyEstimator::expand
void expand(size_t nVars)
Reserves space for at least n variables in the input array.
Definition: PyEstimator.cc:125
Belle2::TrackFindingCDC::NamedFloatTuple
An abstract tuple of float values where each value has an associated name.
Definition: NamedFloatTuple.h:37
Belle2::TrackFindingCDC::PyEstimator::m_is_binary_classification
bool m_is_binary_classification
Internal flag to keep track whether a binary classification with predict_proba is evaluated.
Definition: PyEstimator.h:73
Belle2::TrackFindingCDC::PyEstimator::m_estimator
boost::python::object m_estimator
Retrained python estimator object.
Definition: PyEstimator.h:61
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::TrackFindingCDC::PyEstimator::PyEstimator
PyEstimator(const std::string &pickleFileName)
Construct the Estimator.
Definition: PyEstimator.cc:24
Belle2::TrackFindingCDC::PyEstimator::unpickleEstimator
void unpickleEstimator(const std::string &pickleFileName)
Load the estimator object from the pickled file.
Definition: PyEstimator.cc:93
Belle2::TrackFindingCDC::PyEstimator::m_predict
boost::python::object m_predict
Python bound prediction method - cached to avoid repeated lookup.
Definition: PyEstimator.h:64
Belle2::FileSystem::findFile
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:147
Belle2::TrackFindingCDC::PyEstimator::predict
double predict(const std::vector< double > &inputVariables)
Call the predict method of the estimator.
Definition: PyEstimator.cc:43