Belle II Software  light-2303-iriomote
Python.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 #include <mva/methods/Python.h>
10 
11 #include <boost/filesystem/convenience.hpp>
12 #include <numpy/npy_common.h>
13 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
14 #include <numpy/arrayobject.h>
15 
16 #include <framework/logging/Logger.h>
17 #include <framework/utilities/FileSystem.h>
18 #include <fstream>
19 #include <numeric>
20 
21 namespace Belle2 {
26  namespace MVA {
27 
28  void PythonOptions::load(const boost::property_tree::ptree& pt)
29  {
30  int version = pt.get<int>("Python_version");
31  if (version < 1 or version > 2) {
32  B2ERROR("Unknown weightfile version " << std::to_string(version));
33  throw std::runtime_error("Unknown weightfile version " + std::to_string(version));
34  }
35  m_framework = pt.get<std::string>("Python_framework");
36  m_steering_file = pt.get<std::string>("Python_steering_file");
37  m_mini_batch_size = pt.get<unsigned int>("Python_mini_batch_size");
38  m_nIterations = pt.get<unsigned int>("Python_n_iterations");
39  m_config = pt.get<std::string>("Python_config");
40  m_training_fraction = pt.get<double>("Python_training_fraction");
41  if (version == 2) {
42  m_normalize = pt.get<bool>("Python_normalize");
43  } else {
44  m_normalize = false;
45  }
46 
47  }
48 
49  void PythonOptions::save(boost::property_tree::ptree& pt) const
50  {
51  pt.put("Python_version", 2);
52  pt.put("Python_framework", m_framework);
53  pt.put("Python_steering_file", m_steering_file);
54  pt.put("Python_mini_batch_size", m_mini_batch_size);
55  pt.put("Python_n_iterations", m_nIterations);
56  pt.put("Python_config", m_config);
57  pt.put("Python_training_fraction", m_training_fraction);
58  pt.put("Python_normalize", m_normalize);
59  }
60 
61  po::options_description PythonOptions::getDescription()
62  {
63  po::options_description description("Python options");
64  description.add_options()
65  ("framework", po::value<std::string>(&m_framework),
66  "Framework which should be used. Currently supported are sklearn, tensorflow and theano")
67  ("steering_file", po::value<std::string>(&m_steering_file), "Steering file which describes")
68  ("mini_batch_size", po::value<unsigned int>(&m_mini_batch_size), "Size of the mini batch given to partial_fit function")
69  ("nIterations", po::value<unsigned int>(&m_nIterations), "Number of iterations")
70  ("normalize", po::value<bool>(&m_normalize), "Normalize input data (shift mean to 0 and std to 1)")
71  ("training_fraction", po::value<double>(&m_training_fraction),
72  "Training fraction used to split up dataset in training and validation sample.")
73  ("config", po::value<std::string>(&m_config), "Json encoded python object passed to begin_fit function");
74  return description;
75  }
76 
82 
83  public:
88 
93 
94  private:
99  {
100  if (not Py_IsInitialized()) {
101  Py_Initialize();
102  // wchar_t* bla[] = {L""};
103  wchar_t** bla = nullptr;
104  PySys_SetArgvEx(0, bla, 0);
105  m_initialized_python = true;
106  }
107 
108  if (PyArray_API == nullptr) {
109  init_numpy();
110  }
111  }
112 
117  {
118  if (m_initialized_python) {
119  if (Py_IsInitialized()) {
120  // We don't finalize Python because this call only frees some memory,
121  // but can cause crashes in loaded python-modules like Theano
122  // https://docs.python.org/3/c-api/init.html
123  // Py_Finalize();
124  }
125  }
126  }
127 
133  void* init_numpy()
134  {
135  // Import array is a macro which returns NUMPY_IMPORT_ARRAY_RETVAL
136  import_array();
137  return nullptr;
138  }
139 
140  bool m_initialized_python = false;
141  };
142 
144  {
145  static PythonInitializerSingleton singleton;
146  return singleton;
147  }
148 
149 
151  const PythonOptions& specific_options) : Teacher(general_options),
152  m_specific_options(specific_options)
153  {
155  }
156 
157 
159  {
160 
161  Weightfile weightfile;
162  std::string custom_weightfile = weightfile.generateFileName();
163  std::string custom_steeringfile = weightfile.generateFileName();
164 
165  uint64_t numberOfFeatures = training_data.getNumberOfFeatures();
166  uint64_t numberOfSpectators = training_data.getNumberOfSpectators();
167  uint64_t numberOfEvents = training_data.getNumberOfEvents();
168 
169  auto numberOfValidationEvents = static_cast<uint64_t>(numberOfEvents * (1 - m_specific_options.m_training_fraction));
170  auto numberOfTrainingEvents = static_cast<uint64_t>(numberOfEvents * m_specific_options.m_training_fraction);
171 
172  uint64_t batch_size = m_specific_options.m_mini_batch_size;
173  if (batch_size == 0) {
174  batch_size = numberOfTrainingEvents;
175  }
176 
177  if (batch_size > numberOfTrainingEvents) {
178  B2WARNING("Mini batch size (" << batch_size << ") is larger than the number of training events (" << numberOfTrainingEvents << ")"\
179  " The batch size has been set equal to the number of training events.");
180  batch_size = numberOfTrainingEvents;
181  };
182 
184  B2ERROR("Please provide a positive training fraction");
185  throw std::runtime_error("Please provide a training fraction between (0.0,1.0]");
186  }
187 
188  auto X = std::unique_ptr<float[]>(new float[batch_size * numberOfFeatures]);
189  auto S = std::unique_ptr<float[]>(new float[batch_size * numberOfSpectators]);
190  auto y = std::unique_ptr<float[]>(new float[batch_size]);
191  auto w = std::unique_ptr<float[]>(new float[batch_size]);
192  npy_intp dimensions_X[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfFeatures)};
193  npy_intp dimensions_S[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfSpectators)};
194  npy_intp dimensions_y[2] = {static_cast<npy_intp>(batch_size), 1};
195  npy_intp dimensions_w[2] = {static_cast<npy_intp>(batch_size), 1};
196 
197  auto X_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfFeatures]);
198  auto S_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfSpectators]);
199  auto y_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
200  auto w_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
201  npy_intp dimensions_X_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfFeatures)};
202  npy_intp dimensions_S_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfSpectators)};
203  npy_intp dimensions_y_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};
204  npy_intp dimensions_w_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};
205 
206  std::string steering_file_source_code;
209  std::ifstream steering_file(filename);
210  if (not steering_file) {
211  throw std::runtime_error(std::string("Couldn't open file ") + filename);
212  }
213  steering_file.seekg(0, std::ios::end);
214  steering_file_source_code.resize(steering_file.tellg());
215  steering_file.seekg(0, std::ios::beg);
216  steering_file.read(&steering_file_source_code[0], steering_file_source_code.size());
217  }
218 
219  std::vector<float> means(numberOfFeatures, 0.0);
220  std::vector<float> stds(numberOfFeatures, 0.0);
221 
223  // Stable calculation of mean and variance with weights
224  // see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
225  auto weights = training_data.getWeights();
226  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
227  double wSum = 0.0;
228  double mean = 0.0;
229  double running_std = 0.0;
230  auto feature = training_data.getFeature(iFeature);
231  for (uint64_t i = 0; i < weights.size(); ++i) {
232  wSum += weights[i];
233  double meanOld = mean;
234  mean += (weights[i] / wSum) * (feature[i] - meanOld);
235  running_std += weights[i] * (feature[i] - meanOld) * (feature[i] - mean);
236  }
237  means[iFeature] = mean;
238  stds[iFeature] = std::sqrt(running_std / (wSum - 1));
239  }
240  }
241 
242  try {
243  // Load python modules
244  auto json = boost::python::import("json");
245  auto builtins = boost::python::import("builtins");
246  auto inspect = boost::python::import("inspect");
247 
248  // Load framework
249  auto framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());
250  // Overwrite framework with user-defined code from the steering file
251  builtins.attr("exec")(steering_file_source_code.c_str(), boost::python::object(framework.attr("__dict__")));
252 
253  // Call get_model with the parameters provided by the user
254  auto parameters = json.attr("loads")(m_specific_options.m_config.c_str());
255  auto model = framework.attr("get_model")(numberOfFeatures, numberOfSpectators,
256  numberOfEvents, m_specific_options.m_training_fraction, parameters);
257 
258  // Call begin_fit with validation sample
259  for (uint64_t iEvent = 0; iEvent < numberOfValidationEvents; ++iEvent) {
260  training_data.loadEvent(iEvent);
262  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
263  X_v[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
264  } else {
265  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
266  X_v[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
267  }
268  for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
269  S_v[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
270  y_v[iEvent] = training_data.m_target;
271  w_v[iEvent] = training_data.m_weight;
272  }
273 
274  auto ndarray_X_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X_v, NPY_FLOAT32, X_v.get()));
275  auto ndarray_S_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S_v, NPY_FLOAT32, S_v.get()));
276  auto ndarray_y_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y_v, NPY_FLOAT32, y_v.get()));
277  auto ndarray_w_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w_v, NPY_FLOAT32, w_v.get()));
278 
279  uint64_t nBatches = std::floor(numberOfTrainingEvents / batch_size);
280 
281  auto state = framework.attr("begin_fit")(model, ndarray_X_v, ndarray_S_v, ndarray_y_v, ndarray_w_v, nBatches);
282 
283  bool continue_loop = true;
284 
285  std::vector<uint64_t> iteration_index_vector(numberOfTrainingEvents);
286  std::iota(std::begin(iteration_index_vector), std::end(iteration_index_vector), 0);
287 
288  for (uint64_t iIteration = 0; (iIteration < m_specific_options.m_nIterations or m_specific_options.m_nIterations == 0)
289  and continue_loop; ++iIteration) {
290 
291  // shuffle the indices on each iteration to get randomised batches
292  if (iIteration > 0) std::shuffle(std::begin(iteration_index_vector), std::end(iteration_index_vector), TRandomWrapper());
293 
294  for (uint64_t iBatch = 0; iBatch < nBatches and continue_loop; ++iBatch) {
295 
296  // Release Global Interpreter Lock in python to allow multithreading while reading root files
297  // also see: https://docs.python.org/3.5/c-api/init.html
298  PyThreadState* m_thread_state = PyEval_SaveThread();
299  for (uint64_t iEvent = 0; iEvent < batch_size; ++iEvent) {
300  training_data.loadEvent(iteration_index_vector.at(iEvent + iBatch * batch_size) + numberOfValidationEvents);
302  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
303  X[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
304  } else {
305  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
306  X[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
307  }
308  for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
309  S[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
310  y[iEvent] = training_data.m_target;
311  w[iEvent] = training_data.m_weight;
312  }
313 
314  // Maybe slow, create ndarrays outside of loop?
315  auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
316  auto ndarray_S = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S, NPY_FLOAT32, S.get()));
317  auto ndarray_y = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y, NPY_FLOAT32, y.get()));
318  auto ndarray_w = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w, NPY_FLOAT32, w.get()));
319 
320  // Reactivate Global Interpreter Lock to safely execute python code
321  PyEval_RestoreThread(m_thread_state);
322  auto r = framework.attr("partial_fit")(state, ndarray_X, ndarray_S, ndarray_y,
323  ndarray_w, iIteration, iBatch);
324  boost::python::extract<bool> proxy(r);
325  if (proxy.check())
326  continue_loop = static_cast<bool>(proxy);
327  }
328  }
329 
330  auto result = framework.attr("end_fit")(state);
331 
332  auto pickle = boost::python::import("pickle");
333  auto file = builtins.attr("open")(custom_weightfile.c_str(), "wb");
334  pickle.attr("dump")(result, file);
335 
336  auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "wb");
337  pickle.attr("dump")(steering_file_source_code.c_str(), steeringfile);
338 
339  auto importances = framework.attr("feature_importance")(state);
340  if (len(importances) == 0) {
341  B2INFO("Python method returned empty feature importance. There won't be any information about the feature importance in the weightfile.");
342  } else if (numberOfFeatures != static_cast<uint64_t>(len(importances))) {
343  B2WARNING("Python method didn't return the correct number of importance value. I ignore the importances");
344  } else {
345  std::map<std::string, float> feature_importances;
346  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
347  boost::python::extract<float> proxy(importances[iFeature]);
348  if (proxy.check()) {
349  feature_importances[m_general_options.m_variables[iFeature]] = static_cast<float>(proxy);
350  } else {
351  B2WARNING("Failed to convert importance output of the method to a float, using 0 instead");
352  feature_importances[m_general_options.m_variables[iFeature]] = 0.0;
353  }
354  }
355  weightfile.addFeatureImportance(feature_importances);
356  }
357 
358  } catch (...) {
359  PyErr_Print();
360  PyErr_Clear();
361  B2ERROR("Failed calling train in PythonTeacher");
362  throw std::runtime_error(std::string("Failed calling train in PythonTeacher"));
363  }
364 
365  weightfile.addOptions(m_general_options);
366  weightfile.addOptions(m_specific_options);
367  weightfile.addFile("Python_Weightfile", custom_weightfile);
368  weightfile.addFile("Python_Steeringfile", custom_steeringfile);
369  weightfile.addSignalFraction(training_data.getSignalFraction());
371  weightfile.addVector("Python_Means", means);
372  weightfile.addVector("Python_Stds", stds);
373  }
374 
375  return weightfile;
376 
377  }
378 
380  {
382  }
383 
384 
385  void PythonExpert::load(Weightfile& weightfile)
386  {
387 
388  std::string custom_weightfile = weightfile.generateFileName();
389  weightfile.getFile("Python_Weightfile", custom_weightfile);
390  weightfile.getOptions(m_general_options);
391  weightfile.getOptions(m_specific_options);
392 
394  m_means = weightfile.getVector<float>("Python_Means");
395  m_stds = weightfile.getVector<float>("Python_Stds");
396  }
397 
398  try {
399  auto pickle = boost::python::import("pickle");
400  auto builtins = boost::python::import("builtins");
401  m_framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());
402 
403  if (weightfile.containsElement("Python_Steeringfile")) {
404  std::string custom_steeringfile = weightfile.generateFileName();
405  weightfile.getFile("Python_Steeringfile", custom_steeringfile);
406  auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "rb");
407  auto source_code = pickle.attr("load")(steeringfile);
408  builtins.attr("exec")(boost::python::object(source_code), boost::python::object(m_framework.attr("__dict__")));
409  }
410 
411  auto file = builtins.attr("open")(custom_weightfile.c_str(), "rb");
412  auto unpickled_fit_object = pickle.attr("load")(file);
413  m_state = m_framework.attr("load")(unpickled_fit_object);
414  } catch (...) {
415  PyErr_Print();
416  PyErr_Clear();
417  B2ERROR("Failed calling load in PythonExpert");
418  throw std::runtime_error("Failed calling load in PythonExpert");
419  }
420 
421  }
422 
423  std::vector<float> PythonExpert::apply(Dataset& test_data) const
424  {
425 
426  uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
427  uint64_t numberOfEvents = test_data.getNumberOfEvents();
428 
429  auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
430  npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};
431 
432  for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
433  test_data.loadEvent(iEvent);
435  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
436  X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
437  } else {
438  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
439  X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
440  }
441  }
442 
443  std::vector<float> probabilities(test_data.getNumberOfEvents(), std::numeric_limits<float>::quiet_NaN());
444 
445  try {
446  auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
447  auto result = m_framework.attr("apply")(m_state, ndarray_X);
448  for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
449  // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
450  // to a PyObject but do not inherit from it!
451  probabilities[iEvent] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR1(reinterpret_cast<PyArrayObject*>(result.ptr()),
452  iEvent)));
453  }
454  } catch (...) {
455  PyErr_Print();
456  PyErr_Clear();
457  B2ERROR("Failed calling applying PythonExpert");
458  throw std::runtime_error("Failed calling applying PythonExpert");
459  }
460 
461  return probabilities;
462  }
463 
464  std::vector<std::vector<float>> PythonExpert::applyMulticlass(Dataset& test_data) const
465  {
466 
467  uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
468  uint64_t numberOfEvents = test_data.getNumberOfEvents();
469 
470  auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
471  npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};
472 
473  for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
474  test_data.loadEvent(iEvent);
476  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
477  X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
478  } else {
479  for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
480  X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
481  }
482  }
483 
484  unsigned int nClasses = m_general_options.m_nClasses;
485  std::vector<std::vector<float>> probabilities(test_data.getNumberOfEvents(), std::vector<float>(nClasses,
486  std::numeric_limits<float>::quiet_NaN()));
487 
488  try {
489  auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
490  auto result = m_framework.attr("apply")(m_state, ndarray_X);
491  for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
492  // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
493  // to a PyObject but do not inherit from it!
494  for (uint64_t iClass = 0; iClass < nClasses; ++iClass) {
495  probabilities[iEvent][iClass] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR2(reinterpret_cast<PyArrayObject*>
496  (result.ptr()),
497  iEvent, iClass)));
498  }
499  }
500  } catch (...) {
501  PyErr_Print();
502  PyErr_Clear();
503  B2ERROR("Failed calling applying PythonExpert");
504  throw std::runtime_error("Failed calling applying PythonExpert");
505  }
506 
507  return probabilities;
508  }
509  }
511 }
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return the absolute path if found (empty string otherwise).
Definition: FileSystem.cc:145
Abstract base class of all Datasets given to the MVA interface. The current event can always be accessed via its public members after loadEvent().
Definition: Dataset.h:33
GeneralOptions m_general_options
General options loaded from the weightfile.
Definition: Expert.h:70
General options which are shared by all MVA trainings.
Definition: Options.h:62
std::vector< std::string > m_variables
Vector of all variables (branch names) used in the training.
Definition: Options.h:86
unsigned int m_nClasses
Number of classes in a classification problem.
Definition: Options.h:89
PythonExpert()
Constructs a new Python Expert.
Definition: Python.cc:379
boost::python::object m_state
current state object of method
Definition: Python.h:160
std::vector< float > m_stds
Stds of all features for normalization.
Definition: Python.h:162
boost::python::object m_framework
Framework module.
Definition: Python.h:159
virtual std::vector< float > apply(Dataset &test_data) const override
Apply this expert onto a dataset.
Definition: Python.cc:423
PythonOptions m_specific_options
Method specific options.
Definition: Python.h:158
virtual void load(Weightfile &weightfile) override
Load the expert from a Weightfile.
Definition: Python.cc:385
std::vector< float > m_means
Means of all features for normalization.
Definition: Python.h:161
virtual std::vector< std::vector< float > > applyMulticlass(Dataset &test_data) const override
Apply this expert onto a dataset for multiclass problem.
Definition: Python.cc:464
Singleton class which handles the initialization and finalization of Python and numpy.
Definition: Python.cc:81
void * init_numpy()
Helper function which initializes array system of numpy.
Definition: Python.cc:133
~PythonInitializerSingleton()
Destructor of PythonInitializerSingleton.
Definition: Python.cc:116
bool m_initialized_python
Member which keeps indicate if this class initialized python.
Definition: Python.cc:140
static PythonInitializerSingleton & GetInstance()
Return static instance of PythonInitializerSingleton.
Definition: Python.cc:143
PythonInitializerSingleton()
Constructor of PythonInitializerSingleton.
Definition: Python.cc:98
PythonInitializerSingleton(const PythonInitializerSingleton &)=delete
Forbid copy constructor of PythonInitializerSingleton.
Options for the Python MVA method.
Definition: Python.h:54
unsigned int m_nIterations
Number of iterations through the whole data.
Definition: Python.h:83
std::string m_steering_file
steering file provided by the user to override the functions in the framework
Definition: Python.h:80
std::string m_framework
framework to use e.g.
Definition: Python.h:79
std::string m_config
Config string in json, which is passed to the get model function.
Definition: Python.h:81
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition: Python.cc:61
bool m_normalize
Normalize the inputs (shift mean to zero and std to 1)
Definition: Python.h:85
double m_training_fraction
Fraction of data passed as training data, rest is passed as test data.
Definition: Python.h:84
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from a xml tree.
Definition: Python.cc:28
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism to store Options in a xml tree.
Definition: Python.cc:49
unsigned int m_mini_batch_size
Mini batch size, 0 passes the whole data in one call.
Definition: Python.h:82
PythonTeacher(const GeneralOptions &general_options, const PythonOptions &specific_options)
Constructs a new teacher using the GeneralOptions and specific options of this training.
Definition: Python.cc:150
PythonOptions m_specific_options
Method specific options.
Definition: Python.h:109
virtual Weightfile train(Dataset &training_data) const override
Train a mva method using the given dataset returning a Weightfile.
Definition: Python.cc:158
Abstract base class of all Teachers Each MVA library has its own implementation of this class,...
Definition: Teacher.h:29
GeneralOptions m_general_options
GeneralOptions containing all shared options.
Definition: Teacher.h:49
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
void addFile(const std::string &identifier, const std::string &custom_weightfile)
Add a file (mostly a weightfile from a MVA library) to our Weightfile.
Definition: Weightfile.cc:114
bool containsElement(const std::string &identifier) const
Returns true if given element is stored in the property tree.
Definition: Weightfile.h:160
void addOptions(const Options &options)
Add an Option object to the xml tree.
Definition: Weightfile.cc:61
std::vector< T > getVector(const std::string &identifier) const
Returns a stored vector from the xml tree.
Definition: Weightfile.h:181
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition: Weightfile.cc:66
void addSignalFraction(float signal_fraction)
Saves the signal fraction in the xml tree.
Definition: Weightfile.cc:94
void addFeatureImportance(const std::map< std::string, float > &importance)
Add variable importance.
Definition: Weightfile.cc:71
void addVector(const std::string &identifier, const std::vector< T > &vector)
Add a vector to the xml tree.
Definition: Weightfile.h:125
std::string generateFileName(const std::string &suffix="")
Returns a temporary filename with the given suffix.
Definition: Weightfile.cc:104
void getFile(const std::string &identifier, const std::string &custom_weightfile)
Creates a file from our weightfile (mostly this will be a weightfile of an MVA library)
Definition: Weightfile.cc:137
Abstract base class for different kinds of events.
Definition: ClusterUtils.h:23
Wrap TRandom to be useable as a uniform random number generator with std algorithms like std::shuffle...
Definition: Python.h:113