Belle II Software development
Python.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
#include <mva/methods/Python.h>

#include <numpy/npy_common.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include <framework/logging/Logger.h>
#include <framework/utilities/FileSystem.h>
#include <framework/utilities/TRandomWrapper.h>

#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>
26namespace Belle2 {
31 namespace MVA {
32 static std::string loadPythonFileAsString(std::string name)
33 {
34 std::string filename = FileSystem::findFile(name);
35 std::ifstream steering_file(filename);
36 if (not steering_file) {
37 throw std::runtime_error(std::string("Couldn't open file ") + filename);
38 }
39 steering_file.seekg(0, std::ios::end);
40 std::string output_string;
41 output_string.resize(steering_file.tellg());
42 steering_file.seekg(0, std::ios::beg);
43 steering_file.read(&output_string[0], output_string.size());
44 return output_string;
45 }
46
47 void PythonOptions::load(const boost::property_tree::ptree& pt)
48 {
49 int version = pt.get<int>("Python_version");
50 if (version < 1 or version > 2) {
51 B2ERROR("Unknown weightfile version " << std::to_string(version));
52 throw std::runtime_error("Unknown weightfile version " + std::to_string(version));
53 }
54 m_framework = pt.get<std::string>("Python_framework");
55 m_steering_file = pt.get<std::string>("Python_steering_file");
56 m_mini_batch_size = pt.get<unsigned int>("Python_mini_batch_size");
57 m_nIterations = pt.get<unsigned int>("Python_n_iterations");
58 m_config = pt.get<std::string>("Python_config");
59 m_training_fraction = pt.get<double>("Python_training_fraction");
60 if (version == 2) {
61 m_normalize = pt.get<bool>("Python_normalize");
62 } else {
63 m_normalize = false;
64 }
65
66 }
67
68 void PythonOptions::save(boost::property_tree::ptree& pt) const
69 {
70 pt.put("Python_version", 2);
71 pt.put("Python_framework", m_framework);
72 pt.put("Python_steering_file", m_steering_file);
73 pt.put("Python_mini_batch_size", m_mini_batch_size);
74 pt.put("Python_n_iterations", m_nIterations);
75 pt.put("Python_config", m_config);
76 pt.put("Python_training_fraction", m_training_fraction);
77 pt.put("Python_normalize", m_normalize);
78 }
79
80 po::options_description PythonOptions::getDescription()
81 {
82 po::options_description description("Python options");
83 description.add_options()
84 ("framework", po::value<std::string>(&m_framework),
85 "Framework which should be used. Currently supported are sklearn, xgboost, tensorflow, keras, torch, and theano")
86 ("steering_file", po::value<std::string>(&m_steering_file), "Steering file which describes the model")
87 ("mini_batch_size", po::value<unsigned int>(&m_mini_batch_size), "Size of the mini batch given to partial_fit function")
88 ("nIterations", po::value<unsigned int>(&m_nIterations), "Number of iterations")
89 ("normalize", po::value<bool>(&m_normalize), "Normalize input data (shift mean to 0 and std to 1)")
90 ("training_fraction", po::value<double>(&m_training_fraction),
91 "Training fraction used to split up dataset in training and validation sample.")
92 ("config", po::value<std::string>(&m_config), "Json encoded python object passed to begin_fit function");
93 return description;
94 }
95
101
102 public:
107
112
113 private:
118 {
119 if (not Py_IsInitialized()) {
120 Py_Initialize();
122 }
123
124 if (PyArray_API == nullptr) {
125 init_numpy();
126 }
127 }
128
133 {
135 if (Py_IsInitialized()) {
136 // We don't finalize Python because this call only frees some memory,
137 // but can cause crashes in loaded python-modules like Theano
138 // https://docs.python.org/3/c-api/init.html
139 // Py_Finalize();
140 }
141 }
142 }
143
150 {
151 // Import array is a macro which returns NUMPY_IMPORT_ARRAY_RETVAL
152 import_array();
153 return nullptr;
154 }
155
156 bool m_initialized_python = false;
157 };
158
164
165
167 const PythonOptions& specific_options) : Teacher(general_options),
168 m_specific_options(specific_options)
169 {
171 }
172
174 {
175
176 Weightfile weightfile;
177 std::string custom_weightfile = weightfile.generateFileName();
178 std::string custom_steeringfile = weightfile.generateFileName();
179
180 uint64_t numberOfFeatures = training_data.getNumberOfFeatures();
181 uint64_t numberOfSpectators = training_data.getNumberOfSpectators();
182 uint64_t numberOfEvents = training_data.getNumberOfEvents();
183
184 if (m_specific_options.m_training_fraction <= 0.0 or m_specific_options.m_training_fraction > 1.0) {
185 B2ERROR("Please provide a positive training fraction");
186 throw std::runtime_error("Please provide a training fraction between (0.0,1.0]");
187 }
188
189 auto numberOfTrainingEvents = static_cast<uint64_t>(numberOfEvents * 100 * m_specific_options.m_training_fraction);
190 numberOfTrainingEvents = numberOfTrainingEvents / 100 + (numberOfTrainingEvents % 100 != 0);
191 auto numberOfValidationEvents = numberOfEvents - numberOfTrainingEvents;
192
193 uint64_t batch_size = m_specific_options.m_mini_batch_size;
194 if (batch_size == 0) {
195 batch_size = numberOfTrainingEvents;
196 }
197
198 if (batch_size > numberOfTrainingEvents) {
199 B2WARNING("Mini batch size (" << batch_size << ") is larger than the number of training events (" << numberOfTrainingEvents << ")"\
200 " The batch size has been set equal to the number of training events.");
201 batch_size = numberOfTrainingEvents;
202 };
203
204 auto X = std::unique_ptr<float[]>(new float[batch_size * numberOfFeatures]);
205 auto S = std::unique_ptr<float[]>(new float[batch_size * numberOfSpectators]);
206 auto y = std::unique_ptr<float[]>(new float[batch_size]);
207 auto w = std::unique_ptr<float[]>(new float[batch_size]);
208 npy_intp dimensions_X[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfFeatures)};
209 npy_intp dimensions_S[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfSpectators)};
210 npy_intp dimensions_y[2] = {static_cast<npy_intp>(batch_size), 1};
211 npy_intp dimensions_w[2] = {static_cast<npy_intp>(batch_size), 1};
212
213 auto X_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfFeatures]);
214 auto S_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfSpectators]);
215 auto y_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
216 auto w_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
217 npy_intp dimensions_X_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfFeatures)};
218 npy_intp dimensions_S_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfSpectators)};
219 npy_intp dimensions_y_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};
220 npy_intp dimensions_w_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};
221
222 std::string steering_file_source_code;
223 if (m_specific_options.m_steering_file != "") {
224 steering_file_source_code = loadPythonFileAsString(m_specific_options.m_steering_file);
225 }
226
227 std::vector<float> means(numberOfFeatures, 0.0);
228 std::vector<float> stds(numberOfFeatures, 0.0);
229
230 if (m_specific_options.m_normalize) {
231 // Stable calculation of mean and variance with weights
232 // see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
233 auto weights = training_data.getWeights();
234 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
235 double wSum = 0.0;
236 double mean = 0.0;
237 double running_std = 0.0;
238 auto feature = training_data.getFeature(iFeature);
239 for (uint64_t i = 0; i < weights.size(); ++i) {
240 wSum += weights[i];
241 double meanOld = mean;
242 mean += (weights[i] / wSum) * (feature[i] - meanOld);
243 running_std += weights[i] * (feature[i] - meanOld) * (feature[i] - mean);
244 }
245 means[iFeature] = mean;
246 stds[iFeature] = std::sqrt(running_std / (wSum - 1));
247 }
248 }
249
250 try {
251 // Load python modules
252 auto json = boost::python::import("json");
253 auto builtins = boost::python::import("builtins");
254 auto inspect = boost::python::import("inspect");
255
256 // Create a new empty module with a unique name.
257 // This way we dont end up with multiple mvas trying to overwrite the same apply method with the last one being used by all.
258 boost::python::object type = boost::python::import("types");
259
260 // Generate a unique module
261 boost::uuids::random_generator uuid_gen;
262 std::string unique_mva_module_name = "unique_module_name" + boost::uuids::to_string(uuid_gen());
263 boost::python::object unique_mva_module = type.attr("ModuleType")(unique_mva_module_name.c_str());
264
265 // Find the framework file. Then execute it in the scope of the new module
266 auto framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());
267 auto framework_file = framework.attr("__file__");
268 auto framework_file_source_code = loadPythonFileAsString(boost::python::extract<std::string>(boost::python::object(
269 framework_file)));
270 builtins.attr("exec")(framework_file_source_code.c_str(), boost::python::object(unique_mva_module.attr("__dict__")));
271 // Overwrite framework with user-defined code from the steering file
272 builtins.attr("exec")(steering_file_source_code.c_str(), boost::python::object(unique_mva_module.attr("__dict__")));
273
274 // Call get_model with the parameters provided by the user
275 auto parameters = json.attr("loads")(m_specific_options.m_config.c_str());
276 auto model = unique_mva_module.attr("get_model")(numberOfFeatures, numberOfSpectators,
277 numberOfEvents, m_specific_options.m_training_fraction, parameters);
278
279 // Call begin_fit with validation sample
280 for (uint64_t iEvent = 0; iEvent < numberOfValidationEvents; ++iEvent) {
281 training_data.loadEvent(iEvent);
282 if (m_specific_options.m_normalize) {
283 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
284 X_v[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
285 } else {
286 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
287 X_v[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
288 }
289 for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
290 S_v[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
291 y_v[iEvent] = training_data.m_target;
292 w_v[iEvent] = training_data.m_weight;
293 }
294
295 auto ndarray_X_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X_v, NPY_FLOAT32, X_v.get()));
296 auto ndarray_S_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S_v, NPY_FLOAT32, S_v.get()));
297 auto ndarray_y_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y_v, NPY_FLOAT32, y_v.get()));
298 auto ndarray_w_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w_v, NPY_FLOAT32, w_v.get()));
299
300 uint64_t nBatches = std::floor(numberOfTrainingEvents / batch_size);
301
302 auto state = unique_mva_module.attr("begin_fit")(model, ndarray_X_v, ndarray_S_v, ndarray_y_v, ndarray_w_v, nBatches);
303
304 bool continue_loop = true;
305
306 std::vector<uint64_t> iteration_index_vector(numberOfTrainingEvents);
307 std::iota(std::begin(iteration_index_vector), std::end(iteration_index_vector), 0);
308
309 for (uint64_t iIteration = 0; (iIteration < m_specific_options.m_nIterations or m_specific_options.m_nIterations == 0)
310 and continue_loop; ++iIteration) {
311
312 // shuffle the indices on each iteration to get randomised batches
313 if (iIteration > 0) std::shuffle(std::begin(iteration_index_vector), std::end(iteration_index_vector), TRandomWrapper());
314
315 for (uint64_t iBatch = 0; iBatch < nBatches and continue_loop; ++iBatch) {
316
317 // Release Global Interpreter Lock in python to allow multithreading while reading root files
318 // also see: https://docs.python.org/3.5/c-api/init.html
319 PyThreadState* m_thread_state = PyEval_SaveThread();
320 for (uint64_t iEvent = 0; iEvent < batch_size; ++iEvent) {
321 training_data.loadEvent(iteration_index_vector.at(iEvent + iBatch * batch_size) + numberOfValidationEvents);
322 if (m_specific_options.m_normalize) {
323 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
324 X[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
325 } else {
326 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
327 X[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
328 }
329 for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
330 S[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
331 y[iEvent] = training_data.m_target;
332 w[iEvent] = training_data.m_weight;
333 }
334
335 // Maybe slow, create ndarrays outside of loop?
336 auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
337 auto ndarray_S = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S, NPY_FLOAT32, S.get()));
338 auto ndarray_y = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y, NPY_FLOAT32, y.get()));
339 auto ndarray_w = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w, NPY_FLOAT32, w.get()));
340
341 // Reactivate Global Interpreter Lock to safely execute python code
342 PyEval_RestoreThread(m_thread_state);
343 auto r = unique_mva_module.attr("partial_fit")(state, ndarray_X, ndarray_S, ndarray_y,
344 ndarray_w, iIteration, iBatch);
345 boost::python::extract<bool> proxy(r);
346 if (proxy.check())
347 continue_loop = static_cast<bool>(proxy);
348 }
349 }
350
351 auto result = unique_mva_module.attr("end_fit")(state);
352
353 auto pickle = boost::python::import("pickle");
354 auto file = builtins.attr("open")(custom_weightfile.c_str(), "wb");
355 pickle.attr("dump")(result, file);
356
357 auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "wb");
358 pickle.attr("dump")(steering_file_source_code.c_str(), steeringfile);
359
360 auto importances = unique_mva_module.attr("feature_importance")(state);
361 if (len(importances) == 0) {
362 B2INFO("Python method returned empty feature importance. There won't be any information about the feature importance in the weightfile.");
363 } else if (numberOfFeatures != static_cast<uint64_t>(len(importances))) {
364 B2WARNING("Python method didn't return the correct number of importance value. I ignore the importances");
365 } else {
366 std::map<std::string, float> feature_importances;
367 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
368 boost::python::extract<float> proxy(importances[iFeature]);
369 if (proxy.check()) {
370 feature_importances[m_general_options.m_variables[iFeature]] = static_cast<float>(proxy);
371 } else {
372 B2WARNING("Failed to convert importance output of the method to a float, using 0 instead");
373 feature_importances[m_general_options.m_variables[iFeature]] = 0.0;
374 }
375 }
376 weightfile.addFeatureImportance(feature_importances);
377 }
378
379 } catch (...) {
380 PyErr_Print();
381 PyErr_Clear();
382 B2ERROR("Failed calling train in PythonTeacher");
383 throw std::runtime_error(std::string("Failed calling train in PythonTeacher"));
384 }
385
386 weightfile.addOptions(m_general_options);
387 weightfile.addOptions(m_specific_options);
388 weightfile.addFile("Python_Weightfile", custom_weightfile);
389 weightfile.addFile("Python_Steeringfile", custom_steeringfile);
390 weightfile.addSignalFraction(training_data.getSignalFraction());
391 if (m_specific_options.m_normalize) {
392 weightfile.addVector("Python_Means", means);
393 weightfile.addVector("Python_Stds", stds);
394 }
395
396 return weightfile;
397
398 }
399
404
405
407 {
408
409 std::string custom_weightfile = weightfile.generateFileName();
410 weightfile.getFile("Python_Weightfile", custom_weightfile);
411 weightfile.getOptions(m_general_options);
412 weightfile.getOptions(m_specific_options);
413
414 if (m_specific_options.m_normalize) {
415 m_means = weightfile.getVector<float>("Python_Means");
416 m_stds = weightfile.getVector<float>("Python_Stds");
417 }
418
419 try {
420 auto pickle = boost::python::import("pickle");
421 auto builtins = boost::python::import("builtins");
422
423 // Create a new empty module with a unique name.
424 // This way we dont end up with multiple mvas trying to implement
425 // the same apply method with the last one being used by all.
426 boost::uuids::random_generator uuid_gen;
427 std::string unique_mva_module_name = "custom_framework_" + boost::uuids::to_string(uuid_gen());
428 boost::python::object type = boost::python::import("types");
429 m_unique_mva_module = type.attr("ModuleType")(unique_mva_module_name.c_str());
430
431 // Find the framework file. Then execute it in the scope of the new module
432 auto framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());
433 auto framework_file = framework.attr("__file__");
434 auto framework_file_source_code = loadPythonFileAsString(boost::python::extract<std::string>(boost::python::object(
435 framework_file)));
436 builtins.attr("exec")(framework_file_source_code.c_str(), boost::python::object(m_unique_mva_module.attr("__dict__")));
437
438 // Overwrite framework with user-defined code from the steering file if defined
439 if (weightfile.containsElement("Python_Steeringfile")) {
440 std::string custom_steeringfile = weightfile.generateFileName();
441 weightfile.getFile("Python_Steeringfile", custom_steeringfile);
442 auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "rb");
443 auto source_code = pickle.attr("load")(steeringfile);
444 builtins.attr("exec")(boost::python::object(source_code), boost::python::object(m_unique_mva_module.attr("__dict__")));
445 }
446
447 auto file = builtins.attr("open")(custom_weightfile.c_str(), "rb");
448 auto unpickled_fit_object = pickle.attr("load")(file);
449 m_state = m_unique_mva_module.attr("load")(unpickled_fit_object);
450 } catch (...) {
451 PyErr_Print();
452 PyErr_Clear();
453 B2ERROR("Failed calling load in PythonExpert");
454 throw std::runtime_error("Failed calling load in PythonExpert");
455 }
456
457 }
458
459 std::vector<float> PythonExpert::apply(Dataset& test_data) const
460 {
461
462 uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
463 uint64_t numberOfEvents = test_data.getNumberOfEvents();
464
465 auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
466 npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};
467
468 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
469 test_data.loadEvent(iEvent);
470 if (m_specific_options.m_normalize) {
471 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
472 X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
473 } else {
474 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
475 X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
476 }
477 }
478
479 std::vector<float> probabilities(test_data.getNumberOfEvents(), std::numeric_limits<float>::quiet_NaN());
480
481 try {
482 auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
483 auto result = m_unique_mva_module.attr("apply")(m_state, ndarray_X);
484 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
485 // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
486 // to a PyObject but do not inherit from it!
487 probabilities[iEvent] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR1(reinterpret_cast<PyArrayObject*>(result.ptr()),
488 iEvent)));
489 }
490 } catch (...) {
491 PyErr_Print();
492 PyErr_Clear();
493 B2ERROR("Failed calling applying PythonExpert");
494 throw std::runtime_error("Failed calling applying PythonExpert");
495 }
496
497 return probabilities;
498 }
499
500 std::vector<std::vector<float>> PythonExpert::applyMulticlass(Dataset& test_data) const
501 {
502
503 uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
504 uint64_t numberOfEvents = test_data.getNumberOfEvents();
505
506 auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
507 npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};
508
509 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
510 test_data.loadEvent(iEvent);
511 if (m_specific_options.m_normalize) {
512 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
513 X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
514 } else {
515 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
516 X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
517 }
518 }
519
520 unsigned int nClasses = m_general_options.m_nClasses;
521 std::vector<std::vector<float>> probabilities(test_data.getNumberOfEvents(), std::vector<float>(nClasses,
522 std::numeric_limits<float>::quiet_NaN()));
523
524 try {
525 auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
526 auto result = m_unique_mva_module.attr("apply")(m_state, ndarray_X);
527 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
528 // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
529 // to a PyObject but do not inherit from it!
530 for (uint64_t iClass = 0; iClass < nClasses; ++iClass) {
531 probabilities[iEvent][iClass] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR2(reinterpret_cast<PyArrayObject*>
532 (result.ptr()),
533 iEvent, iClass)));
534 }
535 }
536 } catch (...) {
537 PyErr_Print();
538 PyErr_Clear();
539 B2ERROR("Failed calling applying PythonExpert");
540 throw std::runtime_error("Failed calling applying PythonExpert");
541 }
542
543 return probabilities;
544 }
545 }
547}
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition Dataset.h:33
GeneralOptions m_general_options
General options loaded from the weightfile.
Definition Expert.h:70
General options which are shared by all MVA trainings.
Definition Options.h:62
boost::python::object m_unique_mva_module
python module containing the mva methods
Definition Python.h:141
PythonExpert()
Constructs a new Python Expert.
Definition Python.cc:400
boost::python::object m_state
current state object of method
Definition Python.h:142
std::vector< float > m_stds
Stds of all features for normalization.
Definition Python.h:144
virtual std::vector< float > apply(Dataset &test_data) const override
Apply this expert onto a dataset.
Definition Python.cc:459
PythonOptions m_specific_options
Method specific options.
Definition Python.h:140
virtual void load(Weightfile &weightfile) override
Load the expert from a Weightfile.
Definition Python.cc:406
std::vector< float > m_means
Means of all features for normalization.
Definition Python.h:143
virtual std::vector< std::vector< float > > applyMulticlass(Dataset &test_data) const override
Apply this expert onto a dataset for multiclass problem.
Definition Python.cc:500
void * init_numpy()
Helper function which initializes array system of numpy.
Definition Python.cc:149
~PythonInitializerSingleton()
Destructor of PythonInitializerSingleton.
Definition Python.cc:132
bool m_initialized_python
Member which keeps indicate if this class initialized python.
Definition Python.cc:156
static PythonInitializerSingleton & GetInstance()
Return static instance of PythonInitializerSingleton.
Definition Python.cc:159
PythonInitializerSingleton()
Constructor of PythonInitializerSingleton.
Definition Python.cc:117
PythonInitializerSingleton(const PythonInitializerSingleton &)=delete
Forbid copy constructor of PythonInitializerSingleton.
Options for the Python MVA method.
Definition Python.h:52
unsigned int m_nIterations
Number of iterations through the whole data.
Definition Python.h:81
std::string m_steering_file
steering file provided by the user to override the functions in the framework
Definition Python.h:78
std::string m_framework
framework to use e.g.
Definition Python.h:77
std::string m_config
Config string in json, which is passed to the get model function.
Definition Python.h:79
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition Python.cc:80
bool m_normalize
Normalize the inputs (shift mean to zero and std to 1)
Definition Python.h:83
double m_training_fraction
Fraction of data passed as training data, rest is passed as test data.
Definition Python.h:82
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from a xml tree.
Definition Python.cc:47
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism to store Options in a xml tree.
Definition Python.cc:68
unsigned int m_mini_batch_size
Mini batch size, 0 passes the whole data in one call.
Definition Python.h:80
PythonTeacher(const GeneralOptions &general_options, const PythonOptions &specific_options)
Constructs a new teacher using the GeneralOptions and specific options of this training.
Definition Python.cc:166
PythonOptions m_specific_options
Method specific options.
Definition Python.h:107
virtual Weightfile train(Dataset &training_data) const override
Train a mva method using the given dataset returning a Weightfile.
Definition Python.cc:173
GeneralOptions m_general_options
GeneralOptions containing all shared options.
Definition Teacher.h:49
Teacher(const GeneralOptions &general_options)
Constructs a new teacher using the GeneralOptions for this training.
Definition Teacher.cc:18
The Weightfile class serializes all information about a training into an xml tree.
Definition Weightfile.h:38
void addFile(const std::string &identifier, const std::string &custom_weightfile)
Add a file (mostly a weightfile from a MVA library) to our Weightfile.
bool containsElement(const std::string &identifier) const
Returns true if given element is stored in the property tree.
Definition Weightfile.h:160
std::vector< T > getVector(const std::string &identifier) const
Returns a stored vector from the xml tree.
Definition Weightfile.h:181
void addOptions(const Options &options)
Add an Option object to the xml tree.
Definition Weightfile.cc:62
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition Weightfile.cc:67
void addSignalFraction(float signal_fraction)
Saves the signal fraction in the xml tree.
Definition Weightfile.cc:95
void addFeatureImportance(const std::map< std::string, float > &importance)
Add variable importance.
Definition Weightfile.cc:72
void addVector(const std::string &identifier, const std::vector< T > &vector)
Add a vector to the xml tree.
Definition Weightfile.h:125
std::string generateFileName(const std::string &suffix="")
Returns a temporary filename with the given suffix.
void getFile(const std::string &identifier, const std::string &custom_weightfile)
Creates a file from our weightfile (mostly this will be a weightfile of an MVA library)
Abstract base class for different kinds of events.
Wrap TRandom to be usable as a uniform random number generator with STL algorithms like std::shuffle.