Belle II Software development
Python.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9#include <mva/methods/Python.h>
10
11#include <numpy/npy_common.h>
12#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
13#include <numpy/arrayobject.h>
14
15#include <framework/logging/Logger.h>
16#include <framework/utilities/FileSystem.h>
17#include <framework/utilities/TRandomWrapper.h>
18
19#include <boost/uuid/uuid.hpp>
20#include <boost/uuid/uuid_generators.hpp>
21#include <boost/uuid/uuid_io.hpp>
22
#include <fstream>
#include <iterator>
#include <numeric>
25
26namespace Belle2 {
31 namespace MVA {
32 static std::string loadPythonFileAsString(std::string name)
33 {
34 std::string filename = FileSystem::findFile(name);
35 std::ifstream steering_file(filename);
36 if (not steering_file) {
37 throw std::runtime_error(std::string("Couldn't open file ") + filename);
38 }
39 steering_file.seekg(0, std::ios::end);
40 std::string output_string;
41 output_string.resize(steering_file.tellg());
42 steering_file.seekg(0, std::ios::beg);
43 steering_file.read(&output_string[0], output_string.size());
44 return output_string;
45 }
46
47 void PythonOptions::load(const boost::property_tree::ptree& pt)
48 {
49 int version = pt.get<int>("Python_version");
50 if (version < 1 or version > 2) {
51 B2ERROR("Unknown weightfile version " << std::to_string(version));
52 throw std::runtime_error("Unknown weightfile version " + std::to_string(version));
53 }
54 m_framework = pt.get<std::string>("Python_framework");
55 m_steering_file = pt.get<std::string>("Python_steering_file");
56 m_mini_batch_size = pt.get<unsigned int>("Python_mini_batch_size");
57 m_nIterations = pt.get<unsigned int>("Python_n_iterations");
58 m_config = pt.get<std::string>("Python_config");
59 m_training_fraction = pt.get<double>("Python_training_fraction");
60 if (version == 2) {
61 m_normalize = pt.get<bool>("Python_normalize");
62 } else {
63 m_normalize = false;
64 }
65
66 }
67
    /**
     * Store the python-specific options in the given property tree.
     * Always writes the current serialization version (2), including the
     * normalization flag introduced with that version.
     * NOTE: the put order below defines the element order in the weightfile
     * xml — keep it stable.
     * @param pt property tree the options are written into
     */
    void PythonOptions::save(boost::property_tree::ptree& pt) const
    {
      pt.put("Python_version", 2);
      pt.put("Python_framework", m_framework);
      pt.put("Python_steering_file", m_steering_file);
      pt.put("Python_mini_batch_size", m_mini_batch_size);
      pt.put("Python_n_iterations", m_nIterations);
      pt.put("Python_config", m_config);
      pt.put("Python_training_fraction", m_training_fraction);
      pt.put("Python_normalize", m_normalize);
    }
79
80 po::options_description PythonOptions::getDescription()
81 {
82 po::options_description description("Python options");
83 description.add_options()
84 ("framework", po::value<std::string>(&m_framework),
85 "Framework which should be used. Currently supported are sklearn, xgboost, tensorflow, keras and torch")
86 ("steering_file", po::value<std::string>(&m_steering_file), "Steering file which describes the model")
87 ("mini_batch_size", po::value<unsigned int>(&m_mini_batch_size), "Size of the mini batch given to partial_fit function")
88 ("nIterations", po::value<unsigned int>(&m_nIterations), "Number of iterations")
89 ("normalize", po::value<bool>(&m_normalize), "Normalize input data (shift mean to 0 and std to 1)")
90 ("training_fraction", po::value<double>(&m_training_fraction),
91 "Training fraction used to split up dataset in training and validation sample.")
92 ("config", po::value<std::string>(&m_config), "Json encoded python object passed to begin_fit function");
93 return description;
94 }
95
101
102 public:
107
112
113 private:
118 {
119 if (not Py_IsInitialized()) {
120 Py_Initialize();
122 }
123
124 if (PyArray_API == nullptr) {
125 init_numpy();
126 }
127 }
128
133 {
135 if (Py_IsInitialized()) {
136 // We don't finalize Python because this call only frees some memory,
137 // but can cause crashes in loaded python-modules
138 // https://docs.python.org/3/c-api/init.html
139 // Py_Finalize();
140 }
141 }
142 }
143
150 {
151 // Import array is a macro which returns NUMPY_IMPORT_ARRAY_RETVAL
152 import_array();
153 return nullptr;
154 }
155
156 bool m_initialized_python = false;
157 };
158
164
165
167 const PythonOptions& specific_options) : Teacher(general_options),
168 m_specific_options(specific_options)
169 {
171 }
172
    {
      // Train the python-side model:
      //  1) validate options and split the dataset into validation/training part,
      //  2) optionally compute per-feature mean/std for input normalization,
      //  3) stream the training data batch-wise into the user's partial_fit,
      //  4) pickle the fitted state and the steering code into the weightfile.

      Weightfile weightfile;
      // Temporary files used to transport the pickled model state and the
      // steering-file source code into the weightfile.
      std::string custom_weightfile = weightfile.generateFileName();
      std::string custom_steeringfile = weightfile.generateFileName();

      uint64_t numberOfFeatures = training_data.getNumberOfFeatures();
      uint64_t numberOfSpectators = training_data.getNumberOfSpectators();
      uint64_t numberOfEvents = training_data.getNumberOfEvents();

      if (m_specific_options.m_training_fraction <= 0.0 or m_specific_options.m_training_fraction > 1.0) {
        B2ERROR("Please provide a positive training fraction");
        throw std::runtime_error("Please provide a training fraction between (0.0,1.0]");
      }

      // Number of training events, rounded up. The factor 100 keeps two decimal
      // places of the training fraction in integer arithmetic before dividing.
      auto numberOfTrainingEvents = static_cast<uint64_t>(numberOfEvents * 100 * m_specific_options.m_training_fraction);
      numberOfTrainingEvents = numberOfTrainingEvents / 100 + (numberOfTrainingEvents % 100 != 0);
      auto numberOfValidationEvents = numberOfEvents - numberOfTrainingEvents;

      // A mini batch size of 0 means "pass all training events in one batch".
      uint64_t batch_size = m_specific_options.m_mini_batch_size;
      if (batch_size == 0) {
        batch_size = numberOfTrainingEvents;
      }

      if (batch_size > numberOfTrainingEvents) {
        B2WARNING("Mini batch size (" << batch_size << ") is larger than the number of training events (" << numberOfTrainingEvents << ")"\
        " The batch size has been set equal to the number of training events.");
        batch_size = numberOfTrainingEvents;
      };

      // Plain float buffers handed to numpy via PyArray_SimpleNewFromData below;
      // numpy does NOT take ownership, the unique_ptrs keep the memory alive.
      // X = features, S = spectators, y = targets, w = weights (per batch).
      auto X = std::unique_ptr<float[]>(new float[batch_size * numberOfFeatures]);
      auto S = std::unique_ptr<float[]>(new float[batch_size * numberOfSpectators]);
      auto y = std::unique_ptr<float[]>(new float[batch_size]);
      auto w = std::unique_ptr<float[]>(new float[batch_size]);
      npy_intp dimensions_X[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfFeatures)};
      npy_intp dimensions_S[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfSpectators)};
      npy_intp dimensions_y[2] = {static_cast<npy_intp>(batch_size), 1};
      npy_intp dimensions_w[2] = {static_cast<npy_intp>(batch_size), 1};

      // Same layout for the validation sample, which is passed once to begin_fit.
      auto X_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfFeatures]);
      auto S_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfSpectators]);
      auto y_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
      auto w_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
      npy_intp dimensions_X_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfFeatures)};
      npy_intp dimensions_S_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfSpectators)};
      npy_intp dimensions_y_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};
      npy_intp dimensions_w_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};

      // An empty steering file is allowed: the framework defaults are used then.
      std::string steering_file_source_code;
      if (m_specific_options.m_steering_file != "") {
        steering_file_source_code = loadPythonFileAsString(m_specific_options.m_steering_file);
      }

      std::vector<float> means(numberOfFeatures, 0.0);
      std::vector<float> stds(numberOfFeatures, 0.0);

      if (m_specific_options.m_normalize) {
        // Stable calculation of mean and variance with weights
        // see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
        auto weights = training_data.getWeights();
        for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
          double wSum = 0.0;
          double mean = 0.0;
          double running_std = 0.0;
          auto feature = training_data.getFeature(iFeature);
          for (uint64_t i = 0; i < weights.size(); ++i) {
            wSum += weights[i];
            double meanOld = mean;
            mean += (weights[i] / wSum) * (feature[i] - meanOld);
            running_std += weights[i] * (feature[i] - meanOld) * (feature[i] - mean);
          }
          means[iFeature] = mean;
          // Weighted sample standard deviation (Bessel-style wSum-1 correction).
          stds[iFeature] = std::sqrt(running_std / (wSum - 1));
        }
      }

      try {
        // Load python modules
        auto json = boost::python::import("json");
        auto builtins = boost::python::import("builtins");
        auto inspect = boost::python::import("inspect");

        // Create a new empty module with a unique name.
        // This way we dont end up with multiple mvas trying to overwrite the same apply method with the last one being used by all.
        boost::python::object type = boost::python::import("types");

        // Generate a unique module
        boost::uuids::random_generator uuid_gen;
        std::string unique_mva_module_name = "unique_module_name" + boost::uuids::to_string(uuid_gen());
        boost::python::object unique_mva_module = type.attr("ModuleType")(unique_mva_module_name.c_str());

        // Find the framework file. Then execute it in the scope of the new module
        auto framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());
        auto framework_file = framework.attr("__file__");
        boost::python::extract<std::string> extractor(framework_file);
        std::string framework_filename = extractor();
        auto framework_file_source_code = loadPythonFileAsString(framework_filename);
        builtins.attr("exec")(framework_file_source_code.c_str(), boost::python::object(unique_mva_module.attr("__dict__")));
        // Overwrite framework with user-defined code from the steering file
        builtins.attr("exec")(steering_file_source_code.c_str(), boost::python::object(unique_mva_module.attr("__dict__")));

        // Call get_model with the parameters provided by the user
        auto parameters = json.attr("loads")(m_specific_options.m_config.c_str());
        auto model = unique_mva_module.attr("get_model")(numberOfFeatures, numberOfSpectators,
                                                         numberOfEvents, m_specific_options.m_training_fraction, parameters);

        // Call begin_fit with validation sample
        // (the first numberOfValidationEvents events form the validation sample).
        for (uint64_t iEvent = 0; iEvent < numberOfValidationEvents; ++iEvent) {
          training_data.loadEvent(iEvent);
          if (m_specific_options.m_normalize) {
            for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
              X_v[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
          } else {
            for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
              X_v[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
          }
          for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
            S_v[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
          y_v[iEvent] = training_data.m_target;
          w_v[iEvent] = training_data.m_weight;
        }

        // Wrap the validation buffers as (non-owning) numpy arrays.
        auto ndarray_X_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X_v, NPY_FLOAT32, X_v.get()));
        auto ndarray_S_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S_v, NPY_FLOAT32, S_v.get()));
        auto ndarray_y_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y_v, NPY_FLOAT32, y_v.get()));
        auto ndarray_w_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w_v, NPY_FLOAT32, w_v.get()));

        // Integer division already floors; trailing partial batches are dropped.
        uint64_t nBatches = std::floor(numberOfTrainingEvents / batch_size);

        auto state = unique_mva_module.attr("begin_fit")(model, ndarray_X_v, ndarray_S_v, ndarray_y_v, ndarray_w_v, nBatches);

        // partial_fit may return False to request early termination of training.
        bool continue_loop = true;

        std::vector<uint64_t> iteration_index_vector(numberOfTrainingEvents);
        std::iota(std::begin(iteration_index_vector), std::end(iteration_index_vector), 0);

        // nIterations == 0 means "iterate until partial_fit stops the loop".
        for (uint64_t iIteration = 0; (iIteration < m_specific_options.m_nIterations or m_specific_options.m_nIterations == 0)
             and continue_loop; ++iIteration) {

          // shuffle the indices on each iteration to get randomised batches
          if (iIteration > 0) std::shuffle(std::begin(iteration_index_vector), std::end(iteration_index_vector), TRandomWrapper());

          for (uint64_t iBatch = 0; iBatch < nBatches and continue_loop; ++iBatch) {

            // Release Global Interpreter Lock in python to allow multithreading while reading root files
            // also see: https://docs.python.org/3.5/c-api/init.html
            Py_BEGIN_ALLOW_THREADS;
            for (uint64_t iEvent = 0; iEvent < batch_size; ++iEvent) {
              // Offset by numberOfValidationEvents: training events follow the
              // validation sample in the dataset.
              training_data.loadEvent(iteration_index_vector.at(iEvent + iBatch * batch_size) + numberOfValidationEvents);
              if (m_specific_options.m_normalize) {
                for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
                  X[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
              } else {
                for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
                  X[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
              }
              for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
                S[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
              y[iEvent] = training_data.m_target;
              w[iEvent] = training_data.m_weight;
            }

            // Reactivate Global Interpreter Lock to safely execute python code
            Py_END_ALLOW_THREADS;

            // Maybe slow, create ndarrays outside of loop?
            auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
            auto ndarray_S = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S, NPY_FLOAT32, S.get()));
            auto ndarray_y = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y, NPY_FLOAT32, y.get()));
            auto ndarray_w = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w, NPY_FLOAT32, w.get()));

            auto r = unique_mva_module.attr("partial_fit")(state, ndarray_X, ndarray_S, ndarray_y,
                                                           ndarray_w, iIteration, iBatch);
            // If partial_fit returned a bool, honor it as a "continue" flag.
            boost::python::extract<bool> proxy(r);
            if (proxy.check())
              continue_loop = static_cast<bool>(proxy);
          }
        }

        auto result = unique_mva_module.attr("end_fit")(state);

        // Persist the fitted model state and the steering source via pickle.
        auto pickle = boost::python::import("pickle");
        auto file = builtins.attr("open")(custom_weightfile.c_str(), "wb");
        pickle.attr("dump")(result, file);

        auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "wb");
        pickle.attr("dump")(steering_file_source_code.c_str(), steeringfile);

        // Feature importances are optional: empty means "not provided".
        auto importances = unique_mva_module.attr("feature_importance")(state);
        if (len(importances) == 0) {
          B2INFO("Python method returned empty feature importance. There won't be any information about the feature importance in the weightfile.");
        } else if (numberOfFeatures != static_cast<uint64_t>(len(importances))) {
          B2WARNING("Python method didn't return the correct number of importance value. I ignore the importances");
        } else {
          std::map<std::string, float> feature_importances;
          for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
            boost::python::extract<float> proxy(importances[iFeature]);
            if (proxy.check()) {
              feature_importances[m_general_options.m_variables[iFeature]] = static_cast<float>(proxy);
            } else {
              B2WARNING("Failed to convert importance output of the method to a float, using 0 instead");
              feature_importances[m_general_options.m_variables[iFeature]] = 0.0;
            }
          }
          weightfile.addFeatureImportance(feature_importances);
        }

      } catch (...) {
        // Print and clear the python error state before rethrowing as C++.
        PyErr_Print();
        PyErr_Clear();
        B2ERROR("Failed calling train in PythonTeacher");
        throw std::runtime_error(std::string("Failed calling train in PythonTeacher"));
      }

      // Assemble the weightfile from options, pickled files and statistics.
      weightfile.addOptions(m_general_options);
      weightfile.addOptions(m_specific_options);
      weightfile.addFile("Python_Weightfile", custom_weightfile);
      weightfile.addFile("Python_Steeringfile", custom_steeringfile);
      weightfile.addSignalFraction(training_data.getSignalFraction());
      if (m_specific_options.m_normalize) {
        weightfile.addVector("Python_Means", means);
        weightfile.addVector("Python_Stds", stds);
      }

      return weightfile;

    }
401
406
407
    {
      // Restore a trained python model from the weightfile: unpack options,
      // re-execute the framework (and optional steering) code in a fresh
      // uniquely-named module, then unpickle the fitted state and hand it to
      // the module's load() function.

      std::string custom_weightfile = weightfile.generateFileName();
      weightfile.getFile("Python_Weightfile", custom_weightfile);
      weightfile.getOptions(m_general_options);
      weightfile.getOptions(m_specific_options);

      // Normalization constants are only stored if normalization was enabled.
      if (m_specific_options.m_normalize) {
        m_means = weightfile.getVector<float>("Python_Means");
        m_stds = weightfile.getVector<float>("Python_Stds");
      }

      try {
        auto pickle = boost::python::import("pickle");
        auto builtins = boost::python::import("builtins");

        // Create a new empty module with a unique name.
        // This way we dont end up with multiple mvas trying to implement
        // the same apply method with the last one being used by all.
        boost::uuids::random_generator uuid_gen;
        std::string unique_mva_module_name = "custom_framework_" + boost::uuids::to_string(uuid_gen());
        boost::python::object type = boost::python::import("types");
        m_unique_mva_module = type.attr("ModuleType")(unique_mva_module_name.c_str());

        // Find the framework file. Then execute it in the scope of the new module
        auto framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());
        auto framework_file = framework.attr("__file__");
        boost::python::extract<std::string> extractor(framework_file);
        std::string framework_filename = extractor();
        auto framework_file_source_code = loadPythonFileAsString(framework_filename);
        builtins.attr("exec")(framework_file_source_code.c_str(), boost::python::object(m_unique_mva_module.attr("__dict__")));

        // Overwrite framework with user-defined code from the steering file if defined
        if (weightfile.containsElement("Python_Steeringfile")) {
          std::string custom_steeringfile = weightfile.generateFileName();
          weightfile.getFile("Python_Steeringfile", custom_steeringfile);
          auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "rb");
          // The steering source was pickled as a string by PythonTeacher::train.
          auto source_code = pickle.attr("load")(steeringfile);
          builtins.attr("exec")(boost::python::object(source_code), boost::python::object(m_unique_mva_module.attr("__dict__")));
        }

        // Unpickle the fitted model state and pass it to the module's load().
        auto file = builtins.attr("open")(custom_weightfile.c_str(), "rb");
        auto unpickled_fit_object = pickle.attr("load")(file);
        m_state = m_unique_mva_module.attr("load")(unpickled_fit_object);
      } catch (...) {
        // Print and clear the python error state before rethrowing as C++.
        PyErr_Print();
        PyErr_Clear();
        B2ERROR("Failed calling load in PythonExpert");
        throw std::runtime_error("Failed calling load in PythonExpert");
      }

    }
461
462 std::vector<float> PythonExpert::apply(Dataset& test_data) const
463 {
464
465 uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
466 uint64_t numberOfEvents = test_data.getNumberOfEvents();
467
468 auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
469 npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};
470
471 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
472 test_data.loadEvent(iEvent);
473 if (m_specific_options.m_normalize) {
474 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
475 X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
476 } else {
477 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
478 X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
479 }
480 }
481
482 std::vector<float> probabilities(test_data.getNumberOfEvents(), std::numeric_limits<float>::quiet_NaN());
483
484 try {
485 auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
486 auto result = m_unique_mva_module.attr("apply")(m_state, ndarray_X);
487 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
488 // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
489 // to a PyObject but do not inherit from it!
490 probabilities[iEvent] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR1(reinterpret_cast<PyArrayObject*>(result.ptr()),
491 iEvent)));
492 }
493 } catch (...) {
494 PyErr_Print();
495 PyErr_Clear();
496 B2ERROR("Failed calling applying PythonExpert");
497 throw std::runtime_error("Failed calling applying PythonExpert");
498 }
499
500 return probabilities;
501 }
502
503 std::vector<std::vector<float>> PythonExpert::applyMulticlass(Dataset& test_data) const
504 {
505
506 uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
507 uint64_t numberOfEvents = test_data.getNumberOfEvents();
508
509 auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
510 npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};
511
512 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
513 test_data.loadEvent(iEvent);
514 if (m_specific_options.m_normalize) {
515 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
516 X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
517 } else {
518 for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
519 X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
520 }
521 }
522
523 unsigned int nClasses = m_general_options.m_nClasses;
524 std::vector<std::vector<float>> probabilities(test_data.getNumberOfEvents(), std::vector<float>(nClasses,
525 std::numeric_limits<float>::quiet_NaN()));
526
527 try {
528 auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
529 auto result = m_unique_mva_module.attr("apply")(m_state, ndarray_X);
530 for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
531 // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
532 // to a PyObject but do not inherit from it!
533 for (uint64_t iClass = 0; iClass < nClasses; ++iClass) {
534 probabilities[iEvent][iClass] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR2(reinterpret_cast<PyArrayObject*>
535 (result.ptr()),
536 iEvent, iClass)));
537 }
538 }
539 } catch (...) {
540 PyErr_Print();
541 PyErr_Clear();
542 B2ERROR("Failed calling applying PythonExpert");
543 throw std::runtime_error("Failed calling applying PythonExpert");
544 }
545
546 return probabilities;
547 }
548 }
550}
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Abstract base class of all Datasets given to the MVA interface The current event can always be access...
Definition Dataset.h:33
GeneralOptions m_general_options
General options loaded from the weightfile.
Definition Expert.h:70
General options which are shared by all MVA trainings.
Definition Options.h:62
boost::python::object m_unique_mva_module
python module containing the mva methods
Definition Python.h:139
PythonExpert()
Constructs a new Python Expert.
Definition Python.cc:402
boost::python::object m_state
current state object of method
Definition Python.h:140
std::vector< float > m_stds
Stds of all features for normalization.
Definition Python.h:142
virtual std::vector< float > apply(Dataset &test_data) const override
Apply this expert onto a dataset.
Definition Python.cc:462
PythonOptions m_specific_options
Method specific options.
Definition Python.h:138
virtual void load(Weightfile &weightfile) override
Load the expert from a Weightfile.
Definition Python.cc:408
std::vector< float > m_means
Means of all features for normalization.
Definition Python.h:141
virtual std::vector< std::vector< float > > applyMulticlass(Dataset &test_data) const override
Apply this expert onto a dataset for multiclass problem.
Definition Python.cc:503
void * init_numpy()
Helper function which initializes array system of numpy.
Definition Python.cc:149
~PythonInitializerSingleton()
Destructor of PythonInitializerSingleton.
Definition Python.cc:132
bool m_initialized_python
Member which indicates whether this class initialized python.
Definition Python.cc:156
static PythonInitializerSingleton & GetInstance()
Return static instance of PythonInitializerSingleton.
Definition Python.cc:159
PythonInitializerSingleton()
Constructor of PythonInitializerSingleton.
Definition Python.cc:117
PythonInitializerSingleton(const PythonInitializerSingleton &)=delete
Forbid copy constructor of PythonInitializerSingleton.
Options for the Python MVA method.
Definition Python.h:50
unsigned int m_nIterations
Number of iterations through the whole data.
Definition Python.h:79
std::string m_steering_file
steering file provided by the user to override the functions in the framework
Definition Python.h:76
std::string m_framework
framework to use e.g.
Definition Python.h:75
std::string m_config
Config string in json, which is passed to the get model function.
Definition Python.h:77
virtual po::options_description getDescription() override
Returns a program options description for all available options.
Definition Python.cc:80
bool m_normalize
Normalize the inputs (shift mean to zero and std to 1)
Definition Python.h:81
double m_training_fraction
Fraction of data passed as training data, rest is passed as test data.
Definition Python.h:80
virtual void load(const boost::property_tree::ptree &pt) override
Load mechanism to load Options from a xml tree.
Definition Python.cc:47
virtual void save(boost::property_tree::ptree &pt) const override
Save mechanism to store Options in a xml tree.
Definition Python.cc:68
unsigned int m_mini_batch_size
Mini batch size, 0 passes the whole data in one call.
Definition Python.h:78
PythonTeacher(const GeneralOptions &general_options, const PythonOptions &specific_options)
Constructs a new teacher using the GeneralOptions and specific options of this training.
Definition Python.cc:166
PythonOptions m_specific_options
Method specific options.
Definition Python.h:105
virtual Weightfile train(Dataset &training_data) const override
Train a mva method using the given dataset returning a Weightfile.
Definition Python.cc:173
GeneralOptions m_general_options
GeneralOptions containing all shared options.
Definition Teacher.h:49
Teacher(const GeneralOptions &general_options)
Constructs a new teacher using the GeneralOptions for this training.
Definition Teacher.cc:18
The Weightfile class serializes all information about a training into an xml tree.
Definition Weightfile.h:38
Abstract base class for different kinds of events.
Wrap TRandom to be usable as a uniform random number generator with STL algorithms like std::shuffle.