Belle II Software light-2405-quaxo
Python.cc
/**************************************************************************
 * basf2 (Belle II Analysis Software Framework)                           *
 * Author: The Belle II Collaboration                                     *
 *                                                                        *
 * See git log for contributors and copyright holders.                    *
 * This file is licensed under LGPL-3.0, see LICENSE.md.                  *
 **************************************************************************/

#include <mva/methods/Python.h>

#include <boost/filesystem/convenience.hpp>
#include <numpy/npy_common.h>
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include <framework/logging/Logger.h>
#include <framework/utilities/FileSystem.h>
#include <framework/utilities/TRandomWrapper.h>

#include <fstream>
#include <numeric>

namespace Belle2 {
  namespace MVA {

    void PythonOptions::load(const boost::property_tree::ptree& pt)
    {
      int version = pt.get<int>("Python_version");
      if (version < 1 or version > 2) {
        B2ERROR("Unknown weightfile version " << std::to_string(version));
        throw std::runtime_error("Unknown weightfile version " + std::to_string(version));
      }
      m_framework = pt.get<std::string>("Python_framework");
      m_steering_file = pt.get<std::string>("Python_steering_file");
      m_mini_batch_size = pt.get<unsigned int>("Python_mini_batch_size");
      m_nIterations = pt.get<unsigned int>("Python_n_iterations");
      m_config = pt.get<std::string>("Python_config");
      m_training_fraction = pt.get<double>("Python_training_fraction");
      if (version == 2) {
        m_normalize = pt.get<bool>("Python_normalize");
      } else {
        m_normalize = false;
      }

    }

    void PythonOptions::save(boost::property_tree::ptree& pt) const
    {
      pt.put("Python_version", 2);
      pt.put("Python_framework", m_framework);
      pt.put("Python_steering_file", m_steering_file);
      pt.put("Python_mini_batch_size", m_mini_batch_size);
      pt.put("Python_n_iterations", m_nIterations);
      pt.put("Python_config", m_config);
      pt.put("Python_training_fraction", m_training_fraction);
      pt.put("Python_normalize", m_normalize);
    }

    po::options_description PythonOptions::getDescription()
    {
      po::options_description description("Python options");
      description.add_options()
      ("framework", po::value<std::string>(&m_framework),
       "Framework which should be used. Currently supported are sklearn, tensorflow and theano")
      ("steering_file", po::value<std::string>(&m_steering_file), "Steering file which describes")
      ("mini_batch_size", po::value<unsigned int>(&m_mini_batch_size), "Size of the mini batch given to partial_fit function")
      ("nIterations", po::value<unsigned int>(&m_nIterations), "Number of iterations")
      ("normalize", po::value<bool>(&m_normalize), "Normalize input data (shift mean to 0 and std to 1)")
      ("training_fraction", po::value<double>(&m_training_fraction),
       "Training fraction used to split up dataset in training and validation sample.")
      ("config", po::value<std::string>(&m_config), "Json encoded python object passed to begin_fit function");
      return description;
    }

    /**
     * Singleton class which handles the initialization and finalization of Python
     * and numpy
     */
    class PythonInitializerSingleton {

    public:
      /**
       * Return static instance of PythonInitializerSingleton
       */
      static PythonInitializerSingleton& GetInstance();

      /**
       * Forbid copy constructor of PythonInitializerSingleton
       */
      PythonInitializerSingleton(const PythonInitializerSingleton&) = delete;

    private:
      /**
       * Constructor of PythonInitializerSingleton
       */
      PythonInitializerSingleton()
      {
        if (not Py_IsInitialized()) {
          Py_Initialize();
          // wchar_t* bla[] = {L""};
          wchar_t** bla = nullptr;
          PySys_SetArgvEx(0, bla, 0);
          m_initialized_python = true;
        }

        if (PyArray_API == nullptr) {
          init_numpy();
        }
      }

      /**
       * Destructor of PythonInitializerSingleton
       */
      ~PythonInitializerSingleton()
      {
        if (m_initialized_python) {
          if (Py_IsInitialized()) {
            // We don't finalize Python because this call only frees some memory,
            // but can cause crashes in loaded python-modules like Theano
            // https://docs.python.org/3/c-api/init.html
            // Py_Finalize();
          }
        }
      }

      /**
       * Helper function which initializes the array system of numpy
       */
      void* init_numpy()
      {
        // Import array is a macro which returns NUMPY_IMPORT_ARRAY_RETVAL
        import_array();
        return nullptr;
      }

      /** Indicates whether this class initialized Python */
      bool m_initialized_python = false;
    };

    PythonInitializerSingleton& PythonInitializerSingleton::GetInstance()
    {
      static PythonInitializerSingleton singleton;
      return singleton;
    }


    PythonTeacher::PythonTeacher(const GeneralOptions& general_options,
                                 const PythonOptions& specific_options) : Teacher(general_options),
      m_specific_options(specific_options)
    {
      PythonInitializerSingleton::GetInstance();
    }


    Weightfile PythonTeacher::train(Dataset& training_data) const
    {

      Weightfile weightfile;
      std::string custom_weightfile = weightfile.generateFileName();
      std::string custom_steeringfile = weightfile.generateFileName();

      uint64_t numberOfFeatures = training_data.getNumberOfFeatures();
      uint64_t numberOfSpectators = training_data.getNumberOfSpectators();
      uint64_t numberOfEvents = training_data.getNumberOfEvents();

      if (m_specific_options.m_training_fraction <= 0.0 or m_specific_options.m_training_fraction > 1.0) {
        B2ERROR("Please provide a positive training fraction");
        throw std::runtime_error("Please provide a training fraction between (0.0,1.0]");
      }

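      // The factor of 100 keeps two decimal places of numberOfEvents * training_fraction in
      // integer arithmetic; the following division rounds the result up to a whole number of
      // training events. The remaining events form the validation sample passed to begin_fit.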
      auto numberOfTrainingEvents = static_cast<uint64_t>(numberOfEvents * 100 * m_specific_options.m_training_fraction);
      numberOfTrainingEvents = numberOfTrainingEvents / 100 + (numberOfTrainingEvents % 100 != 0);
      auto numberOfValidationEvents = numberOfEvents - numberOfTrainingEvents;

      uint64_t batch_size = m_specific_options.m_mini_batch_size;
      if (batch_size == 0) {
        batch_size = numberOfTrainingEvents;
      }

      if (batch_size > numberOfTrainingEvents) {
        B2WARNING("Mini batch size (" << batch_size << ") is larger than the number of training events (" << numberOfTrainingEvents << ")"\
                  " The batch size has been set equal to the number of training events.");
        batch_size = numberOfTrainingEvents;
      };

      auto X = std::unique_ptr<float[]>(new float[batch_size * numberOfFeatures]);
      auto S = std::unique_ptr<float[]>(new float[batch_size * numberOfSpectators]);
      auto y = std::unique_ptr<float[]>(new float[batch_size]);
      auto w = std::unique_ptr<float[]>(new float[batch_size]);
      npy_intp dimensions_X[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfFeatures)};
      npy_intp dimensions_S[2] = {static_cast<npy_intp>(batch_size), static_cast<npy_intp>(numberOfSpectators)};
      npy_intp dimensions_y[2] = {static_cast<npy_intp>(batch_size), 1};
      npy_intp dimensions_w[2] = {static_cast<npy_intp>(batch_size), 1};

      auto X_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfFeatures]);
      auto S_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents * numberOfSpectators]);
      auto y_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
      auto w_v = std::unique_ptr<float[]>(new float[numberOfValidationEvents]);
      npy_intp dimensions_X_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfFeatures)};
      npy_intp dimensions_S_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), static_cast<npy_intp>(numberOfSpectators)};
      npy_intp dimensions_y_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};
      npy_intp dimensions_w_v[2] = {static_cast<npy_intp>(numberOfValidationEvents), 1};

      std::string steering_file_source_code;
      if (m_specific_options.m_steering_file != "") {
        std::string filename = FileSystem::findFile(m_specific_options.m_steering_file);
        std::ifstream steering_file(filename);
        if (not steering_file) {
          throw std::runtime_error(std::string("Couldn't open file ") + filename);
        }
        steering_file.seekg(0, std::ios::end);
        steering_file_source_code.resize(steering_file.tellg());
        steering_file.seekg(0, std::ios::beg);
        steering_file.read(&steering_file_source_code[0], steering_file_source_code.size());
      }
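      // The steering file content is kept in memory: it is exec'd into the framework module
      // below and also pickled into the weightfile, so the expert can later re-apply the same
      // user-defined overrides.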

      std::vector<float> means(numberOfFeatures, 0.0);
      std::vector<float> stds(numberOfFeatures, 0.0);

      if (m_specific_options.m_normalize) {
        // Stable calculation of mean and variance with weights
        // see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
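        // For each event i with weight w_i and feature value x_i the running sums are updated as
        //   W    += w_i
        //   mean += (w_i / W) * (x_i - mean_old)
        //   S    += w_i * (x_i - mean_old) * (x_i - mean_new)
        // and the weighted standard deviation of the feature is sqrt(S / (W - 1)).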
        auto weights = training_data.getWeights();
        for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
          double wSum = 0.0;
          double mean = 0.0;
          double running_std = 0.0;
          auto feature = training_data.getFeature(iFeature);
          for (uint64_t i = 0; i < weights.size(); ++i) {
            wSum += weights[i];
            double meanOld = mean;
            mean += (weights[i] / wSum) * (feature[i] - meanOld);
            running_std += weights[i] * (feature[i] - meanOld) * (feature[i] - mean);
          }
          means[iFeature] = mean;
          stds[iFeature] = std::sqrt(running_std / (wSum - 1));
        }
      }

      try {
        // Load python modules
        auto json = boost::python::import("json");
        auto builtins = boost::python::import("builtins");
        auto inspect = boost::python::import("inspect");

        // Load framework
        auto framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());
        // Overwrite framework with user-defined code from the steering file
        builtins.attr("exec")(steering_file_source_code.c_str(), boost::python::object(framework.attr("__dict__")));

        // Call get_model with the parameters provided by the user
        auto parameters = json.attr("loads")(m_specific_options.m_config.c_str());
        auto model = framework.attr("get_model")(numberOfFeatures, numberOfSpectators,
                                                 numberOfEvents, m_specific_options.m_training_fraction, parameters);

        // Call begin_fit with validation sample
        for (uint64_t iEvent = 0; iEvent < numberOfValidationEvents; ++iEvent) {
          training_data.loadEvent(iEvent);
          if (m_specific_options.m_normalize) {
            for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
              X_v[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
          } else {
            for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
              X_v[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
          }
          for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
            S_v[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
          y_v[iEvent] = training_data.m_target;
          w_v[iEvent] = training_data.m_weight;
        }

        auto ndarray_X_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X_v, NPY_FLOAT32, X_v.get()));
        auto ndarray_S_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S_v, NPY_FLOAT32, S_v.get()));
        auto ndarray_y_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y_v, NPY_FLOAT32, y_v.get()));
        auto ndarray_w_v = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w_v, NPY_FLOAT32, w_v.get()));

        uint64_t nBatches = std::floor(numberOfTrainingEvents / batch_size);

        auto state = framework.attr("begin_fit")(model, ndarray_X_v, ndarray_S_v, ndarray_y_v, ndarray_w_v, nBatches);

        bool continue_loop = true;

        std::vector<uint64_t> iteration_index_vector(numberOfTrainingEvents);
        std::iota(std::begin(iteration_index_vector), std::end(iteration_index_vector), 0);

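        // Events [0, numberOfValidationEvents) above form the validation sample handed to
        // begin_fit; the shuffled indices below address the remaining (training) events,
        // which are streamed to partial_fit in mini batches.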
        for (uint64_t iIteration = 0; (iIteration < m_specific_options.m_nIterations or m_specific_options.m_nIterations == 0)
             and continue_loop; ++iIteration) {

          // shuffle the indices on each iteration to get randomised batches
          if (iIteration > 0) std::shuffle(std::begin(iteration_index_vector), std::end(iteration_index_vector), TRandomWrapper());

          for (uint64_t iBatch = 0; iBatch < nBatches and continue_loop; ++iBatch) {

            // Release Global Interpreter Lock in python to allow multithreading while reading root files
            // also see: https://docs.python.org/3.5/c-api/init.html
            PyThreadState* m_thread_state = PyEval_SaveThread();
            for (uint64_t iEvent = 0; iEvent < batch_size; ++iEvent) {
              training_data.loadEvent(iteration_index_vector.at(iEvent + iBatch * batch_size) + numberOfValidationEvents);
              if (m_specific_options.m_normalize) {
                for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
                  X[iEvent * numberOfFeatures + iFeature] = (training_data.m_input[iFeature] - means[iFeature]) / stds[iFeature];
              } else {
                for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
                  X[iEvent * numberOfFeatures + iFeature] = training_data.m_input[iFeature];
              }
              for (uint64_t iSpectator = 0; iSpectator < numberOfSpectators; ++iSpectator)
                S[iEvent * numberOfSpectators + iSpectator] = training_data.m_spectators[iSpectator];
              y[iEvent] = training_data.m_target;
              w[iEvent] = training_data.m_weight;
            }

            // Maybe slow, create ndarrays outside of loop?
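            // PyArray_SimpleNewFromData wraps the existing C++ buffers without copying, so the
            // numpy arrays handed to partial_fit see the freshly filled batch data.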
            auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
            auto ndarray_S = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_S, NPY_FLOAT32, S.get()));
            auto ndarray_y = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_y, NPY_FLOAT32, y.get()));
            auto ndarray_w = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_w, NPY_FLOAT32, w.get()));

            // Reactivate Global Interpreter Lock to safely execute python code
            PyEval_RestoreThread(m_thread_state);
            auto r = framework.attr("partial_fit")(state, ndarray_X, ndarray_S, ndarray_y,
                                                   ndarray_w, iIteration, iBatch);
            boost::python::extract<bool> proxy(r);
            if (proxy.check())
              continue_loop = static_cast<bool>(proxy);
          }
        }

        auto result = framework.attr("end_fit")(state);

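        // Persist the result of end_fit and the steering file source code via pickle; both
        // files are attached to the weightfile below so that PythonExpert::load can rebuild
        // the trained model together with any user-defined overrides.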
        auto pickle = boost::python::import("pickle");
        auto file = builtins.attr("open")(custom_weightfile.c_str(), "wb");
        pickle.attr("dump")(result, file);

        auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "wb");
        pickle.attr("dump")(steering_file_source_code.c_str(), steeringfile);

        auto importances = framework.attr("feature_importance")(state);
        if (len(importances) == 0) {
          B2INFO("Python method returned empty feature importance. There won't be any information about the feature importance in the weightfile.");
        } else if (numberOfFeatures != static_cast<uint64_t>(len(importances))) {
          B2WARNING("Python method didn't return the correct number of importance values. The importances are ignored.");
        } else {
          std::map<std::string, float> feature_importances;
          for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature) {
            boost::python::extract<float> proxy(importances[iFeature]);
            if (proxy.check()) {
              feature_importances[m_general_options.m_variables[iFeature]] = static_cast<float>(proxy);
            } else {
              B2WARNING("Failed to convert importance output of the method to a float, using 0 instead");
              feature_importances[m_general_options.m_variables[iFeature]] = 0.0;
            }
          }
          weightfile.addFeatureImportance(feature_importances);
        }

      } catch (...) {
        PyErr_Print();
        PyErr_Clear();
        B2ERROR("Failed calling train in PythonTeacher");
        throw std::runtime_error(std::string("Failed calling train in PythonTeacher"));
      }

      weightfile.addOptions(m_general_options);
      weightfile.addOptions(m_specific_options);
      weightfile.addFile("Python_Weightfile", custom_weightfile);
      weightfile.addFile("Python_Steeringfile", custom_steeringfile);
      weightfile.addSignalFraction(training_data.getSignalFraction());
      if (m_specific_options.m_normalize) {
        weightfile.addVector("Python_Means", means);
        weightfile.addVector("Python_Stds", stds);
      }

      return weightfile;

    }

    PythonExpert::PythonExpert()
    {
      PythonInitializerSingleton::GetInstance();
    }


    void PythonExpert::load(Weightfile& weightfile)
    {

      std::string custom_weightfile = weightfile.generateFileName();
      weightfile.getFile("Python_Weightfile", custom_weightfile);
      weightfile.getOptions(m_general_options);
      weightfile.getOptions(m_specific_options);

      if (m_specific_options.m_normalize) {
        m_means = weightfile.getVector<float>("Python_Means");
        m_stds = weightfile.getVector<float>("Python_Stds");
      }

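      // Loading mirrors the training sequence: import the framework module, re-apply the
      // pickled steering code (if present), then unpickle the fit result and hand it to the
      // framework's load hook to obtain the state object used by apply().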
      try {
        auto pickle = boost::python::import("pickle");
        auto builtins = boost::python::import("builtins");
        m_framework = boost::python::import((std::string("basf2_mva_python_interface.") + m_specific_options.m_framework).c_str());

        if (weightfile.containsElement("Python_Steeringfile")) {
          std::string custom_steeringfile = weightfile.generateFileName();
          weightfile.getFile("Python_Steeringfile", custom_steeringfile);
          auto steeringfile = builtins.attr("open")(custom_steeringfile.c_str(), "rb");
          auto source_code = pickle.attr("load")(steeringfile);
          builtins.attr("exec")(boost::python::object(source_code), boost::python::object(m_framework.attr("__dict__")));
        }

        auto file = builtins.attr("open")(custom_weightfile.c_str(), "rb");
        auto unpickled_fit_object = pickle.attr("load")(file);
        m_state = m_framework.attr("load")(unpickled_fit_object);
      } catch (...) {
        PyErr_Print();
        PyErr_Clear();
        B2ERROR("Failed calling load in PythonExpert");
        throw std::runtime_error("Failed calling load in PythonExpert");
      }

    }

    std::vector<float> PythonExpert::apply(Dataset& test_data) const
    {

      uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
      uint64_t numberOfEvents = test_data.getNumberOfEvents();

      auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
      npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};

      for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
        test_data.loadEvent(iEvent);
        if (m_specific_options.m_normalize) {
          for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
            X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
        } else {
          for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
            X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
        }
      }

      std::vector<float> probabilities(test_data.getNumberOfEvents(), std::numeric_limits<float>::quiet_NaN());

      try {
        auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
        auto result = m_framework.attr("apply")(m_state, ndarray_X);
        for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
          // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
          // to a PyObject but do not inherit from it!
          probabilities[iEvent] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR1(reinterpret_cast<PyArrayObject*>(result.ptr()),
                                                                                          iEvent)));
        }
      } catch (...) {
        PyErr_Print();
        PyErr_Clear();
        B2ERROR("Failed calling apply in PythonExpert");
        throw std::runtime_error("Failed calling apply in PythonExpert");
      }

      return probabilities;
    }

    std::vector<std::vector<float>> PythonExpert::applyMulticlass(Dataset& test_data) const
    {

      uint64_t numberOfFeatures = test_data.getNumberOfFeatures();
      uint64_t numberOfEvents = test_data.getNumberOfEvents();

      auto X = std::unique_ptr<float[]>(new float[numberOfEvents * numberOfFeatures]);
      npy_intp dimensions_X[2] = {static_cast<npy_intp>(numberOfEvents), static_cast<npy_intp>(numberOfFeatures)};

      for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
        test_data.loadEvent(iEvent);
        if (m_specific_options.m_normalize) {
          for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
            X[iEvent * numberOfFeatures + iFeature] = (test_data.m_input[iFeature] - m_means[iFeature]) / m_stds[iFeature];
        } else {
          for (uint64_t iFeature = 0; iFeature < numberOfFeatures; ++iFeature)
            X[iEvent * numberOfFeatures + iFeature] = test_data.m_input[iFeature];
        }
      }

      unsigned int nClasses = m_general_options.m_nClasses;
      std::vector<std::vector<float>> probabilities(test_data.getNumberOfEvents(), std::vector<float>(nClasses,
                                                    std::numeric_limits<float>::quiet_NaN()));

      try {
        auto ndarray_X = boost::python::handle<>(PyArray_SimpleNewFromData(2, dimensions_X, NPY_FLOAT32, X.get()));
        auto result = m_framework.attr("apply")(m_state, ndarray_X);
        for (uint64_t iEvent = 0; iEvent < numberOfEvents; ++iEvent) {
          // We have to do some nasty casting here, because the Python C-Api uses structs which are binary compatible
          // to a PyObject but do not inherit from it!
          for (uint64_t iClass = 0; iClass < nClasses; ++iClass) {
            probabilities[iEvent][iClass] = static_cast<float>(*static_cast<float*>(PyArray_GETPTR2(reinterpret_cast<PyArrayObject*>
                                                               (result.ptr()), iEvent, iClass)));
          }
        }
      } catch (...) {
        PyErr_Print();
        PyErr_Clear();
        B2ERROR("Failed calling apply in PythonExpert");
        throw std::runtime_error("Failed calling apply in PythonExpert");
      }

      return probabilities;
    }
  }
}
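
The listing above defines the full training and inference cycle. As a rough usage sketch (illustrative only, not part of Python.cc): the teacher and expert can be driven directly from C++ via the classes defined here. The variable names and option values below are made-up examples and trainAndApplyExample is a hypothetical helper; only basf2 MVA types and members that appear in the listing are used.

// Hypothetical usage sketch -- not part of Python.cc.
#include <mva/methods/Python.h>
#include <vector>

using namespace Belle2::MVA;

Weightfile trainAndApplyExample(Dataset& training_data, Dataset& test_data)
{
  GeneralOptions general_options;
  general_options.m_variables = {"p", "pt", "cosTheta"};   // example variable (branch) names

  PythonOptions specific_options;
  specific_options.m_framework = "sklearn";                // imports basf2_mva_python_interface.sklearn
  specific_options.m_steering_file = "my_model.py";        // optional user overrides, exec'd into the framework module
  specific_options.m_config = "{}";                        // JSON string handed to the python side
  specific_options.m_nIterations = 1;
  specific_options.m_mini_batch_size = 0;                  // 0: a single batch containing all training events
  specific_options.m_training_fraction = 0.8;
  specific_options.m_normalize = true;

  PythonTeacher teacher(general_options, specific_options);
  Weightfile weightfile = teacher.train(training_data);    // runs get_model/begin_fit/partial_fit/end_fit

  PythonExpert expert;
  expert.load(weightfile);                                 // re-imports the framework and unpickles the fit result
  std::vector<float> response = expert.apply(test_data);   // calls the framework's apply hook
  (void) response;

  return weightfile;
}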