Belle II Software development
MVAMultipleExpertsModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9
10#include <mva/modules/MVAExpert/MVAMultipleExpertsModule.h>
11
12#include <analysis/dataobjects/Particle.h>
13#include <analysis/dataobjects/ParticleList.h>
14#include <analysis/dataobjects/ParticleExtraInfoMap.h>
15
16#include <mva/interface/Interface.h>
17
18#include <boost/algorithm/string/predicate.hpp>
19
20#include <framework/logging/Logger.h>
21
22
23using namespace Belle2;
24
25REG_MODULE(MVAMultipleExperts);
26
// Registers the module description and the steering parameters of the module.
// NOTE(review): listing lines 27 (the signature this body belongs to, presumably the
// MVAMultipleExpertsModule constructor) and 30 are absent from this listing.
28{
29 setDescription("Adds ExtraInfos to the Particle objects in given ParticleLists which is calcuated by multiple experts defined by the given weightfiles.");
31
// listNames is optional: its default is an empty vector.
32 std::vector<std::string> empty;
33 addParam("listNames", m_listNames,
34 "Particles from these ParticleLists are used as input. If no name is given the experts are applied to every event once, and one can only use variables which accept nullptr as Particle*",
35 empty);
// extraInfoNames and identifiers are registered without a default value.
36 addParam("extraInfoNames", m_extraInfoNames,
37 "Names under which the output of the experts is stored in the ExtraInfo of the Particle object.");
38 addParam("identifiers", m_identifiers, "The database identifiers which is used to load the weights during the training.");
// A signalFraction of -1.0 (the default) means "use the value from the training data", per the help text.
39 addParam("signalFraction", m_signal_fraction_override,
40 "signalFraction to calculate probability (if -1 the signalFraction of the training data is used)", -1.0);
// NOTE(review): the help text below talks about true/false, but the parameter is a
// std::vector<int> (default: empty) and the setters in this file accept the modes -1/0/1/2
// per expert (overwrite if lower / keep / overwrite if higher / always overwrite).
41 std::vector<int> empty_vec;
42 addParam("overwriteExistingExtraInfo", m_overwriteExistingExtraInfo,
43 "If true, when the given extraInfo has already defined, the old extraInfo value is overwritten. If false, the original value is kept.",
44 empty_vec);
45}
46
// Declares the required data store inputs, validates the parameter lengths and sizes the
// per-expert containers (one slot per entry of m_identifiers).
// NOTE(review): listing lines 47 (signature, presumably initialize()), 51, 56, 59, 67, 69, 73
// and 83 are absent from this listing, so some declarations used below are not visible.
48{
49 // All specified ParticleLists are required to exist
50 for (auto& name : m_listNames) {
// `list` is declared on the missing listing line 51 - presumably a StoreObjPtr<ParticleList> built from `name`; TODO confirm.
52 list.isRequired();
53 }
54
// Both branches require an `extraInfo` object whose declarations (listing lines 56 and 59)
// are not visible; presumably a different data store object is used with and without ParticleLists.
55 if (m_listNames.empty()) {
57 extraInfo.isRequired();
58 } else {
60 extraInfo.isRequired();
61 }
62
// The i-th extraInfo name belongs to the i-th identifier, so both vectors must have equal length.
63 if (m_extraInfoNames.size() != m_identifiers.size()) {
64 B2FATAL("The number of given m_extraInfoNames is not equal to the number of m_identifiers. The output the ith method in m_identifiers is saved as extraInfo under the ith name in m_extraInfoNames! Set also different names for each method!");
65 }
66
68 m_experts.resize(m_identifiers.size());
70 m_datasets.resize(m_identifiers.size());
71 m_nClasses.resize(m_identifiers.size());
72 // if the size of m_overwriteExistingExtraInfo is smaller than that of m_identifiers, 2 will be filled.
// NOTE(review): the statement the comment above refers to (listing line 73) is not visible here.
74 m_existGivenExtraInfo.resize(m_identifiers.size(), false);
75
// Identifiers that end in neither ".root" nor ".xml" get a database-backed weightfile
// representation; slots of the other identifiers are not assigned in this loop.
// NOTE(review): m_weightfile_representations is indexed here, so it must already have
// m_identifiers.size() slots - presumably resized on one of the missing lines; TODO confirm.
76 for (unsigned int i = 0; i < m_identifiers.size(); ++i) {
77 if (not(boost::ends_with(m_identifiers[i], ".root") or boost::ends_with(m_identifiers[i], ".xml"))) {
78 m_weightfile_representations[i] = std::make_unique<DBObjPtr<DatabaseRepresentationOfWeightfile>>(
79 MVA::makeSaveForDatabase(m_identifiers[i]));
80 }
81 }
82
84}
85
// (Re)loads the weightfile of every expert and initialises it through init_mva().
// NOTE(review): listing lines 86 (signature, presumably beginRun()) and 91 are absent from
// this listing; line 91 opens the `if` that the `} else {` on listing line 97 closes.
87{
88
89 if (!m_weightfile_representations.empty()) {
90 for (unsigned int i = 0; i < m_weightfile_representations.size(); ++i) {
// Presumably guarded by a check that slot i holds a database representation (missing line 91).
// Database case: only re-initialise the expert when the payload has changed.
92 if (m_weightfile_representations[i]->hasChanged()) {
93 std::stringstream ss((*m_weightfile_representations[i])->m_data);
94 auto weightfile = MVA::Weightfile::loadFromStream(ss);
95 init_mva(weightfile, i);
96 }
97 } else {
// Otherwise the identifier is treated as a file name and loaded from disk on every call.
98 auto weightfile = MVA::Weightfile::loadFromFile(m_identifiers[i]);
99 init_mva(weightfile, i);
100 }
101 }
102
// An empty m_weightfile_representations is reported as "no identifiers given" and is fatal.
103 } else B2FATAL("No m_identifiers given. At least one is needed!");
104}
105
// Sets up expert i from the given weightfile: creates and loads the expert, resolves its
// feature variables, registers them in the shared value map and allocates its dataset.
// NOTE(review): listing lines 106 (signature, presumably
// init_mva(MVA::Weightfile& weightfile, unsigned int i)), 110, 117 and 118 are absent from
// this listing; `manager` used below is not declared in the visible lines.
107{
108
109 auto supported_interfaces = MVA::AbstractInterface::getSupportedInterfaces();
111
112
113 MVA::GeneralOptions general_options;
114 weightfile.getOptions(general_options);
115
116 // Overwrite signal fraction from training
// NOTE(review): the statements belonging to the comment above (listing lines 117-118) are not visible here.
119
// The expert is created by the interface registered under the method name stored in the weightfile.
120 m_experts[i] = supported_interfaces[general_options.m_method]->getExpert();
121 m_experts[i]->load(weightfile);
122
123
// A size mismatch means at least one variable name from the weightfile could not be resolved.
124 m_individual_feature_variables[i] = manager.getVariables(general_options.m_variables);
125 if (m_individual_feature_variables[i].size() != general_options.m_variables.size()) {
126 B2FATAL("One or more feature variables could not be loaded via the Variable::Manager. Check the names!");
127 }
128
// m_feature_variables holds one entry per distinct variable across all experts, initialised to 0.
129 for (auto& iVariable : m_individual_feature_variables[i]) {
130 if (m_feature_variables.find(iVariable) == m_feature_variables.end()) {
131 m_feature_variables.insert(std::pair<const Variable::Manager::Var*, float>(iVariable, 0));
132 }
133 }
134
// The dataset is created with a zero-filled feature vector of the right length.
135 std::vector<float> dummy;
136 dummy.resize(m_individual_feature_variables[i].size(), 0);
137 m_datasets[i] = std::make_unique<MVA::SingleDataset>(general_options, dummy, 0);
138
139 m_nClasses[i] = general_options.m_nClasses;
140
141}
142
// Evaluates every registered feature variable for `particle` and stores the result in
// m_feature_variables, then walks over each expert's feature list.
// NOTE(review): listing lines 143 (signature, presumably fillDatasets(Particle* particle)) and
// 157 are absent from this listing; line 157 is the body of the inner loop below and
// presumably copies the cached value into expert i's dataset - TODO confirm.
144{
// The variable function returns a variant; the alternative to read is chosen by the declared
// variabletype. A variable of any other type keeps its previous value in the map.
145 for (auto const& iVariable : m_feature_variables) {
146 if (iVariable.first->variabletype == Variable::Manager::VariableDataType::c_double) {
147 m_feature_variables[iVariable.first] = std::get<double>(iVariable.first->function(particle));
148 } else if (iVariable.first->variabletype == Variable::Manager::VariableDataType::c_int) {
149 m_feature_variables[iVariable.first] = std::get<int>(iVariable.first->function(particle));
150 } else if (iVariable.first->variabletype == Variable::Manager::VariableDataType::c_bool) {
151 m_feature_variables[iVariable.first] = std::get<bool>(iVariable.first->function(particle));
152 }
153 }
154
155 for (unsigned int i = 0; i < m_identifiers.size(); ++i) {
156 for (unsigned int j = 0; j < m_individual_feature_variables[i].size(); ++j) {
158 }
159 }
160}
161
162std::vector<std::vector<float>> MVAMultipleExpertsModule::analyse(Particle* particle)
163{
164 std::vector<std::vector<float>> responseValues;
165 fillDatasets(particle);
166
167 for (unsigned int i = 0; i < m_identifiers.size(); ++i) {
168 if (m_nClasses[i] == 2) {
169 responseValues.push_back({m_experts[i]->apply(*m_datasets[i])[0],});
170 } else if (m_nClasses[i] > 2) {
171 responseValues.push_back(m_experts[i]->applyMulticlass(*m_datasets[i])[0]);
172 } else {
173 B2ERROR("Received a value of " << m_nClasses[i] <<
174 " for the number of classes considered by the MVA Expert. This value should be >=2.");
175 }
176 }
177 return responseValues;
178}
179
180void MVAMultipleExpertsModule::setExtraInfoField(Particle* particle, std::string extraInfoName, float responseValue, unsigned int i)
181{
182 if (particle->hasExtraInfo(extraInfoName)) {
183 if (particle->getExtraInfo(extraInfoName) != responseValue) {
184 m_existGivenExtraInfo[i] = true;
185 double current = particle->getExtraInfo(extraInfoName);
186 if (m_overwriteExistingExtraInfo[i] == -1) {
187 if (responseValue < current) particle->setExtraInfo(extraInfoName, responseValue);
188 } else if (m_overwriteExistingExtraInfo[i] == 0) {
189 // don't overwrite!
190 } else if (m_overwriteExistingExtraInfo[i] == 1) {
191 if (responseValue > current) particle->setExtraInfo(extraInfoName, responseValue);
192 } else if (m_overwriteExistingExtraInfo[i] == 2) {
193 particle->setExtraInfo(extraInfoName, responseValue);
194 } else {
195 B2FATAL("m_overwriteExistingExtraInfo must be one of {-1,0,1,2}. Received '" << m_overwriteExistingExtraInfo[i] << "'.");
196 }
197 }
198 } else {
199 particle->addExtraInfo(extraInfoName, responseValue);
200 }
201}
202
204 float responseValue, unsigned int i)
205{
// Stores one expert response as event-level extraInfo.
// NOTE(review): listing line 203 (the first half of the signature) is absent from this listing.
// A field that does not exist yet is added. If it exists, the clash is recorded in
// m_existGivenExtraInfo[i] and resolved by the mode in m_overwriteExistingExtraInfo[i]:
// -1 keeps the lower value, 0 keeps the stored value, 1 keeps the higher value,
// 2 always overwrites; any other mode is fatal.
// Unlike setExtraInfoField() there is no check whether the stored value already equals
// responseValue, so the clash flag is set whenever the field exists.
206 if (eventExtraInfo->hasExtraInfo(extraInfoName)) {
207 m_existGivenExtraInfo[i] = true;
208 double current = eventExtraInfo->getExtraInfo(extraInfoName);
209 if (m_overwriteExistingExtraInfo[i] == -1) {
210 if (responseValue < current) eventExtraInfo->setExtraInfo(extraInfoName, responseValue);
211 } else if (m_overwriteExistingExtraInfo[i] == 0) {
212 // don't overwrite!
213 } else if (m_overwriteExistingExtraInfo[i] == 1) {
214 if (responseValue > current) eventExtraInfo->setExtraInfo(extraInfoName, responseValue);
215 } else if (m_overwriteExistingExtraInfo[i] == 2) {
216 eventExtraInfo->setExtraInfo(extraInfoName, responseValue);
217 } else {
218 B2FATAL("m_overwriteExistingExtraInfo must be one of {-1,0,1,2}. Received '" << m_overwriteExistingExtraInfo[i] << "'.");
219 }
220 } else {
221 eventExtraInfo->addExtraInfo(extraInfoName, responseValue);
222 }
223}
224
// Applies all experts to every particle of every configured ParticleList and stores the
// responses as Particle extraInfo; if no list is configured, the experts are applied once
// with a nullptr particle and the responses are stored as event extraInfo.
// NOTE(review): listing line 225 (signature, presumably event()) is absent from this listing.
226{
227 for (auto& listName : m_listNames) {
228 StoreObjPtr<ParticleList> list(listName);
229 // Calculate target Value for Particles
230 for (unsigned i = 0; i < list->getListSize(); ++i) {
231 Particle* particle = list->getParticle(i);
232 std::vector<std::vector<float>> responseValues = analyse(particle);
233 for (unsigned int j = 0; j < m_identifiers.size(); ++j) {
// Two classes: a single value stored under the configured name.
234 if (m_nClasses[j] == 2) {
235 setExtraInfoField(particle, m_extraInfoNames[j], responseValues[j][0], j);
236 } else if (m_nClasses[j] > 2) {
// More than two classes: one field per class, named "<extraInfoName>_<classIndex>".
// NOTE(review): a size mismatch is only logged; the loop below still reads
// responseValues[j][iClass] for every iClass < m_nClasses[j].
237 if (responseValues[j].size() != m_nClasses[j]) {
238 B2ERROR("Size of results returned by MVA Expert applyMulticlass (" << responseValues[j].size() <<
239 ") does not match the declared number of classes (" << m_nClasses[j] << ").");
240 }
241 for (unsigned int iClass = 0; iClass < m_nClasses[j]; iClass++) {
242 setExtraInfoField(particle, m_extraInfoNames[j] + "_" + std::to_string(iClass), responseValues[j][iClass], j);
243 }
244 } else {
245 B2ERROR("Received a value of " << m_nClasses[j] <<
246 " for the number of classes considered by the MVA Expert. This value should be >=2.");
247 }
248 } //identifiers
249 }
250 } // listnames
// Event-level mode: same dispatch as above, but writing to the EventExtraInfo object,
// which is created first if it is not valid yet.
251 if (m_listNames.empty()) {
252 StoreObjPtr<EventExtraInfo> eventExtraInfo;
253 if (not eventExtraInfo.isValid())
254 eventExtraInfo.create();
255 std::vector<std::vector<float>> responseValues = analyse(nullptr);
256 for (unsigned int j = 0; j < m_identifiers.size(); ++j) {
257 if (m_nClasses[j] == 2) {
258 setEventExtraInfoField(eventExtraInfo, m_extraInfoNames[j], responseValues[j][0], j);
259 } else if (m_nClasses[j] > 2) {
// NOTE(review): as above, the mismatch is reported but the values are indexed regardless.
260 if (responseValues[j].size() != m_nClasses[j]) {
261 B2ERROR("Size of results returned by MVA Expert applyMulticlass (" << responseValues[j].size() <<
262 ") does not match the declared number of classes (" << m_nClasses[j] << ").");
263 }
264 for (unsigned int iClass = 0; iClass < m_nClasses[j]; iClass++) {
265 setEventExtraInfoField(eventExtraInfo, m_extraInfoNames[j] + "_" + std::to_string(iClass), responseValues[j][iClass], j);
266 }
267 } else {
268 B2ERROR("Received a value of " << m_nClasses[j] <<
269 " for the number of classes considered by the MVA Expert. This value should be >=2.");
270 }
271 } //identifiers
272 }
273}
274
// Releases every expert and dataset and, for each expert whose extraInfo name clashed with
// an existing field during processing, prints one warning describing how the clash was
// resolved under the configured overwrite mode.
// NOTE(review): listing line 275 (signature, presumably terminate()) is absent from this listing.
276{
277 for (unsigned int i = 0; i < m_identifiers.size(); ++i) {
278 m_experts[i].reset();
279 m_datasets[i].reset();
280
// m_existGivenExtraInfo[i] is set by the extraInfo setters when the field already existed.
// A mode outside -1/0/1/2 produces no warning here.
281 if (m_existGivenExtraInfo[i]) {
282 if (m_overwriteExistingExtraInfo[i] == -1) {
283 B2WARNING("The extraInfo " << m_extraInfoNames[i] <<
284 " has already been set! It was overwritten by this module if the new value was lower than the previous!");
285 } else if (m_overwriteExistingExtraInfo[i] == 0) {
286 B2WARNING("The extraInfo " << m_extraInfoNames[i] <<
287 " has already been set! The original value was kept and this module did not overwrite it!");
288 } else if (m_overwriteExistingExtraInfo[i] == 1) {
289 B2WARNING("The extraInfo " << m_extraInfoNames[i] <<
290 " has already been set! It was overwritten by this module if the new value was higher than the previous!");
291 } else if (m_overwriteExistingExtraInfo[i] == 2) {
292 B2WARNING("The extraInfo " << m_extraInfoNames[i] << " has already been set! It was overwritten by this module!");
293 }
294 }
295 }
296}
@ c_Event
Different object in each event, all objects/arrays are invalidated after event() function has been ca...
Definition: DataStore.h:59
void init_mva(MVA::Weightfile &weightfile, unsigned int i)
Initialize mva expert, dataset and features Called every time the weightfile in the database changes ...
std::vector< int > m_overwriteExistingExtraInfo
vector of -1/0/1/2: overwrite if lower/ don't overwrite / overwrite if higher/ always overwrite,...
std::vector< std::unique_ptr< MVA::Expert > > m_experts
Vector of pointers to the current MVA Experts.
void setExtraInfoField(Particle *, std::string, float, unsigned int)
Set the extra info field.
std::vector< unsigned int > m_nClasses
number of classes (~outputs) of the MVA Experts.
virtual void initialize() override
Initialize the module.
std::vector< std::unique_ptr< MVA::SingleDataset > > m_datasets
Vector of pointers to the current input datasets.
virtual void event() override
Called for each event.
std::vector< bool > m_existGivenExtraInfo
check if the given extraInfo is already defined.
std::vector< std::vector< const Variable::Manager::Var * > > m_individual_feature_variables
Vector of pointers to the feature variables for each expert.
virtual void terminate() override
Called at the end of the event processing.
double m_signal_fraction_override
Signal Fraction which should be used.
std::vector< std::string > m_identifiers
weight-files
std::vector< std::string > m_listNames
input particle list names
void fillDatasets(Particle *)
Evaluate the variables and fill the Datasets to be used by the experts.
virtual void beginRun() override
Called at the beginning of a new run.
std::map< const Variable::Manager::Var *, float > m_feature_variables
Map containing the values of all needed feature variables.
std::vector< std::vector< float > > analyse(Particle *)
Calculates expert output for given Particle pointer.
void setEventExtraInfoField(StoreObjPtr< EventExtraInfo >, std::string, float, unsigned int)
Set the event extra info field.
std::vector< std::string > m_extraInfoNames
Names under which the SignalProbability is stored in the extraInfo of the Particle object.
std::vector< std::unique_ptr< DBObjPtr< DatabaseRepresentationOfWeightfile > > > m_weightfile_representations
Vector of database pointers to the Database representation of the weightfile.
static void initSupportedInterfaces()
Static function which initializes all supported interfaces, has to be called once before getSupportedI...
Definition: Interface.cc:45
static std::map< std::string, AbstractInterface * > getSupportedInterfaces()
Returns interfaces supported by the MVA Interface.
Definition: Interface.h:53
General options which are shared by all MVA trainings.
Definition: Options.h:62
The Weightfile class serializes all information about a training into an xml tree.
Definition: Weightfile.h:38
static Weightfile loadFromStream(std::istream &stream)
Static function which deserializes a Weightfile from a stream.
Definition: Weightfile.cc:251
void getOptions(Options &options) const
Fills an Option object from the xml tree.
Definition: Weightfile.cc:67
static Weightfile loadFromFile(const std::string &filename)
Static function which loads a Weightfile from a file.
Definition: Weightfile.cc:206
void addSignalFraction(float signal_fraction)
Saves the signal fraction in the xml tree.
Definition: Weightfile.cc:95
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void setPropertyFlags(unsigned int propertyFlags)
Sets the flags for the module properties.
Definition: Module.cc:208
@ c_ParallelProcessingCertified
This module can be run in parallel processing mode safely (All I/O must be done through the data stor...
Definition: Module.h:80
Class to store reconstructed particles.
Definition: Particle.h:76
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:95
Global list of available variables.
Definition: Manager.h:100
static Manager & Instance()
get singleton instance.
Definition: Manager.cc:26
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:559
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:649
Abstract base class for different kinds of events.