Belle II Software  release-05-01-25
NNWaveFitter.cc
1 /**************************************************************************
2  * BASF2 (Belle Analysis Framework 2) *
3  * Copyright(C) 2010 - Belle II Collaboration *
4  * *
5  * Author: The Belle II Collaboration *
6  * Contributors: Peter Kvasnicka *
7  * *
8  * This software is provided "as is" without any warranty. *
9  **************************************************************************/
10 
11 #include <svd/reconstruction/NNWaveFitter.h>
12 #include <iostream>
13 #include <fstream>
14 #include <sstream>
15 #include <algorithm>
16 #include <boost/property_tree/ptree.hpp>
17 #include <boost/property_tree/xml_parser.hpp>
18 #include <framework/logging/Logger.h>
19 
20 using namespace std;
21 using namespace Belle2;
22 using namespace SVD;
23 using namespace Eigen;
24 
38 int NNWaveFitter::readNetworkData(const string& xmlData)
39 {
40  using namespace boost::property_tree;
41  using boost::property_tree::ptree;
42 
43  ptree propertyTree;
44 
45  try {
46  B2DEBUG(400, "Reading xml");
47  stringstream ss;
48  ss << xmlData;
49  read_xml(ss, propertyTree);
50  } catch (const ptree_error& e) {
51  B2ERROR("Failed to parse xml data: " << e.what());
52  } catch (std::exception const& ex) {
53  B2ERROR("STD excpetion " << ex.what() << " in parsing.");
54  return -1;
55  }
56  // 1. Check that we have a classifier with relu activation.
57  try {
58  // 1a. Classifier
59  string function_name = propertyTree.get<string>("PMML.NeuralNetwork.<xmlattr>.functionName");
60  if (function_name != "classification") {
61  B2ERROR("This is incorrect network, expected multiclass classifier.");
62  return -1;
63  }
64  B2DEBUG(400, "Claasifier confirmed.");
65  // 1b. Activation
66  string activationFunction = propertyTree.get<string>("PMML.NeuralNetwork.<xmlattr>.activationFunction");
67  if (activationFunction != "rectifier") {
68  B2ERROR("Expected rectifier (relu) activation, found " << activationFunction << "instead.");
69  return -1;
70  }
71  B2DEBUG(400, "Activation confirmed.");
72  // 1c.Waveform type FIXME: no logic or checking here:
73  string waveFormType = propertyTree.get<string>("PMML.MiningBuildTask.Training.Waveform");
74  B2DEBUG(400, "Waveform set to " << waveFormType);
75  } catch (const ptree_error& e) {
76  B2ERROR("PropertyTree excpetion : " << e.what() << " in network check.");
77  return -1;
78  }
79 
80  // 2. Read network topology
81  try {
82  string pathString = "PMML.MiningBuildTask.NetworkParameters";
83  map<size_t, size_t> m_layer_sizes_map;
84  for (const auto& layer_tag : propertyTree.get_child(pathString)) {
85  if (layer_tag.first == "<xmlattr>") continue;
86  size_t layer_no = layer_tag.second.get<size_t>("<xmlattr>.number");
87  size_t layer_size = layer_tag.second.get<size_t>("<xmlattr>.size");
88  m_layer_sizes_map.insert(make_pair(layer_no, layer_size));
89  }
90  // Once we have layer sizes, we can set weight matrices and intercept
91  // vectors to proper sizes.
92  m_nLayers = m_layer_sizes_map.size();
93  m_layerSizes.resize(m_nLayers);
94  for (size_t iLayer = 0; iLayer < m_nLayers; ++iLayer)
95  m_layerSizes[iLayer] = m_layer_sizes_map[iLayer];
96  for (size_t iLayer = 1; iLayer < m_nLayers; ++iLayer) {
97  m_networkCoefs.insert(make_pair(iLayer, make_pair(
98  MatrixXd(m_layerSizes[iLayer], m_layerSizes[iLayer - 1]),
99  VectorXd(m_layerSizes[iLayer])
100  )
101  ));
102  }
103  for (size_t iLayer = 0; iLayer < m_nLayers; ++iLayer)
104  m_layerStates.push_back(VectorXd(m_layerSizes[iLayer]));
105  B2DEBUG(400, "Network topology read.");
106  } catch (const ptree_error& e) {
107  B2ERROR("PropertyTree excpetion : " << e.what() << " when reading network topology.");
108  return -1;
109  }
110 
111  // Read training parameter bounds
112  try {
113  string pathString("PMML.MiningBuildTask.Training");
114  for (const auto& param_tag : propertyTree.get_child(pathString)) {
115  if (param_tag.first == "Parameter") {
116  string valueString(param_tag.second.get<string>("<xmlattr>.value"));
117  double low = param_tag.second.get<double>("<xmlattr>.low");
118  double high = param_tag.second.get<double>("<xmlattr>.high");
119  if (valueString == "amplitude") {
120  m_amplitudeBounds.first = low;
121  m_amplitudeBounds.second = high;
122  } else if (valueString == "t0") {
123  m_timeShiftBounds.first = low * Unit::ns;
124  m_timeShiftBounds.second = high * Unit::ns;
125  } else if (valueString == "tau") {
126  m_waveWidthBounds.first = low * Unit::ns;
127  m_waveWidthBounds.second = high * Unit::ns;
128  }
129  }
130  }
131  // Set bounds in the tau encoder.
132  // Tau bounds are raw values!
133  m_tauCoder.setBounds(m_amplitudeBounds.first, m_amplitudeBounds.second,
134  m_waveWidthBounds.first, m_waveWidthBounds.second);
135  B2DEBUG(400, "Read parameter bounds.");
136  } catch (const ptree_error& e) {
137  B2ERROR("PropertyTree excpetion: " << e.what() << " when reading parameter bounds.");
138  return -1;
139  }
140 
141  // Reading output binning.
142  try {
143  string pathString("PMML.DataDictionary.DataField");
144  m_binCenters.resize(m_layerSizes[m_nLayers - 1]);
145  m_bins.resize(m_layerSizes[m_nLayers - 1] + 1);
146  for (const auto& value_tag : propertyTree.get_child(pathString)) {
147  if (value_tag.first == "Value") {
148  size_t i = value_tag.second.get<size_t>("<xmlattr>.value");
149  if (i == 1) // numbering is from 1!
150  m_bins[0] = value_tag.second.get<double>("<xmlattr>.lower") * Unit::ns;
151  m_bins[i] = value_tag.second.get<double>("<xmlattr>.upper") * Unit::ns;
152  m_binCenters[i - 1] = value_tag.second.get<double>("<xmlattr>.midpoint") * Unit::ns;
153  }
154  }
155  m_fitTool = std::shared_ptr<NNWaveFitTool>(new NNWaveFitTool(m_bins, m_binCenters, WaveGenerator(m_wave)));
156  B2DEBUG(400, "Outputs done.");
157  } catch (const ptree_error& e) {
158  B2ERROR("PropertyTree excpetion: " << e.what() << " when reading bin data.");
159  return -1;
160  }
161 
162  // Reading neurons.
163  try {
164  string pathString("PMML.NeuralNetwork");
165  for (const auto& nl_tag : propertyTree.get_child(pathString)) {
166  if (nl_tag.first != "NeuralLayer") continue;
167  B2DEBUG(400, "Reading neural layers " << nl_tag.first << " " << nl_tag.second.size());
168  for (const auto& neuron_tag : nl_tag.second) {
169  if (neuron_tag.first != "Neuron") continue;
170  double bias = neuron_tag.second.get<double>("<xmlattr>.bias");
171  string sid = neuron_tag.second.get<string>("<xmlattr>.id");
172  size_t layer = stoi(sid.substr(0, 1)); // !! Won't work with > 9 !!
173  size_t pos = stoi(sid.substr(2, sid.size()));
174  B2DEBUG(400, "Reading neurons " << layer << "/" << pos << " bias: " << bias);
175  m_networkCoefs[layer].second(pos - 1) = bias;
176  for (const auto& con_tag : neuron_tag.second) {
177  if (con_tag.first != "Con") continue;
178  double weight = con_tag.second.get<double>("<xmlattr>.weight");
179  string sid2 = con_tag.second.get<string>("<xmlattr>.from");
180  size_t pos2 = stoi(sid2.substr(sid2.find('/') + 1, sid2.size()));
181  m_networkCoefs[layer].first(pos - 1, pos2 - 1) = weight;
182  B2DEBUG(400, "Reading connections " << sid2 << " weight: " << weight);
183  }
184  }
185  }
186  B2DEBUG(400, "Neurons done.");
187  } catch (const ptree_error& e) {
188  B2ERROR("PropertyTree excpetion: " << e.what() << " when reading neurons.");
189  return -1;
190  }
191  return 0;
192 }
193 
194 bool NNWaveFitter::checkCoefficients(const std::string& dumpname, double tol)
195 {
196  bool result = true;
197  ifstream dump(dumpname);
198 
199  // Read the dumpfile line by line and compare coefficients.
200  string line;
201  getline(dump, line); // Header
202  // The dimension of the matrix is
203  // (m_layerSizes[iLayer], m_layerSizes[iLayer-1])
204  // and the dimension of the intercepts is
205  // m_layerSizes[iLayer]
206  for (size_t iLayer = 1; iLayer < m_nLayers; ++iLayer) {
207  getline(dump, line); // Layer
208  getline(dump, line); // Matrix
209  // the matrix is m_networkCoefs[iLayer].first, and has
210  // m_layerSizes[iLayer] rows with m_layerSizes[iLayer-1] entries
211  // in a row (columns)
212  for (size_t iRow = 0; iRow < m_layerSizes[iLayer]; ++iRow) {
213  getline(dump, line);
214  istringstream iline(line);
215  for (size_t iCol = 0; iCol < m_layerSizes[iLayer - 1]; ++iCol) {
216  double value = 0;
217  iline >> value;
218  if (fabs(value - m_networkCoefs[iLayer].first(iRow, iCol)) > tol) {
219  B2DEBUG(90, "Mismatch in weights in layer: " << iLayer
220  << " row: " << iRow << " col: " << iCol
221  << " C++: " << m_networkCoefs[iLayer].first(iRow, iCol)
222  << " Python: " << value
223  << " Diff: " << m_networkCoefs[iLayer].first(iRow, iCol) - value
224  );
225  result = false;
226  }
227  }
228  }
229 
230  getline(dump, line); // Intercepts
231  // this is m_networkCoefs[iLayer].second, and has m_layerSizes[iLayer]
232  // numbers in a single row.
233  getline(dump, line);
234  istringstream iline(line);
235  for (size_t iRow = 0; iRow < m_layerSizes[iLayer]; ++iRow) {
236  double value = 0;
237  iline >> value;
238  if (fabs(value - m_networkCoefs[iLayer].second(iRow)) > tol) {
239  B2DEBUG(90, "Mismatch in intercepts in layer: " << iLayer
240  << " row: " << iRow
241  << " C++: " << m_networkCoefs[iLayer].second(iRow)
242  << " Python: " << value
243  << " Diff: " << m_networkCoefs[iLayer].second(iRow) - value
244  );
245 
246  result = false;
247  }
248  }
249  }
250  return result;
251 }
252 
253 void NNWaveFitter::setNetwrok(const string& xmlData)
254 {
255  if ((xmlData != "") && (readNetworkData(xmlData) == 0)) {
256  m_isValid = true;
257  } else m_isValid = false;
258  // We don't issue any additional warnings here.
259 }
260 
261 
262 NNWaveFitter::NNWaveFitter(string xmlData)
263 {
264  m_wave = w_betaprime;
265  if (xmlData != "") setNetwrok(xmlData);
266 }
267 
268 
269 std::shared_ptr<nnFitterBinData> NNWaveFitter::getFit(const apvSamples& samples, double tau)
270 {
271  // Create the fit result object
272  std::shared_ptr<nnFitterBinData> result(new nnFitterBinData);
273  if (!m_isValid) return result;
274 
275  copy(samples.begin(), samples.end(), &m_layerStates[0](0));
276  // Add tau in the last input cell
277  m_layerStates[0][samples.size()] = m_tauCoder.encodeTau(tau);
278  ostringstream os;
279  os << "Fitting with tau = " << tau << " encoded to " << m_tauCoder.encodeTau(tau) << endl
280  << "Samples: ";
281  copy(samples.begin(), samples.end(), ostream_iterator<double>(os, " "));
282  os << endl;
283  m_tauCoder.print(os);
284  B2DEBUG(100, os.str());
285  // Calculate network response
286  os.str() = "";
287  os << "Layer states: " << endl;
288  for (size_t iLayer = 1; iLayer < m_nLayers; ++iLayer) {
289  m_layerStates[iLayer] = (
290  m_networkCoefs[iLayer].first * m_layerStates[iLayer - 1]
291  + m_networkCoefs[iLayer].second
292  ).unaryExpr(relu);
293  os << m_layerStates[iLayer] << endl;
294  }
295  B2DEBUG(100, os.str());
296 
297  // Output transformation 1: softmax normalization
298 
299  m_layerStates[m_nLayers - 1] = softmax(m_layerStates[m_nLayers - 1]);
300 
301  result->resize(m_layerStates[m_nLayers - 1].size());
302  for (size_t i = 0; i < result->size(); ++i)
303  (*result)[i] = m_layerStates[m_nLayers - 1][i];
304 
305  os.str() = "";
306  os << "Result: " << endl;
307  copy(result->begin(), result->end(), ostream_iterator<double>(os, " "));
308  os << endl;
309  B2DEBUG(100, os.str());
310 
311  return result;
312 }
313 
Belle2::SVD::WaveGenerator
Waveform generator This is a functor to calculate signal values.
Definition: SVDSimulationTools.h:102
Belle2
Abstract base class for different kinds of events.
Definition: MillepedeAlgorithm.h:19
Belle2::SVD::apvSamples
std::array< apvSampleBaseType, nAPVSamples > apvSamples
vector od apvSample BaseType objects
Definition: SVDSimulationTools.h:41
Belle2::SVD::w_betaprime
double w_betaprime(double t)
Beta-prime wave function, x^alpha/(1+x)^beta.
Definition: SVDSimulationTools.h:89
Belle2::SVD::NNWaveFitTool
The class holds arrays of bins and bin centers, and a wave generator object containing information on...
Definition: NNWaveFitTool.h:93
Belle2::SVD::nnFitterBinData
std::vector< double > nnFitterBinData
Vector of values defined for bins, such as bin times or bin probabilities.
Definition: NNWaveFitTool.h:32