Belle II Software  release-08-01-10
CDCTriggerNeuroDataModule.cc
1 /**************************************************************************
2  * basf2 (Belle II Analysis Software Framework) *
3  * Author: The Belle II Collaboration *
4  * *
5  * See git log for contributors and copyright holders. *
6  * This file is licensed under LGPL-3.0, see LICENSE.md. *
7  **************************************************************************/
8 
9 
10 #include "trg/cdc/modules/neurotrigger/CDCTriggerNeuroDataModule.h"
11 
12 #include <fstream>
13 #include <framework/datastore/StoreArray.h>
14 #include <mdst/dataobjects/MCParticle.h>
15 #include <tracking/dataobjects/RecoTrack.h>
16 #include <trg/cdc/dataobjects/CDCTriggerSegmentHit.h>
17 #include <trg/cdc/dataobjects/CDCTriggerTrack.h>
18 #include <framework/datastore/StoreObjPtr.h>
19 #include <framework/dataobjects/EventMetaData.h>
20 #include <framework/core/ModuleParam.templateDetails.h>
21 
22 #include <cdc/geometry/CDCGeometryPar.h>
23 #include <framework/gearbox/Unit.h>
24 #include <framework/geometry/B2Vector3.h>
25 
26 
27 #include <iostream>
28 #include <cmath>
29 #include "boost/iostreams/filter/gzip.hpp"
30 #include "boost/iostreams/filtering_streambuf.hpp"
31 #include "boost/iostreams/filtering_stream.hpp"
32 #include "boost/multi_array.hpp"
33 #define BOOST_MULTI_ARRAY_NO_GENERATORS
34 
35 namespace Belle2 {
40  REG_MODULE(CDCTriggerNeuroData);
41 
43  {
45  "This module takes 2dtracks, track segments, and targettracks (either recotracks or mcparticles) as input and generates training data for the neurotrigger in a tab separated, gzip compressed file."
46  );
47  // parameters for saving / loading
48  addParam("hitCollectionName", m_hitCollectionName,
49  "Name of the input StoreArray of CDCTriggerSegmentHits. The Axials need to have a relation to inputtracks",
50  std::string(""));
51  addParam("inputCollectionName", m_inputCollectionName,
52  "Name of the StoreArray holding the 2D input tracks.",
53  std::string("CDCTriggerNNInput2DTracks"));
54  addParam("targetCollectionName", m_targetCollectionName,
55  "Name of the MCParticle/RecoTrack collection used as target values.",
56  std::string("RecoTracks"));
57  addParam("trainOnRecoTracks", m_trainOnRecoTracks,
58  "If true, use RecoTracks as targets instead of MCParticles.",
59  true);
60  addParam("EventTimeName", m_EventTimeName,
61  "Name of the event time object.",
62  std::string("CDCTriggerNeuroETFT0"));
63  addParam("NeuroTrackInputMode", m_neuroTrackInputMode,
64  "When using real tracks, use neurotracks instead of 2dtracks as input to the neurotrigger. this is important to get the relations right.",
65  false);
66  addParam("singleUse", m_singleUse,
67  "Only use a track for a single expert", true);
68  addParam("configFileName", m_configFileName,
69  "Name of the configuration file. This File should be created by the CDCTriggerIDHistModule and will be extended in this module",
70  std::string(""));
71  addParam("writeconfigFileName", m_writeconfigFileName,
72  "Name of the configuration file, which will be written. If left blank, the same file as the input configuration file is used and it will be overwritten.",
73  std::string(""));
74  addParam("gzipFilename", m_filename,
75  "Name of the gzip file, where the training samples will be saved.",
76  std::string("out.gz"));
77 
78 
79 
80 
81  }
82  void
84  {
85  // register store objects
87  // decided wether to load mcparticles or recotracks as training targets.
88  // This has to be done because the train target values are obtained
89  // for both classes in a different way.
90  if (m_trainOnRecoTracks) {
92  targets.isRequired(m_targetCollectionName);
93  } else {
95  targets.isRequired(m_targetCollectionName);
96  }
97  // initialize the neurotrigger object, but use the parameters given in the module
98  if (m_configFileName != "") {
101  } else {
102  B2ERROR("The Neurotrigger needs to be initialized by a configuration file! Make sure to give the configuration file as a parameter.");
103  }
104  // in this version, we first need an idhistfile in prior to collect the training data.
105  // this idhistfile is created by running another steering file before
106  // and stored in the config file loaded in the previous step.
107  m_trainSet.clear();
109  // create an empty dataset of training data for each expert network
110  for (unsigned iMLP = 0; iMLP < m_NeuroTrigger.nSectors(); ++iMLP) {
111  m_trainSet.push_back(CDCTriggerMLPData());
112  int layerId = 3;
113  for (int iSL = 0; iSL < 9; ++iSL) {
114  m_trainSet[iMLP].addCounters(cdc.nWiresInLayer(layerId));
115  layerId += (iSL > 0 ? 6 : 7);
116  }
117  }
118  // this one sets up the other root store arrays needed to collect training data
120  // consistency check of training parameters
121  if (m_NeuroTrigger.nSectors() != m_trainSet.size()) {
122  B2ERROR("Number of ID sets (" << m_trainSet.size() << ") should match " <<
123  "number of sectors (" << m_NeuroTrigger.nSectors() << ")");
124  }
125  // overwrite previous file with empty file, in case it already exists
126  std::ofstream gzipfile4(m_filename, std::ios_base::trunc | std::ios_base::binary);
127  boost::iostreams::filtering_ostream outStream;
128  outStream.push(boost::iostreams::gzip_compressor());
129  outStream.push(gzipfile4);
131  outStream << sample.headline << std::endl;
132  }
133  void
135  {
136  StoreObjPtr<EventMetaData> evtmetadata;
137  for (int itrack = 0; itrack < m_tracks.getEntries(); ++itrack) {
138  // get related MCParticle/RecoTrack for target
139  // and retrieve track parameters
140  //float phi0Target = 0;
141  //float invptTarget = 0;
142  float thetaTarget = 0;
143  float zTarget = 0;
144  if (m_trainOnRecoTracks) {
145  RecoTrack* recoTrack =
146  m_tracks[itrack]->getRelatedTo<RecoTrack>(m_targetCollectionName);
147  if (!recoTrack) {
148  B2DEBUG(150, "Skipping CDCTriggerTrack without relation to RecoTrack.");
149  continue;
150  }
151  // a RecoTrack has multiple representations for different particle hypothesis
152  // -> just take the first one that does not give errors.
153  const std::vector<genfit::AbsTrackRep*>& reps = recoTrack->getRepresentations();
154  bool foundValidRep = false;
155  for (unsigned irep = 0; irep < reps.size() && !foundValidRep; ++irep) {
156  if (!recoTrack->wasFitSuccessful(reps[irep])) {
157  continue;
158  }
159 
160  // get state (position, momentum etc.) from hit closest to IP and
161  // extrapolate to z-axis (may throw an exception -> continue to next representation)
162  try {
164  recoTrack->getMeasuredStateOnPlaneClosestTo(ROOT::Math::XYZVector(0, 0, 0), reps[irep]);
165  reps[irep]->extrapolateToLine(state, TVector3(0, 0, -1000), TVector3(0, 0, 2000));
166  // flip tracks if necessary, such that trigger tracks and reco tracks
167  // point in the same direction
168  if (state.getMom().Dot(B2Vector3D(m_tracks[itrack]->getDirection())) < 0) {
169  state.setPosMom(state.getPos(), -state.getMom());
170  state.setChargeSign(-state.getCharge());
171  }
172  // get track parameters
173  //phi0Target = state.getMom().Phi();
174  //invptTarget = state.getCharge() / state.getMom().Pt();
175  thetaTarget = state.getMom().Theta();
176  zTarget = state.getPos().Z();
177  } catch (...) {
178  continue;
179  }
180  // break loop
181  foundValidRep = true;
182  }
183  if (!foundValidRep) {
184  B2DEBUG(150, "No valid representation found for RecoTrack, skipping.");
185  continue;
186  }
187  } else {
188  MCParticle* mcTrack =
189  m_tracks[itrack]->getRelatedTo<MCParticle>(m_targetCollectionName);
190  if (not mcTrack) {
191  B2DEBUG(150, "Skipping CDCTriggerTrack without relation to MCParticle.");
192  continue;
193  }
194  //phi0Target = mcTrack->getMomentum().Phi();
195  //invptTarget = mcTrack->getCharge() / mcTrack->getMomentum().Pt();
196  thetaTarget = mcTrack->getMomentum().Theta();
197  zTarget = mcTrack->getProductionVertex().Z();
198  }
200 
201  // find all matching sectors
202  float phi0 = m_tracks[itrack]->getPhi0();
203  float invpt = m_tracks[itrack]->getKappa(1.5);
204  float theta = atan2(1., m_tracks[itrack]->getCotTheta());
205  std::vector<int> sectors = m_NeuroTrigger.selectMLPsTrain(phi0, invpt, theta);
206  if (sectors.size() == 0) continue;
207 
208  // get target values
209  std::vector<float> targetRaw = {};
211  targetRaw.push_back(zTarget);
213  targetRaw.push_back(thetaTarget);
214  for (unsigned i = 0; i < sectors.size(); ++i) {
215  int isector = sectors[i];
216  std::vector<float> target = m_NeuroTrigger[isector].scaleTarget(targetRaw);
217  // skip out of range targets or rescale them
218  bool outOfRange = false;
219  for (unsigned itarget = 0; itarget < target.size(); ++itarget) {
220  if (fabs(target[itarget]) > 1.) {
221  outOfRange = true;
222  target[itarget] /= fabs(target[itarget]);
223  }
224  }
225  if (!m_neuroParameters.rescaleTarget && outOfRange) {
226  continue;
227  }
228  //
229  // read out or determine event time
231  // check hit pattern
232  unsigned long hitPattern = m_NeuroTrigger.getCompleteHitPattern(isector, *m_tracks[itrack], m_neuroTrackInputMode); // xxxxx0xxx
233  // sectorpattern holds the absolut necessary SLs for the expert
234  unsigned long sectorPattern = m_NeuroTrigger[isector].getSLpattern(); // 010100010
235  // sectorpatternmask holds the SLs, which are generally used to determine the right expert
236  unsigned long sectorPatternMask = m_NeuroTrigger[isector].getSLpatternMask(); // 010101010
237  B2DEBUG(250, "hitPattern " << hitPattern << " sectorPattern " << sectorPattern);
238  // if multiple experts should be trained with one track, the necessary SLs have to be in the hitpattern
239  if (!m_singleUse && sectorPattern > 0 && (sectorPattern & hitPattern) != sectorPattern) {
240  B2DEBUG(250, "hitPattern not matching " << (sectorPattern & hitPattern));
241  continue;
242  // if one track should only be used for one expert, the absolute necessary SLs for this expert should match exactly the set within the hitpattern of the generally used SLs for determining the expert.
243  } else if (m_singleUse && sectorPattern > 0 && (sectorPattern) != (hitPattern & sectorPatternMask)) {
244  // 010100010 != 0x0x0x0x0
245  B2DEBUG(250, "hitPattern not matching " << (sectorPatternMask & hitPattern));
246  continue;
247  }
248  // check, if enough axials are there. first, we select the axial bits from the
249  // hitpattern (341 = int('101010101',base=2)) and then check if the number of
250  // ones is equal or greater than 4.
251  if ((hitPattern & 341) != 341 && // this is an ugly workaround, because popcount is only
252  (hitPattern & 341) != 340 && // available with c++20 and newer
253  (hitPattern & 341) != 337 &&
254  (hitPattern & 341) != 325 &&
255  (hitPattern & 341) != 277 &&
256  (hitPattern & 341) != 85) {
257  B2DEBUG(250, "Not enough axial hits (<4), skipping!");
258  continue;
259  }
260  // get training data
261  std::vector<unsigned> hitIds;
262  if (m_neuroTrackInputMode) {
263  hitIds = m_NeuroTrigger.selectHitsHWSim(isector, *m_tracks[itrack]);
264  } else {
265  hitIds = m_NeuroTrigger.selectHits(isector, *m_tracks[itrack]);
266  }
267  // add a "sample" it is a full training set including some zeroes for the neurotrigger values (raw and scaled z and theta). Those are sometimes used in the training for reference, but cannot be added here without running *some* neuotrigger instance.
268  CDCTriggerMLPData::NeuroSet<27, 2> sample(m_NeuroTrigger.getInputVector(isector, hitIds).data(), target.data(),
269  evtmetadata->getExperiment(), evtmetadata->getRun(), evtmetadata->getSubrun(), evtmetadata->getEvent(), itrack, i,
270  m_tracks.getEntries(), 0, 0, 0, 0, phi0, theta, invpt);
271  //check whether we already have enough samples
272  m_trainSet[isector].addSample(sample);
273  if ((m_trainSet)[isector].nSamples() % 1000 == 0) {
274  B2DEBUG(50, m_trainSet[isector].nSamples() << " samples for training collected for sector " << isector);
275  }
276  std::ofstream gzipfile4(m_filename, std::ios_base::app | std::ios_base::binary);
277  boost::iostreams::filtering_ostream outStream;
278  outStream.push(boost::iostreams::gzip_compressor());
279  outStream.push(gzipfile4);
280  outStream << sample << std::endl;
281  m_trackcounter++;
282  }
283 
284  }
285  }
286  void
288  {
289  std::stringstream ss;
290  for (unsigned int i = 0; i < m_trainSet.size(); ++i) {
291  ss << "expert " << i << " : " << m_trainSet[i].nSamples() << ", ";
292  }
293  // lock the parameters, which were used to obtain the training data, so they cannot be altered in the next steps.
298  for (auto x : m_neuroParameters.maxHitsPerSL) {
299  x.lock();
300  }
301  if (m_writeconfigFileName == "") {
302  B2INFO("write writeconfig now: " + m_configFileName);
304  } else {
305  B2INFO("write writeconfig now: " + m_writeconfigFileName);
307  }
308 
309  B2DEBUG(10, "Collected events: " << ss.str());
310  }
312 }
Struct for training data of a single MLP for the neuro trigger.
std::string m_EventTimeName
name of the event time StoreObjPtr
std::vector< CDCTriggerMLPData > m_trainSet
Sets of training data for all sectors.
bool m_trainOnRecoTracks
Switch between MCParticles or RecoTracks as targets.
NeuroTriggerParameters m_neuroParameters
Parameters for the NeuroTrigger.
NeuroTrigger m_NeuroTrigger
Instance of the NeuroTrigger.
std::string m_targetCollectionName
Name of the MCParticles/RecoTracks collection used as target values.
bool m_neuroTrackInputMode
Switch to rescale out of range target values or ignore them.
std::string m_inputCollectionName
Name of the StoreArray containing the input 2D tracks.
StoreArray< CDCTriggerTrack > m_tracks
List of input tracks.
std::string m_filename
Name of gzip file where the training data are saved.
bool m_singleUse
use a track only once and not for every expert
std::string m_writeconfigFileName
Name of the configuration file used in the module to write the neuroparamters.
std::string m_hitCollectionName
Name of the StoreArray containing the input track segment hits.
std::string m_configFileName
Name of the configuration file used in the module to load the neuroparamters.
The Class for CDC Geometry Parameters.
static CDCGeometryPar & Instance(const CDCGeometry *=nullptr)
Static method to get a reference to the CDCGeometryPar instance.
A Class to store the Monte Carlo particle information.
Definition: MCParticle.h:32
ROOT::Math::XYZVector getProductionVertex() const
Return production vertex position.
Definition: MCParticle.h:189
ROOT::Math::XYZVector getMomentum() const
Return momentum.
Definition: MCParticle.h:198
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void lock()
lock the variable, to make it read only
std::vector< NNTParam< unsigned short > > maxHitsPerSL
Maximum number of hits in a single super layer for all networks.
NNTParam< bool > rescaleTarget
flag to allow for target tracks lying out of the output range to be rescaled during training.
NNTParam< bool > targetTheta
train theta as output
NNTParam< unsigned > ETOption
Determine, how the event time should be obtained.
NNTParam< bool > targetZ
train z as output
const std::string et_option() const
return the string variant ov the et option
void loadconfigtxt(const std::string &filename)
load the configuration from a file
void saveconfigtxt(const std::string &filename)
save the configuration to a file
void initialize(const Parameters &p)
Set parameters and get some network independent parameters.
void updateTrack(const CDCTriggerTrack &track)
Calculate 2D phi position and arclength for the given track and store them.
std::vector< unsigned > selectHits(unsigned isector, const CDCTriggerTrack &track, bool returnAllRelevant=false)
Select best hits for each super layer.
void initializeCollections(std::string hitCollectionName, std::string eventTimeName, const std::string &et_option)
set the hit collection and event time to required and store the hit collection name
std::vector< float > getInputVector(unsigned isector, const std::vector< unsigned > &hitIds)
Calculate input values for MLP.
std::vector< unsigned > selectHitsHWSim(unsigned isector, const CDCTriggerTrack &track)
Select hits for each super layer from the ones related to input track.
std::vector< int > selectMLPsTrain(float phi0, float invpt, float theta)
Select all matching expert MLPs based on the given track parameters.
unsigned long getCompleteHitPattern(unsigned isector, const CDCTriggerTrack &track, const bool neurotrackinputmode)
Get complete hit pattern of neurotrack.
void getEventTime(unsigned isector, const CDCTriggerTrack &track, std::string et_option, const bool)
Read out the event time and store it.
unsigned nSectors() const
return number of neural networks
Definition: NeuroTrigger.h:164
This is the Reconstruction Event-Data Model Track.
Definition: RecoTrack.h:79
bool wasFitSuccessful(const genfit::AbsTrackRep *representation=nullptr) const
Returns true if the last fit with the given representation was successful.
Definition: RecoTrack.cc:336
const genfit::MeasuredStateOnPlane & getMeasuredStateOnPlaneClosestTo(const ROOT::Math::XYZVector &closestPoint, const genfit::AbsTrackRep *representation=nullptr)
Return genfit's MasuredStateOnPlane, that is closest to the given point useful for extrapolation of m...
Definition: RecoTrack.cc:426
const std::vector< genfit::AbsTrackRep * > & getRepresentations() const
Return a list of track representations. You are not allowed to modify or delete them!
Definition: RecoTrack.h:638
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
int getEntries() const
Get the number of objects in the array.
Definition: StoreArray.h:216
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
#StateOnPlane with additional covariance matrix.
REG_MODULE(arichBtest)
Register the Module.
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
virtual void initialize() override
Initialize the module.
virtual void event() override
Called once for each event.
virtual void terminate() override
Do the training for all sectors.
CDCTriggerNeuroDataModule()
Constructor, for setting module description and parameters.
Abstract base class for different kinds of events.
Struct to keep one set of training data for either training, validation or testing.