Belle II Software prerelease-10-00-00a
CDCTriggerNeuroDataModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9
10#include "trg/cdc/modules/neurotrigger/CDCTriggerNeuroDataModule.h"
11
12#include <fstream>
13#include <framework/datastore/StoreArray.h>
14#include <mdst/dataobjects/MCParticle.h>
15#include <tracking/dataobjects/RecoTrack.h>
16#include <trg/cdc/dataobjects/CDCTriggerSegmentHit.h>
17#include <trg/cdc/dataobjects/CDCTriggerTrack.h>
18#include <framework/datastore/StoreObjPtr.h>
19#include <framework/dataobjects/EventMetaData.h>
20#include <framework/core/ModuleParam.templateDetails.h>
21
22#include <cdc/geometry/CDCGeometryPar.h>
23#include <framework/gearbox/Unit.h>
24#include <framework/geometry/B2Vector3.h>
25
26
27#include <iostream>
28#include <cmath>
29#include "boost/iostreams/filter/gzip.hpp"
30#include "boost/iostreams/filtering_streambuf.hpp"
31#include "boost/iostreams/filtering_stream.hpp"
32#include "boost/multi_array.hpp"
33#define BOOST_MULTI_ARRAY_NO_GENERATORS
34
35namespace Belle2 {
40 REG_MODULE(CDCTriggerNeuroData);
41
43 {
45 "This module takes 2dtracks, track segments, and targettracks (either recotracks or mcparticles) as input and generates training data for the neurotrigger in a tab separated, gzip compressed file."
46 );
47 // parameters for saving / loading
48 addParam("hitCollectionName", m_hitCollectionName,
49 "Name of the input StoreArray of CDCTriggerSegmentHits. The Axials need to have a relation to inputtracks",
50 std::string(""));
51 addParam("inputCollectionName", m_inputCollectionName,
52 "Name of the StoreArray holding the 2D input tracks.",
53 std::string("CDCTriggerNNInput2DTracks"));
54 addParam("targetCollectionName", m_targetCollectionName,
55 "Name of the MCParticle/RecoTrack collection used as target values.",
56 std::string("RecoTracks"));
57 addParam("trainOnRecoTracks", m_trainOnRecoTracks,
58 "If true, use RecoTracks as targets instead of MCParticles.",
59 true);
60 addParam("EventTimeName", m_EventTimeName,
61 "Name of the event time object.",
62 std::string("CDCTriggerNeuroETFT0"));
63 addParam("NeuroTrackInputMode", m_neuroTrackInputMode,
64 "When using real tracks, use neurotracks instead of 2dtracks as input to the neurotrigger. this is important to get the relations right.",
65 false);
66 addParam("singleUse", m_singleUse,
67 "Only use a track for a single expert", true);
68 addParam("configFileName", m_configFileName,
69 "Name of the configuration file. This File should be created by the CDCTriggerIDHistModule and will be extended in this module",
70 std::string(""));
71 addParam("writeconfigFileName", m_writeconfigFileName,
72 "Name of the configuration file, which will be written. If left blank, the same file as the input configuration file is used and it will be overwritten.",
73 std::string(""));
74 addParam("gzipFilename", m_filename,
75 "Name of the gzip file, where the training samples will be saved.",
76 std::string("out.gz"));
77 addParam("SaveFakeTrack", m_SaveFakeTrack,
78 "Flag for whether save fake track or not",
79 false);
80
81
82
83 }
84 void
86 {
87 // register store objects
89 // decided whether to load mcparticles or recotracks as training targets.
90 // This has to be done because the train target values are obtained
91 // for both classes in a different way.
94 targets.isRequired(m_targetCollectionName);
95 } else {
97 targets.isRequired(m_targetCollectionName);
98 }
99 // initialize the neurotrigger object, but use the parameters given in the module
100 if (m_configFileName != "") {
101 m_neuroParameters.loadconfigtxt(m_configFileName);
103 } else {
104 B2ERROR("The Neurotrigger needs to be initialized by a configuration file! Make sure to give the configuration file as a parameter.");
105 }
106 // in this version, we first need an idhistfile in prior to collect the training data.
107 // this idhistfile is created by running another steering file before
108 // and stored in the config file loaded in the previous step.
109 m_trainSet.clear();
111 // create an empty dataset of training data for each expert network
112 for (unsigned iMLP = 0; iMLP < m_NeuroTrigger.nSectors(); ++iMLP) {
113 m_trainSet.push_back(CDCTriggerMLPData());
114 int layerId = 3;
115 for (int iSL = 0; iSL < 9; ++iSL) {
116 m_trainSet[iMLP].addCounters(cdc.nWiresInLayer(layerId));
117 layerId += (iSL > 0 ? 6 : 7);
118 }
119 }
120 // this one sets up the other root store arrays needed to collect training data
121 m_NeuroTrigger.initializeCollections(m_hitCollectionName, m_EventTimeName, m_neuroParameters.et_option());
122 // consistency check of training parameters
123 if (m_NeuroTrigger.nSectors() != m_trainSet.size()) {
124 B2ERROR("Number of ID sets (" << m_trainSet.size() << ") should match " <<
125 "number of sectors (" << m_NeuroTrigger.nSectors() << ")");
126 }
127 // overwrite previous file with empty file, in case it already exists
128 std::ofstream gzipfile4(m_filename, std::ios_base::trunc | std::ios_base::binary);
129 boost::iostreams::filtering_ostream outStream;
130 outStream.push(boost::iostreams::gzip_compressor());
131 outStream.push(gzipfile4);
132 int inlen = m_neuroParameters.AdditionWireMode ? 9 * (3 + m_neuroParameters.AdditionInputPerSL) : 27 ;
133 CDCTriggerMLPData::NeuroSet sample(inlen);
134 outStream << sample.headline << std::endl;
135 }
136 void
138 {
139 StoreObjPtr<EventMetaData> evtmetadata;
140 B2DEBUG(150, Form("Number of Track : %d", m_tracks.getEntries()));
141 for (int itrack = 0; itrack < m_tracks.getEntries(); ++itrack) {
142 // get related MCParticle/RecoTrack for target
143 // and retrieve track parameters
144 //float phi0Target = 0;
145 //float invptTarget = 0;
146 float thetaTarget = 0;
147 float zTarget = 0;
148 float PtTarget = 0;
149 bool isFakeTrack = false;
150
152 RecoTrack* recoTrack =
153 m_tracks[itrack]->getRelatedTo<RecoTrack>(m_targetCollectionName);
154 if (!recoTrack) {
155 if (m_SaveFakeTrack)
156 isFakeTrack = true;
157 else {
158 B2DEBUG(150, "Skipping CDCTriggerTrack without relation to RecoTrack.");
159 continue;
160 }
161 } else {
162 // a RecoTrack has multiple representations for different particle hypothesis
163 // -> just take the first one that does not give errors.
164 const std::vector<genfit::AbsTrackRep*>& reps = recoTrack->getRepresentations();
165 bool foundValidRep = false;
166 for (unsigned irep = 0; irep < reps.size() && !foundValidRep; ++irep) {
167 if (!recoTrack->wasFitSuccessful(reps[irep])) {
168 continue;
169 }
170
171 // get state (position, momentum etc.) from hit closest to IP and
172 // extrapolate to z-axis (may throw an exception -> continue to next representation)
173 try {
174 genfit::MeasuredStateOnPlane state =
175 recoTrack->getMeasuredStateOnPlaneClosestTo(ROOT::Math::XYZVector(0, 0, 0), reps[irep]);
176 reps[irep]->extrapolateToLine(state, TVector3(0, 0, -1000), TVector3(0, 0, 2000));
177 // flip tracks if necessary, such that trigger tracks and reco tracks
178 // point in the same direction
179 if (state.getMom().Dot(B2Vector3D(m_tracks[itrack]->getDirection())) < 0) {
180 state.setPosMom(state.getPos(), -state.getMom());
181 state.setChargeSign(-state.getCharge());
182 }
183 // get track parameters
184 //phi0Target = state.getMom().Phi();
185 //invptTarget = state.getCharge() / state.getMom().Pt();
186 thetaTarget = state.getMom().Theta();
187 zTarget = state.getPos().Z();
188 PtTarget = state.getMom().Pt();
189 } catch (...) {
190 continue;
191 }
192 // break loop
193 foundValidRep = true;
194 }
195 if (!foundValidRep) {
196 B2DEBUG(150, "No valid representation found for RecoTrack, skipping.");
197 continue;
198 }
199 }
200 } else {
201 MCParticle* mcTrack =
202 m_tracks[itrack]->getRelatedTo<MCParticle>(m_targetCollectionName);
203 if (!mcTrack) {
204 if (m_SaveFakeTrack)
205 isFakeTrack = true;
206 else {
207 B2DEBUG(150, "Skipping CDCTriggerTrack without relation to MCParticle.");
208 continue;
209 }
210 } else {
211 //phi0Target = mcTrack->getMomentum().Phi();
212 //invptTarget = mcTrack->getCharge() / mcTrack->getMomentum().rho();
213 thetaTarget = mcTrack->getMomentum().Theta();
214 zTarget = mcTrack->getProductionVertex().Z();
215 PtTarget = mcTrack->getMomentum().rho();
216 }
217 }
218 m_NeuroTrigger.updateTrack(*m_tracks[itrack]);
219
220 // find all matching sectors
221 float phi0 = m_tracks[itrack]->getPhi0();
222 float invpt = m_tracks[itrack]->getKappa(1.5);
223 float theta = atan2(1., m_tracks[itrack]->getCotTheta());
224 std::vector<int> sectors = m_NeuroTrigger.selectMLPsTrain(phi0, invpt, theta);
225 if (sectors.size() == 0) continue;
226
227 // get target values
228 std::vector<float> targetRaw = {};
229 if (!isFakeTrack) {
230 if (m_neuroParameters.targetZ)
231 targetRaw.push_back(zTarget);
232 if (m_neuroParameters.targetTheta)
233 targetRaw.push_back(thetaTarget);
234 }
235 for (unsigned i = 0; i < sectors.size(); ++i) {
236 int isector = sectors[i];
237 std::vector<float> target;
238 if (isFakeTrack) {
239 target.push_back(2);
240 target.push_back(2);
241 } else {
242 target = m_NeuroTrigger[isector].scaleTarget(targetRaw);
243 // skip out of range targets or rescale them
244 bool outOfRange = false;
245 for (unsigned itarget = 0; itarget < target.size(); ++itarget) {
246 if (fabs(target[itarget]) > 1.) {
247 outOfRange = true;
248 target[itarget] /= fabs(target[itarget]);
249 }
250 }
251 if (!m_neuroParameters.rescaleTarget && outOfRange) {
252 continue;
253 }
254 }
255
256 //
257 // read out or determine event time
258 m_NeuroTrigger.getEventTime(isector, *m_tracks[itrack], m_neuroParameters.et_option(), m_neuroTrackInputMode);
259 // check hit pattern
260 unsigned long hitPattern = m_NeuroTrigger.getCompleteHitPattern(isector, *m_tracks[itrack], m_neuroTrackInputMode); // xxxxx0xxx
261 // sectorpattern holds the absolute necessary SLs for the expert
262 unsigned long sectorPattern = m_NeuroTrigger[isector].getSLpattern(); // 010100010
263 // sectorpatternmask holds the SLs, which are generally used to determine the right expert
264 unsigned long sectorPatternMask = m_NeuroTrigger[isector].getSLpatternMask(); // 010101010
265 B2DEBUG(250, "hitPattern " << hitPattern << " sectorPattern " << sectorPattern);
266 // if multiple experts should be trained with one track, the necessary SLs have to be in the hitpattern
267 if (!m_singleUse && sectorPattern > 0 && (sectorPattern & hitPattern) != sectorPattern) {
268 B2DEBUG(250, "hitPattern not matching " << (sectorPattern & hitPattern));
269 continue;
270 // if one track should only be used for one expert, the absolute necessary SLs for this expert should match exactly the set within the hitpattern of the generally used SLs for determining the expert.
271 } else if (m_singleUse && sectorPattern > 0 && (sectorPattern) != (hitPattern & sectorPatternMask)) {
272 // 010100010 != 0x0x0x0x0
273 B2DEBUG(250, "hitPattern not matching " << (sectorPatternMask & hitPattern));
274 continue;
275 }
276 // check, if enough axials are there. first, we select the axial bits from the
277 // hitpattern (341 = int('101010101',base=2)) and then check if the number of
278 // ones is equal or greater than 4.
279 int count = 0;
280 unsigned axialPattern = hitPattern & 0b101010101;
281 while (axialPattern) {
282 count += axialPattern & 1;
283 axialPattern >>= 1;
284 }
285 if (count < 4) {
286 B2DEBUG(250, "Not enough axial hits (<4), skipping!");
287 continue;
288 }
289 // get training data
290 std::vector<unsigned> hitIds;
292 hitIds = m_NeuroTrigger.selectHitsHWSim(isector, *m_tracks[itrack]);
293 } else {
294 hitIds = m_NeuroTrigger.selectHits(isector, *m_tracks[itrack]);
295 }
296 // add a "sample" it is a full training set including some zeroes for the neurotrigger values (raw and scaled z and theta). Those are sometimes used in the training for reference, but cannot be added here without running *some* neuotrigger instance.
297 CDCTriggerMLPData::NeuroSet sample(m_NeuroTrigger.getInputVector(isector, hitIds), target,
298 evtmetadata->getExperiment(), evtmetadata->getRun(), evtmetadata->getSubrun(), evtmetadata->getEvent(), itrack, i,
299 m_tracks.getEntries(), 0, 0, 0, 0, phi0, theta, invpt, PtTarget);
300 //check whether we already have enough samples
301 m_trainSet[isector].addSample(sample);
302 if ((m_trainSet)[isector].nSamples() % 1000 == 0) {
303 B2DEBUG(50, m_trainSet[isector].nSamples() << " samples for training collected for sector " << isector);
304 }
305 std::ofstream gzipfile4(m_filename, std::ios_base::app | std::ios_base::binary);
306 boost::iostreams::filtering_ostream outStream;
307 outStream.push(boost::iostreams::gzip_compressor());
308 outStream.push(gzipfile4);
309
310 outStream << sample << std::endl;
312 }
313
314 }
315 }
316 void
318 {
319 std::stringstream ss;
320 for (unsigned int i = 0; i < m_trainSet.size(); ++i) {
321 ss << "expert " << i << " : " << m_trainSet[i].nSamples() << ", ";
322 }
323 // lock the parameters, which were used to obtain the training data, so they cannot be altered in the next steps.
324 m_neuroParameters.ETOption.lock();
325 m_neuroParameters.rescaleTarget.lock();
326 m_neuroParameters.targetZ.lock();
327 m_neuroParameters.targetTheta.lock();
328 for (auto x : m_neuroParameters.maxHitsPerSL) {
329 x.lock();
330 }
331 if (m_writeconfigFileName == "") {
332 B2INFO("write writeconfig now: " + m_configFileName);
333 m_neuroParameters.saveconfigtxt(m_configFileName);
334 } else {
335 B2INFO("write writeconfig now: " + m_writeconfigFileName);
337 }
338
339 B2DEBUG(10, "Collected events: " << ss.str());
340 }
341
342}
Struct for training data of a single MLP for the neuro trigger.
std::string m_EventTimeName
name of the event time StoreObjPtr
std::vector< CDCTriggerMLPData > m_trainSet
Sets of training data for all sectors.
bool m_trainOnRecoTracks
Switch between MCParticles or RecoTracks as targets.
NeuroTriggerParameters m_neuroParameters
Parameters for the NeuroTrigger.
NeuroTrigger m_NeuroTrigger
Instance of the NeuroTrigger.
std::string m_targetCollectionName
Name of the MCParticles/RecoTracks collection used as target values.
bool m_neuroTrackInputMode
Switch to rescale out of range target values or ignore them.
std::string m_inputCollectionName
Name of the StoreArray containing the input 2D tracks.
StoreArray< CDCTriggerTrack > m_tracks
List of input tracks.
std::string m_filename
Name of gzip file where the training data are saved.
bool m_singleUse
use a track only once and not for every expert
std::string m_writeconfigFileName
Name of the configuration file used in the module to write the neuroparamters.
std::string m_hitCollectionName
Name of the StoreArray containing the input track segment hits.
bool m_SaveFakeTrack
Flag to save Fake track.
std::string m_configFileName
Name of the configuration file used in the module to load the neuroparamters.
The Class for CDC Geometry Parameters.
static CDCGeometryPar & Instance(const CDCGeometry *=nullptr)
Static method to get a reference to the CDCGeometryPar instance.
A Class to store the Monte Carlo particle information.
Definition MCParticle.h:32
ROOT::Math::XYZVector getProductionVertex() const
Return production vertex position.
Definition MCParticle.h:178
ROOT::Math::XYZVector getMomentum() const
Return momentum.
Definition MCParticle.h:187
void setDescription(const std::string &description)
Sets the description of the module.
Definition Module.cc:214
Module()
Constructor.
Definition Module.cc:30
This is the Reconstruction Event-Data Model Track.
Definition RecoTrack.h:79
const std::vector< genfit::AbsTrackRep * > & getRepresentations() const
Return a list of track representations. You are not allowed to modify or delete them!
Definition RecoTrack.h:638
bool wasFitSuccessful(const genfit::AbsTrackRep *representation=nullptr) const
Returns true if the last fit with the given representation was successful.
Definition RecoTrack.cc:336
const genfit::MeasuredStateOnPlane & getMeasuredStateOnPlaneClosestTo(const ROOT::Math::XYZVector &closestPoint, const genfit::AbsTrackRep *representation=nullptr)
Return genfit's MasuredStateOnPlane, that is closest to the given point useful for extrapolation of m...
Definition RecoTrack.cc:426
Accessor to arrays stored in the data store.
Definition StoreArray.h:113
Type-safe access to single objects in the data store.
Definition StoreObjPtr.h:96
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition Module.h:559
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition Module.h:649
B2Vector3< double > B2Vector3D
typedef for common usage with double
Definition B2Vector3.h:516
virtual void initialize() override
Initialize the module.
virtual void event() override
Called once for each event.
virtual void terminate() override
Do the training for all sectors.
CDCTriggerNeuroDataModule()
Constructor, for setting module description and parameters.
Abstract base class for different kinds of events.
Struct to keep one set of training data for either training, validation or testing.