Belle II Software development
CDCTriggerNeuroDataModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9
10#include "trg/cdc/modules/neurotrigger/CDCTriggerNeuroDataModule.h"
11
12#include <fstream>
13#include <framework/datastore/StoreArray.h>
14#include <mdst/dataobjects/MCParticle.h>
15#include <tracking/dataobjects/RecoTrack.h>
16#include <trg/cdc/dataobjects/CDCTriggerSegmentHit.h>
17#include <trg/cdc/dataobjects/CDCTriggerTrack.h>
18#include <framework/datastore/StoreObjPtr.h>
19#include <framework/dataobjects/EventMetaData.h>
20#include <framework/core/ModuleParam.templateDetails.h>
21
22#include <cdc/geometry/CDCGeometryPar.h>
23#include <framework/gearbox/Unit.h>
24#include <framework/geometry/B2Vector3.h>
25
26
27#include <iostream>
28#include <cmath>
29#include "boost/iostreams/filter/gzip.hpp"
30#include "boost/iostreams/filtering_streambuf.hpp"
31#include "boost/iostreams/filtering_stream.hpp"
32#include "boost/multi_array.hpp"
33#define BOOST_MULTI_ARRAY_NO_GENERATORS
34
35namespace Belle2 {
41 REG_MODULE(CDCTriggerNeuroData);
42
44 {
46 "This module takes 2dtracks, track segments, and targettracks (either recotracks or mcparticles) as input and generates training data for the neurotrigger in a tab separated, gzip compressed file."
47 );
48 // parameters for saving / loading
49 addParam("hitCollectionName", m_hitCollectionName,
50 "Name of the input StoreArray of CDCTriggerSegmentHits. The Axials need to have a relation to inputtracks",
51 std::string(""));
52 addParam("inputCollectionName", m_inputCollectionName,
53 "Name of the StoreArray holding the 2D input tracks.",
54 std::string("CDCTriggerNNInput2DTracks"));
55 addParam("targetCollectionName", m_targetCollectionName,
56 "Name of the MCParticle/RecoTrack collection used as target values.",
57 std::string("RecoTracks"));
58 addParam("trainOnRecoTracks", m_trainOnRecoTracks,
59 "If true, use RecoTracks as targets instead of MCParticles.",
60 true);
61 addParam("EventTimeName", m_EventTimeName,
62 "Name of the event time object.",
63 std::string("CDCTriggerNeuroETFT0"));
64 addParam("NeuroTrackInputMode", m_neuroTrackInputMode,
65 "When using real tracks, use neurotracks instead of 2dtracks as input to the neurotrigger. this is important to get the relations right.",
66 false);
67 addParam("singleUse", m_singleUse,
68 "Only use a track for a single expert", true);
69 addParam("configFileName", m_configFileName,
70 "Name of the configuration file. This File should be created by the CDCTriggerIDHistModule and will be extended in this module",
71 std::string(""));
72 addParam("writeconfigFileName", m_writeconfigFileName,
73 "Name of the configuration file, which will be written. If left blank, the same file as the input configuration file is used and it will be overwritten.",
74 std::string(""));
75 addParam("gzipFilename", m_filename,
76 "Name of the gzip file, where the training samples will be saved.",
77 std::string("out.gz"));
78 addParam("SaveFakeTrack", m_SaveFakeTrack,
79 "Flag for whether save fake track or not",
80 false);
81
82
83
84 }
85 void
87 {
88 // register store objects
90 // decided whether to load mcparticles or recotracks as training targets.
91 // This has to be done because the train target values are obtained
92 // for both classes in a different way.
95 targets.isRequired(m_targetCollectionName);
96 } else {
98 targets.isRequired(m_targetCollectionName);
99 }
100 // initialize the neurotrigger object, but use the parameters given in the module
101 if (m_configFileName != "") {
102 m_neuroParameters.loadconfigtxt(m_configFileName);
104 } else {
105 B2ERROR("The Neurotrigger needs to be initialized by a configuration file! Make sure to give the configuration file as a parameter.");
106 }
107 // in this version, we first need an idhistfile in prior to collect the training data.
108 // this idhistfile is created by running another steering file before
109 // and stored in the config file loaded in the previous step.
110 m_trainSet.clear();
112 // create an empty dataset of training data for each expert network
113 for (unsigned iMLP = 0; iMLP < m_NeuroTrigger.nSectors(); ++iMLP) {
114 m_trainSet.push_back(CDCTriggerMLPData());
115 int layerId = 3;
116 for (int iSL = 0; iSL < 9; ++iSL) {
117 m_trainSet[iMLP].addCounters(cdc.nWiresInLayer(layerId));
118 layerId += (iSL > 0 ? 6 : 7);
119 }
120 }
121 // this one sets up the other root store arrays needed to collect training data
122 m_NeuroTrigger.initializeCollections(m_hitCollectionName, m_EventTimeName, m_neuroParameters.et_option());
123 // consistency check of training parameters
124 if (m_NeuroTrigger.nSectors() != m_trainSet.size()) {
125 B2ERROR("Number of ID sets (" << m_trainSet.size() << ") should match " <<
126 "number of sectors (" << m_NeuroTrigger.nSectors() << ")");
127 }
128 // overwrite previous file with empty file, in case it already exists
129 std::ofstream gzipfile4(m_filename, std::ios_base::trunc | std::ios_base::binary);
130 boost::iostreams::filtering_ostream outStream;
131 outStream.push(boost::iostreams::gzip_compressor());
132 outStream.push(gzipfile4);
133 int inlen = m_neuroParameters.AdditionWireMode ? 9 * (3 + m_neuroParameters.AdditionInputPerSL) : 27 ;
134 CDCTriggerMLPData::NeuroSet sample(inlen);
135 outStream << sample.headline << std::endl;
136 }
137 void
139 {
140 StoreObjPtr<EventMetaData> evtmetadata;
141 B2DEBUG(150, Form("Number of Track : %d", m_tracks.getEntries()));
142 for (int itrack = 0; itrack < m_tracks.getEntries(); ++itrack) {
143 // get related MCParticle/RecoTrack for target
144 // and retrieve track parameters
145 //float phi0Target = 0;
146 //float invptTarget = 0;
147 float thetaTarget = 0;
148 float zTarget = 0;
149 float PtTarget = 0;
150 bool isFakeTrack = false;
151
153 RecoTrack* recoTrack =
154 m_tracks[itrack]->getRelatedTo<RecoTrack>(m_targetCollectionName);
155 if (!recoTrack) {
156 if (m_SaveFakeTrack)
157 isFakeTrack = true;
158 else {
159 B2DEBUG(150, "Skipping CDCTriggerTrack without relation to RecoTrack.");
160 continue;
161 }
162 } else {
163 // a RecoTrack has multiple representations for different particle hypothesis
164 // -> just take the first one that does not give errors.
165 const std::vector<genfit::AbsTrackRep*>& reps = recoTrack->getRepresentations();
166 bool foundValidRep = false;
167 for (unsigned irep = 0; irep < reps.size() && !foundValidRep; ++irep) {
168 if (!recoTrack->wasFitSuccessful(reps[irep])) {
169 continue;
170 }
171
172 // get state (position, momentum etc.) from hit closest to IP and
173 // extrapolate to z-axis (may throw an exception -> continue to next representation)
174 try {
175 genfit::MeasuredStateOnPlane state =
176 recoTrack->getMeasuredStateOnPlaneClosestTo(ROOT::Math::XYZVector(0, 0, 0), reps[irep]);
177 reps[irep]->extrapolateToLine(state, TVector3(0, 0, -1000), TVector3(0, 0, 2000));
178 // flip tracks if necessary, such that trigger tracks and reco tracks
179 // point in the same direction
180 if (state.getMom().Dot(B2Vector3D(m_tracks[itrack]->getDirection())) < 0) {
181 state.setPosMom(state.getPos(), -state.getMom());
182 state.setChargeSign(-state.getCharge());
183 }
184 // get track parameters
185 //phi0Target = state.getMom().Phi();
186 //invptTarget = state.getCharge() / state.getMom().Pt();
187 thetaTarget = state.getMom().Theta();
188 zTarget = state.getPos().Z();
189 PtTarget = state.getMom().Pt();
190 } catch (...) {
191 continue;
192 }
193 // break loop
194 foundValidRep = true;
195 }
196 if (!foundValidRep) {
197 B2DEBUG(150, "No valid representation found for RecoTrack, skipping.");
198 continue;
199 }
200 }
201 } else {
202 MCParticle* mcTrack =
203 m_tracks[itrack]->getRelatedTo<MCParticle>(m_targetCollectionName);
204 if (!mcTrack) {
205 if (m_SaveFakeTrack)
206 isFakeTrack = true;
207 else {
208 B2DEBUG(150, "Skipping CDCTriggerTrack without relation to MCParticle.");
209 continue;
210 }
211 } else {
212 //phi0Target = mcTrack->getMomentum().Phi();
213 //invptTarget = mcTrack->getCharge() / mcTrack->getMomentum().rho();
214 thetaTarget = mcTrack->getMomentum().Theta();
215 zTarget = mcTrack->getProductionVertex().Z();
216 PtTarget = mcTrack->getMomentum().rho();
217 }
218 }
219 m_NeuroTrigger.updateTrack(*m_tracks[itrack]);
220
221 // find all matching sectors
222 float phi0 = m_tracks[itrack]->getPhi0();
223 float invpt = m_tracks[itrack]->getKappa(1.5);
224 float theta = atan2(1., m_tracks[itrack]->getCotTheta());
225 std::vector<int> sectors = m_NeuroTrigger.selectMLPsTrain(phi0, invpt, theta);
226 if (sectors.size() == 0) continue;
227
228 // get target values
229 std::vector<float> targetRaw = {};
230 if (!isFakeTrack) {
231 if (m_neuroParameters.targetZ)
232 targetRaw.push_back(zTarget);
233 if (m_neuroParameters.targetTheta)
234 targetRaw.push_back(thetaTarget);
235 }
236 for (unsigned i = 0; i < sectors.size(); ++i) {
237 int isector = sectors[i];
238 std::vector<float> target;
239 if (isFakeTrack) {
240 target.push_back(2);
241 target.push_back(2);
242 } else {
243 target = m_NeuroTrigger[isector].scaleTarget(targetRaw);
244 // skip out of range targets or rescale them
245 bool outOfRange = false;
246 for (unsigned itarget = 0; itarget < target.size(); ++itarget) {
247 if (fabs(target[itarget]) > 1.) {
248 outOfRange = true;
249 target[itarget] /= fabs(target[itarget]);
250 }
251 }
252 if (!m_neuroParameters.rescaleTarget && outOfRange) {
253 continue;
254 }
255 }
256
257 //
258 // read out or determine event time
259 m_NeuroTrigger.getEventTime(isector, *m_tracks[itrack], m_neuroParameters.et_option(), m_neuroTrackInputMode);
260 // check hit pattern
261 unsigned long hitPattern = m_NeuroTrigger.getCompleteHitPattern(isector, *m_tracks[itrack], m_neuroTrackInputMode); // xxxxx0xxx
262 // sectorpattern holds the absolute necessary SLs for the expert
263 unsigned long sectorPattern = m_NeuroTrigger[isector].getSLpattern(); // 010100010
264 // sectorpatternmask holds the SLs, which are generally used to determine the right expert
265 unsigned long sectorPatternMask = m_NeuroTrigger[isector].getSLpatternMask(); // 010101010
266 B2DEBUG(250, "hitPattern " << hitPattern << " sectorPattern " << sectorPattern);
267 // if multiple experts should be trained with one track, the necessary SLs have to be in the hitpattern
268 if (!m_singleUse && sectorPattern > 0 && (sectorPattern & hitPattern) != sectorPattern) {
269 B2DEBUG(250, "hitPattern not matching " << (sectorPattern & hitPattern));
270 continue;
271 // if one track should only be used for one expert, the absolute necessary SLs for this expert should match exactly the set within the hitpattern of the generally used SLs for determining the expert.
272 } else if (m_singleUse && sectorPattern > 0 && (sectorPattern) != (hitPattern & sectorPatternMask)) {
273 // 010100010 != 0x0x0x0x0
274 B2DEBUG(250, "hitPattern not matching " << (sectorPatternMask & hitPattern));
275 continue;
276 }
277 // check, if enough axials are there. first, we select the axial bits from the
278 // hitpattern (341 = int('101010101',base=2)) and then check if the number of
279 // ones is equal or greater than 4.
280 int count = 0;
281 unsigned axialPattern = hitPattern & 0b101010101;
282 while (axialPattern) {
283 count += axialPattern & 1;
284 axialPattern >>= 1;
285 }
286 if (count < 4) {
287 B2DEBUG(250, "Not enough axial hits (<4), skipping!");
288 continue;
289 }
290 // get training data
291 std::vector<unsigned> hitIds;
293 hitIds = m_NeuroTrigger.selectHitsHWSim(isector, *m_tracks[itrack]);
294 } else {
295 hitIds = m_NeuroTrigger.selectHits(isector, *m_tracks[itrack]);
296 }
297 // add a "sample" it is a full training set including some zeroes for the neurotrigger values (raw and scaled z and theta). Those are sometimes used in the training for reference, but cannot be added here without running *some* neuotrigger instance.
298 CDCTriggerMLPData::NeuroSet sample(m_NeuroTrigger.getInputVector(isector, hitIds), target,
299 evtmetadata->getExperiment(), evtmetadata->getRun(), evtmetadata->getSubrun(), evtmetadata->getEvent(), itrack, i,
300 m_tracks.getEntries(), 0, 0, 0, 0, phi0, theta, invpt, PtTarget);
301 //check whether we already have enough samples
302 m_trainSet[isector].addSample(sample);
303 if ((m_trainSet)[isector].nSamples() % 1000 == 0) {
304 B2DEBUG(50, m_trainSet[isector].nSamples() << " samples for training collected for sector " << isector);
305 }
306 std::ofstream gzipfile4(m_filename, std::ios_base::app | std::ios_base::binary);
307 boost::iostreams::filtering_ostream outStream;
308 outStream.push(boost::iostreams::gzip_compressor());
309 outStream.push(gzipfile4);
310
311 outStream << sample << std::endl;
313 }
314
315 }
316 }
317 void
319 {
320 std::stringstream ss;
321 for (unsigned int i = 0; i < m_trainSet.size(); ++i) {
322 ss << "expert " << i << " : " << m_trainSet[i].nSamples() << ", ";
323 }
324 // lock the parameters, which were used to obtain the training data, so they cannot be altered in the next steps.
325 m_neuroParameters.ETOption.lock();
326 m_neuroParameters.rescaleTarget.lock();
327 m_neuroParameters.targetZ.lock();
328 m_neuroParameters.targetTheta.lock();
329 for (auto x : m_neuroParameters.maxHitsPerSL) {
330 x.lock();
331 }
332 if (m_writeconfigFileName == "") {
333 B2INFO("write writeconfig now: " + m_configFileName);
334 m_neuroParameters.saveconfigtxt(m_configFileName);
335 } else {
336 B2INFO("write writeconfig now: " + m_writeconfigFileName);
338 }
339
340 B2DEBUG(10, "Collected events: " << ss.str());
341 }
342
343}
Struct for training data of a single MLP for the neuro trigger.
std::string m_EventTimeName
name of the event time StoreObjPtr
std::vector< CDCTriggerMLPData > m_trainSet
Sets of training data for all sectors.
bool m_trainOnRecoTracks
Switch between MCParticles or RecoTracks as targets.
NeuroTriggerParameters m_neuroParameters
Parameters for the NeuroTrigger.
NeuroTrigger m_NeuroTrigger
Instance of the NeuroTrigger.
std::string m_targetCollectionName
Name of the MCParticles/RecoTracks collection used as target values.
bool m_neuroTrackInputMode
Switch to rescale out of range target values or ignore them.
std::string m_inputCollectionName
Name of the StoreArray containing the input 2D tracks.
StoreArray< CDCTriggerTrack > m_tracks
List of input tracks.
std::string m_filename
Name of gzip file where the training data are saved.
bool m_singleUse
use a track only once and not for every expert
std::string m_writeconfigFileName
Name of the configuration file used in the module to write the neuroparamters.
std::string m_hitCollectionName
Name of the StoreArray containing the input track segment hits.
bool m_SaveFakeTrack
Flag to save Fake track.
std::string m_configFileName
Name of the configuration file used in the module to load the neuroparamters.
The Class for CDC Geometry Parameters.
static CDCGeometryPar & Instance(const CDCGeometry *=nullptr)
Static method to get a reference to the CDCGeometryPar instance.
A Class to store the Monte Carlo particle information.
Definition MCParticle.h:32
ROOT::Math::XYZVector getProductionVertex() const
Return production vertex position.
Definition MCParticle.h:178
ROOT::Math::XYZVector getMomentum() const
Return momentum.
Definition MCParticle.h:187
void setDescription(const std::string &description)
Sets the description of the module.
Definition Module.cc:214
Module()
Constructor.
Definition Module.cc:30
This is the Reconstruction Event-Data Model Track.
Definition RecoTrack.h:79
const std::vector< genfit::AbsTrackRep * > & getRepresentations() const
Return a list of track representations. You are not allowed to modify or delete them!
Definition RecoTrack.h:638
bool wasFitSuccessful(const genfit::AbsTrackRep *representation=nullptr) const
Returns true if the last fit with the given representation was successful.
Definition RecoTrack.cc:336
const genfit::MeasuredStateOnPlane & getMeasuredStateOnPlaneClosestTo(const ROOT::Math::XYZVector &closestPoint, const genfit::AbsTrackRep *representation=nullptr)
Return genfit's MasuredStateOnPlane, that is closest to the given point useful for extrapolation of m...
Definition RecoTrack.cc:426
Accessor to arrays stored in the data store.
Definition StoreArray.h:113
Type-safe access to single objects in the data store.
Definition StoreObjPtr.h:96
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition Module.h:559
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition Module.h:649
B2Vector3< double > B2Vector3D
typedef for common usage with double
Definition B2Vector3.h:516
virtual void initialize() override
Initialize the module.
virtual void event() override
Called once for each event.
virtual void terminate() override
Do the training for all sectors.
CDCTriggerNeuroDataModule()
Constructor, for setting module description and parameters.
Abstract base class for different kinds of events.
Struct to keep one set of training data for either training, validation or testing.