Belle II Software development
CDCTriggerNeuroDataModule.cc
1/**************************************************************************
2 * basf2 (Belle II Analysis Software Framework) *
3 * Author: The Belle II Collaboration *
4 * *
5 * See git log for contributors and copyright holders. *
6 * This file is licensed under LGPL-3.0, see LICENSE.md. *
7 **************************************************************************/
8
9
10#include "trg/cdc/modules/neurotrigger/CDCTriggerNeuroDataModule.h"
11
12#include <fstream>
13#include <framework/datastore/StoreArray.h>
14#include <mdst/dataobjects/MCParticle.h>
15#include <tracking/dataobjects/RecoTrack.h>
16#include <trg/cdc/dataobjects/CDCTriggerSegmentHit.h>
17#include <trg/cdc/dataobjects/CDCTriggerTrack.h>
18#include <framework/datastore/StoreObjPtr.h>
19#include <framework/dataobjects/EventMetaData.h>
20#include <framework/core/ModuleParam.templateDetails.h>
21
22#include <cdc/geometry/CDCGeometryPar.h>
23#include <framework/gearbox/Unit.h>
24#include <framework/geometry/B2Vector3.h>
25
26
27#include <iostream>
28#include <cmath>
29#include "boost/iostreams/filter/gzip.hpp"
30#include "boost/iostreams/filtering_streambuf.hpp"
31#include "boost/iostreams/filtering_stream.hpp"
32#include "boost/multi_array.hpp"
33#define BOOST_MULTI_ARRAY_NO_GENERATORS
34
35namespace Belle2 {
40 REG_MODULE(CDCTriggerNeuroData);
41
43 {
45 "This module takes 2dtracks, track segments, and targettracks (either recotracks or mcparticles) as input and generates training data for the neurotrigger in a tab separated, gzip compressed file."
46 );
47 // parameters for saving / loading
48 addParam("hitCollectionName", m_hitCollectionName,
49 "Name of the input StoreArray of CDCTriggerSegmentHits. The Axials need to have a relation to inputtracks",
50 std::string(""));
51 addParam("inputCollectionName", m_inputCollectionName,
52 "Name of the StoreArray holding the 2D input tracks.",
53 std::string("CDCTriggerNNInput2DTracks"));
54 addParam("targetCollectionName", m_targetCollectionName,
55 "Name of the MCParticle/RecoTrack collection used as target values.",
56 std::string("RecoTracks"));
57 addParam("trainOnRecoTracks", m_trainOnRecoTracks,
58 "If true, use RecoTracks as targets instead of MCParticles.",
59 true);
60 addParam("EventTimeName", m_EventTimeName,
61 "Name of the event time object.",
62 std::string("CDCTriggerNeuroETFT0"));
63 addParam("NeuroTrackInputMode", m_neuroTrackInputMode,
64 "When using real tracks, use neurotracks instead of 2dtracks as input to the neurotrigger. this is important to get the relations right.",
65 false);
66 addParam("singleUse", m_singleUse,
67 "Only use a track for a single expert", true);
68 addParam("configFileName", m_configFileName,
69 "Name of the configuration file. This File should be created by the CDCTriggerIDHistModule and will be extended in this module",
70 std::string(""));
71 addParam("writeconfigFileName", m_writeconfigFileName,
72 "Name of the configuration file, which will be written. If left blank, the same file as the input configuration file is used and it will be overwritten.",
73 std::string(""));
74 addParam("gzipFilename", m_filename,
75 "Name of the gzip file, where the training samples will be saved.",
76 std::string("out.gz"));
77
78
79
80
81 }
82 void
84 {
85 // register store objects
87 // decided wether to load mcparticles or recotracks as training targets.
88 // This has to be done because the train target values are obtained
89 // for both classes in a different way.
92 targets.isRequired(m_targetCollectionName);
93 } else {
95 targets.isRequired(m_targetCollectionName);
96 }
97 // initialize the neurotrigger object, but use the parameters given in the module
98 if (m_configFileName != "") {
101 } else {
102 B2ERROR("The Neurotrigger needs to be initialized by a configuration file! Make sure to give the configuration file as a parameter.");
103 }
104 // in this version, we first need an idhistfile in prior to collect the training data.
105 // this idhistfile is created by running another steering file before
106 // and stored in the config file loaded in the previous step.
107 m_trainSet.clear();
109 // create an empty dataset of training data for each expert network
110 for (unsigned iMLP = 0; iMLP < m_NeuroTrigger.nSectors(); ++iMLP) {
111 m_trainSet.push_back(CDCTriggerMLPData());
112 int layerId = 3;
113 for (int iSL = 0; iSL < 9; ++iSL) {
114 m_trainSet[iMLP].addCounters(cdc.nWiresInLayer(layerId));
115 layerId += (iSL > 0 ? 6 : 7);
116 }
117 }
118 // this one sets up the other root store arrays needed to collect training data
120 // consistency check of training parameters
121 if (m_NeuroTrigger.nSectors() != m_trainSet.size()) {
122 B2ERROR("Number of ID sets (" << m_trainSet.size() << ") should match " <<
123 "number of sectors (" << m_NeuroTrigger.nSectors() << ")");
124 }
125 // overwrite previous file with empty file, in case it already exists
126 std::ofstream gzipfile4(m_filename, std::ios_base::trunc | std::ios_base::binary);
127 boost::iostreams::filtering_ostream outStream;
128 outStream.push(boost::iostreams::gzip_compressor());
129 outStream.push(gzipfile4);
131 outStream << sample.headline << std::endl;
132 }
133 void
135 {
136 StoreObjPtr<EventMetaData> evtmetadata;
137 for (int itrack = 0; itrack < m_tracks.getEntries(); ++itrack) {
138 // get related MCParticle/RecoTrack for target
139 // and retrieve track parameters
140 //float phi0Target = 0;
141 //float invptTarget = 0;
142 float thetaTarget = 0;
143 float zTarget = 0;
145 RecoTrack* recoTrack =
146 m_tracks[itrack]->getRelatedTo<RecoTrack>(m_targetCollectionName);
147 if (!recoTrack) {
148 B2DEBUG(150, "Skipping CDCTriggerTrack without relation to RecoTrack.");
149 continue;
150 }
151 // a RecoTrack has multiple representations for different particle hypothesis
152 // -> just take the first one that does not give errors.
153 const std::vector<genfit::AbsTrackRep*>& reps = recoTrack->getRepresentations();
154 bool foundValidRep = false;
155 for (unsigned irep = 0; irep < reps.size() && !foundValidRep; ++irep) {
156 if (!recoTrack->wasFitSuccessful(reps[irep])) {
157 continue;
158 }
159
160 // get state (position, momentum etc.) from hit closest to IP and
161 // extrapolate to z-axis (may throw an exception -> continue to next representation)
162 try {
163 genfit::MeasuredStateOnPlane state =
164 recoTrack->getMeasuredStateOnPlaneClosestTo(ROOT::Math::XYZVector(0, 0, 0), reps[irep]);
165 reps[irep]->extrapolateToLine(state, TVector3(0, 0, -1000), TVector3(0, 0, 2000));
166 // flip tracks if necessary, such that trigger tracks and reco tracks
167 // point in the same direction
168 if (state.getMom().Dot(B2Vector3D(m_tracks[itrack]->getDirection())) < 0) {
169 state.setPosMom(state.getPos(), -state.getMom());
170 state.setChargeSign(-state.getCharge());
171 }
172 // get track parameters
173 //phi0Target = state.getMom().Phi();
174 //invptTarget = state.getCharge() / state.getMom().Pt();
175 thetaTarget = state.getMom().Theta();
176 zTarget = state.getPos().Z();
177 } catch (...) {
178 continue;
179 }
180 // break loop
181 foundValidRep = true;
182 }
183 if (!foundValidRep) {
184 B2DEBUG(150, "No valid representation found for RecoTrack, skipping.");
185 continue;
186 }
187 } else {
188 MCParticle* mcTrack =
189 m_tracks[itrack]->getRelatedTo<MCParticle>(m_targetCollectionName);
190 if (not mcTrack) {
191 B2DEBUG(150, "Skipping CDCTriggerTrack without relation to MCParticle.");
192 continue;
193 }
194 //phi0Target = mcTrack->getMomentum().Phi();
195 //invptTarget = mcTrack->getCharge() / mcTrack->getMomentum().Pt();
196 thetaTarget = mcTrack->getMomentum().Theta();
197 zTarget = mcTrack->getProductionVertex().Z();
198 }
200
201 // find all matching sectors
202 float phi0 = m_tracks[itrack]->getPhi0();
203 float invpt = m_tracks[itrack]->getKappa(1.5);
204 float theta = atan2(1., m_tracks[itrack]->getCotTheta());
205 std::vector<int> sectors = m_NeuroTrigger.selectMLPsTrain(phi0, invpt, theta);
206 if (sectors.size() == 0) continue;
207
208 // get target values
209 std::vector<float> targetRaw = {};
211 targetRaw.push_back(zTarget);
213 targetRaw.push_back(thetaTarget);
214 for (unsigned i = 0; i < sectors.size(); ++i) {
215 int isector = sectors[i];
216 std::vector<float> target = m_NeuroTrigger[isector].scaleTarget(targetRaw);
217 // skip out of range targets or rescale them
218 bool outOfRange = false;
219 for (unsigned itarget = 0; itarget < target.size(); ++itarget) {
220 if (fabs(target[itarget]) > 1.) {
221 outOfRange = true;
222 target[itarget] /= fabs(target[itarget]);
223 }
224 }
225 if (!m_neuroParameters.rescaleTarget && outOfRange) {
226 continue;
227 }
228 //
229 // read out or determine event time
231 // check hit pattern
232 unsigned long hitPattern = m_NeuroTrigger.getCompleteHitPattern(isector, *m_tracks[itrack], m_neuroTrackInputMode); // xxxxx0xxx
233 // sectorpattern holds the absolut necessary SLs for the expert
234 unsigned long sectorPattern = m_NeuroTrigger[isector].getSLpattern(); // 010100010
235 // sectorpatternmask holds the SLs, which are generally used to determine the right expert
236 unsigned long sectorPatternMask = m_NeuroTrigger[isector].getSLpatternMask(); // 010101010
237 B2DEBUG(250, "hitPattern " << hitPattern << " sectorPattern " << sectorPattern);
238 // if multiple experts should be trained with one track, the necessary SLs have to be in the hitpattern
239 if (!m_singleUse && sectorPattern > 0 && (sectorPattern & hitPattern) != sectorPattern) {
240 B2DEBUG(250, "hitPattern not matching " << (sectorPattern & hitPattern));
241 continue;
242 // if one track should only be used for one expert, the absolute necessary SLs for this expert should match exactly the set within the hitpattern of the generally used SLs for determining the expert.
243 } else if (m_singleUse && sectorPattern > 0 && (sectorPattern) != (hitPattern & sectorPatternMask)) {
244 // 010100010 != 0x0x0x0x0
245 B2DEBUG(250, "hitPattern not matching " << (sectorPatternMask & hitPattern));
246 continue;
247 }
248 // check, if enough axials are there. first, we select the axial bits from the
249 // hitpattern (341 = int('101010101',base=2)) and then check if the number of
250 // ones is equal or greater than 4.
251 if ((hitPattern & 341) != 341 && // this is an ugly workaround, because popcount is only
252 (hitPattern & 341) != 340 && // available with c++20 and newer
253 (hitPattern & 341) != 337 &&
254 (hitPattern & 341) != 325 &&
255 (hitPattern & 341) != 277 &&
256 (hitPattern & 341) != 85) {
257 B2DEBUG(250, "Not enough axial hits (<4), skipping!");
258 continue;
259 }
260 // get training data
261 std::vector<unsigned> hitIds;
263 hitIds = m_NeuroTrigger.selectHitsHWSim(isector, *m_tracks[itrack]);
264 } else {
265 hitIds = m_NeuroTrigger.selectHits(isector, *m_tracks[itrack]);
266 }
267 // add a "sample" it is a full training set including some zeroes for the neurotrigger values (raw and scaled z and theta). Those are sometimes used in the training for reference, but cannot be added here without running *some* neuotrigger instance.
268 CDCTriggerMLPData::NeuroSet<27, 2> sample(m_NeuroTrigger.getInputVector(isector, hitIds).data(), target.data(),
269 evtmetadata->getExperiment(), evtmetadata->getRun(), evtmetadata->getSubrun(), evtmetadata->getEvent(), itrack, i,
270 m_tracks.getEntries(), 0, 0, 0, 0, phi0, theta, invpt);
271 //check whether we already have enough samples
272 m_trainSet[isector].addSample(sample);
273 if ((m_trainSet)[isector].nSamples() % 1000 == 0) {
274 B2DEBUG(50, m_trainSet[isector].nSamples() << " samples for training collected for sector " << isector);
275 }
276 std::ofstream gzipfile4(m_filename, std::ios_base::app | std::ios_base::binary);
277 boost::iostreams::filtering_ostream outStream;
278 outStream.push(boost::iostreams::gzip_compressor());
279 outStream.push(gzipfile4);
280 outStream << sample << std::endl;
282 }
283
284 }
285 }
286 void
288 {
289 std::stringstream ss;
290 for (unsigned int i = 0; i < m_trainSet.size(); ++i) {
291 ss << "expert " << i << " : " << m_trainSet[i].nSamples() << ", ";
292 }
293 // lock the parameters, which were used to obtain the training data, so they cannot be altered in the next steps.
298 for (auto x : m_neuroParameters.maxHitsPerSL) {
299 x.lock();
300 }
301 if (m_writeconfigFileName == "") {
302 B2INFO("write writeconfig now: " + m_configFileName);
304 } else {
305 B2INFO("write writeconfig now: " + m_writeconfigFileName);
307 }
308
309 B2DEBUG(10, "Collected events: " << ss.str());
310 }
312}
Struct for training data of a single MLP for the neuro trigger.
std::string m_EventTimeName
name of the event time StoreObjPtr
std::vector< CDCTriggerMLPData > m_trainSet
Sets of training data for all sectors.
bool m_trainOnRecoTracks
Switch between MCParticles or RecoTracks as targets.
NeuroTriggerParameters m_neuroParameters
Parameters for the NeuroTrigger.
NeuroTrigger m_NeuroTrigger
Instance of the NeuroTrigger.
std::string m_targetCollectionName
Name of the MCParticles/RecoTracks collection used as target values.
bool m_neuroTrackInputMode
Switch to rescale out of range target values or ignore them.
std::string m_inputCollectionName
Name of the StoreArray containing the input 2D tracks.
StoreArray< CDCTriggerTrack > m_tracks
List of input tracks.
std::string m_filename
Name of gzip file where the training data are saved.
bool m_singleUse
use a track only once and not for every expert
std::string m_writeconfigFileName
Name of the configuration file used in the module to write the neuroparamters.
std::string m_hitCollectionName
Name of the StoreArray containing the input track segment hits.
std::string m_configFileName
Name of the configuration file used in the module to load the neuroparamters.
The Class for CDC Geometry Parameters.
static CDCGeometryPar & Instance(const CDCGeometry *=nullptr)
Static method to get a reference to the CDCGeometryPar instance.
A Class to store the Monte Carlo particle information.
Definition: MCParticle.h:32
ROOT::Math::XYZVector getProductionVertex() const
Return production vertex position.
Definition: MCParticle.h:189
ROOT::Math::XYZVector getMomentum() const
Return momentum.
Definition: MCParticle.h:198
Base class for Modules.
Definition: Module.h:72
void setDescription(const std::string &description)
Sets the description of the module.
Definition: Module.cc:214
void lock()
lock the variable, to make it read only
std::vector< NNTParam< unsigned short > > maxHitsPerSL
Maximum number of hits in a single super layer for all networks.
NNTParam< bool > rescaleTarget
flag to allow for target tracks lying out of the output range to be rescaled during training.
NNTParam< bool > targetTheta
train theta as output
NNTParam< unsigned > ETOption
Determine, how the event time should be obtained.
NNTParam< bool > targetZ
train z as output
const std::string et_option() const
return the string variant ov the et option
void loadconfigtxt(const std::string &filename)
load the configuration from a file
void saveconfigtxt(const std::string &filename)
save the configuration to a file
void initialize(const Parameters &p)
Set parameters and get some network independent parameters.
void updateTrack(const CDCTriggerTrack &track)
Calculate 2D phi position and arclength for the given track and store them.
std::vector< unsigned > selectHits(unsigned isector, const CDCTriggerTrack &track, bool returnAllRelevant=false)
Select best hits for each super layer.
void initializeCollections(std::string hitCollectionName, std::string eventTimeName, const std::string &et_option)
set the hit collection and event time to required and store the hit collection name
std::vector< float > getInputVector(unsigned isector, const std::vector< unsigned > &hitIds)
Calculate input values for MLP.
std::vector< unsigned > selectHitsHWSim(unsigned isector, const CDCTriggerTrack &track)
Select hits for each super layer from the ones related to input track.
std::vector< int > selectMLPsTrain(float phi0, float invpt, float theta)
Select all matching expert MLPs based on the given track parameters.
unsigned long getCompleteHitPattern(unsigned isector, const CDCTriggerTrack &track, const bool neurotrackinputmode)
Get complete hit pattern of neurotrack.
void getEventTime(unsigned isector, const CDCTriggerTrack &track, std::string et_option, const bool)
Read out the event time and store it.
unsigned nSectors() const
return number of neural networks
Definition: NeuroTrigger.h:164
This is the Reconstruction Event-Data Model Track.
Definition: RecoTrack.h:79
const std::vector< genfit::AbsTrackRep * > & getRepresentations() const
Return a list of track representations. You are not allowed to modify or delete them!
Definition: RecoTrack.h:638
bool wasFitSuccessful(const genfit::AbsTrackRep *representation=nullptr) const
Returns true if the last fit with the given representation was successful.
Definition: RecoTrack.cc:336
const genfit::MeasuredStateOnPlane & getMeasuredStateOnPlaneClosestTo(const ROOT::Math::XYZVector &closestPoint, const genfit::AbsTrackRep *representation=nullptr)
Return genfit's MasuredStateOnPlane, that is closest to the given point useful for extrapolation of m...
Definition: RecoTrack.cc:426
bool isRequired(const std::string &name="")
Ensure this array/object has been registered previously.
Accessor to arrays stored in the data store.
Definition: StoreArray.h:113
int getEntries() const
Get the number of objects in the array.
Definition: StoreArray.h:216
Type-safe access to single objects in the data store.
Definition: StoreObjPtr.h:96
void addParam(const std::string &name, T &paramVariable, const std::string &description, const T &defaultValue)
Adds a new parameter to the module.
Definition: Module.h:560
#define REG_MODULE(moduleName)
Register the given module (without 'Module' suffix) with the framework.
Definition: Module.h:650
virtual void initialize() override
Initialize the module.
virtual void event() override
Called once for each event.
virtual void terminate() override
Do the training for all sectors.
CDCTriggerNeuroDataModule()
Constructor, for setting module description and parameters.
Abstract base class for different kinds of events.
Struct to keep one set of training data for either training, validation or testing.