Belle II Software development
neurotrainer.py
#!/usr/bin/env python
2
3
10
11import basf2
12from ROOT import Belle2
13import os
14import glob
15
16"""
17Example script showing how to train neural networks
18to be used with the CDCTriggerNeuroModule.
19
20This script uses realistic values for the amount of training data
21and the number of runs, so it will run a long time.
22"""
23
# ------------ #
# user options #
# ------------ #

# fixed seed for the random number generator
basf2.set_random_seed(1)

# directories that receive the trained networks, the training data and the logs
mlpdir = Belle2.FileSystem.findFile('trg/cdc/data')
traindir = Belle2.FileSystem.findFile('trg/cdc/data')
logdir = Belle2.FileSystem.findFile('trg/cdc/data')
# corresponding file names
mlpname = 'NeuroTrigger.root'
trainname = 'NeuroTriggerTraindata.root'
logname = 'NeuroTriggerLog'  # file extensions are appended automatically

# how many threads the parallel training may use
nthreads = 1

# Training sample: single tracks within the acceptance of the track finder.
particlegun_params = dict(
    pdgCodes=[-13, 13],                 # muons
    nTracks=1,                          # single tracks
    momentumGeneration='inversePt',     # uniform in the track curvature
    momentumParams=[0.3, 10.],          # 0.3: minimum pt for standard track finder
    thetaGeneration='uniformCos',       # uniform in solid angle
    thetaParams=[23, 144],              # hit SL 6 from z in [-50, 50]
    phiGeneration='uniform',            # uniform in phi
    phiParams=[0, 360],                 # full phi
    vertexGeneration='uniform',         # uniform vertex distribution
    xVertexParams=[0, 0.0],             # vertex on z-axis
    yVertexParams=[0, 0.0],             # vertex on z-axis
    zVertexParams=[-50.0, 50.0],        # target range for training
)

# switch for beam background mixing and the directory holding the background files
background = True
bkgdir = '/sw/belle2/bkg.mixing/'
60
61
# ------------------------- #
# create path up to trigger #
# ------------------------- #

main = basf2.create_path()

# The CDCTriggerNeuroTrainer module stops the event loop when there is enough data,
# so just put a very high number of events here.
main.add_module('EventInfoSetter', evtNumList=1000000000)
main.add_module('Progress')
main.add_module('Gearbox')
main.add_module('Geometry', components=['BeamPipe', 'Cryostat',
                                        'PXD', 'SVD', 'CDC',
                                        'MagneticFieldConstant4LimitedRCDC'])
# single-track particle gun configured in the user options
particlegun = basf2.register_module('ParticleGun')
particlegun.param(particlegun_params)
main.add_module(particlegun)
main.add_module('FullSim')
if background:
    # glob returns matches in file-system dependent order; sort them so that
    # the list handed to the mixer is the same on every machine and every run
    # (the random seed is fixed above, so the input order should be fixed too).
    bkgfiles = sorted(glob.glob(os.path.join(bkgdir, '*usual*.root')))
    if not bkgfiles:
        # Make a wrong or empty background directory visible instead of
        # silently passing an empty file list to the mixer.
        basf2.B2WARNING("No background files matching '*usual*.root' found in "
                        + bkgdir)
    main.add_module('BeamBkgMixer',
                    backgroundFiles=bkgfiles,
                    components=['CDC'])
main.add_module('CDCDigitizer')
85
86
# -------------------------------------- #
# add trigger modules up to neurotrigger #
# -------------------------------------- #

# look-up tables for the track segment finder (inner and outer super layers)
tsf_inner_lut = Belle2.FileSystem.findFile("data/trg/cdc/innerLUT_Bkg_p0.70_b0.80.coe")
tsf_outer_lut = Belle2.FileSystem.findFile("data/trg/cdc/outerLUT_Bkg_p0.70_b0.80.coe")
main.add_module('CDCTriggerTSF',
                InnerTSLUTFile=tsf_inner_lut,
                OuterTSLUTFile=tsf_outer_lut)

main.add_module('CDCTrigger2DFinder')

# The event time estimate is not very reliable for single tracks,
# therefore the true event time is used here.
main.add_module('CDCTriggerETF', trueEventTime=True)
98
99
# ---------------- #
# add the training #
# ---------------- #

# name of the 2D track collection shared by the matcher and the trainer
finder_tracks = 'TRGCDC2DFinderTracks'

# The training targets are taken from MCParticles, so relations between
# 2D tracks and MCParticles are needed.
# Only matched tracks (no clones) with many true hits are wanted.
main.add_module('CDCTriggerMCMatcher',
                minAxial=4,
                axialOnly=True,
                relateClonesAndMerged=False,
                TrgTrackCollectionName=finder_tracks)

trainer_options = {
    # --- input and target arrays ---
    'inputCollectionName': finder_tracks,
    'targetCollectionName': 'MCParticles',
    'trainOnRecoTracks': False,             # train with MCParticles as targets
    # --- output files ---
    'filename': os.path.join(mlpdir, mlpname),
    'trainFilename': os.path.join(traindir, trainname),
    'logFilename': os.path.join(logdir, logname),
    # --- sector definition ---
    'nMLP': 5,                              # total number of sectors
    # hit pattern sectorization (5 sectors for different missing stereo hits)
    'SLpatternMask': [0b010101010],         # ignore axial hits in sector selection
    'SLpattern': [0b111111111,              # full hits
                  0b101111111,              # SL 7 missing
                  0b111011111,              # SL 5 missing
                  0b111110111,              # SL 3 missing
                  0b111111101],             # SL 1 missing
    # phase space sectorization (trivial here, only 1 sector)
    'invptRange': [[-5., 5.]],              # sectorization in charge/pt
    'phiRange': [[0., 360.]],               # sectorization in phi
    'thetaRange': [[0., 180.]],             # sectorization in theta (requires 3D input tracks)
    'selectSectorByMC': False,              # use 2D track parameters to select sector
    # sector ranges during training may overlap, i.e. they can be larger
    # than the final sectors (will be shrunk after training)
    'invptRangeTrain': [[-5., 5.]],
    'phiRangeTrain': [[0., 360.]],
    'thetaRangeTrain': [[0., 180.]],
    # --- network structure ---
    'multiplyHidden': False,                # set the number of hidden nodes directly
    'nHidden': [[81]],                      # 1 hidden layer with 81 nodes for all sectors
    'wMax': 63.,                            # limit weights to [-63, 63]
    # --- target definition ---
    'targetZ': True,                        # output z-vertex
    'targetTheta': True,                    # output also polar angle
    # output 1 (z) scaled to [-50, 50]cm, output 2 (theta) scaled to [0, 180]deg
    'outputScale': [[-50., 50., 0., 180.]],
    'rescaleTarget': False,                 # targets outside of output range are skipped
    # --- relevant ID ranges: region around 2D track from which hits are taken
    #     (determined from a histogram that is generated from hits related to MCParticles)
    'nTrainPrepare': 1000,                  # number of tracks used to prepare the ID histogram
    'relevantCut': 0.02,                    # cut on the ID histogram
    'cutSum': False,                        # cut directly on the ID histogram bins
    # --- training parameters ---
    'multiplyNTrain': True,                 # set training data relative to degrees of freedom
    'nTrainMax': 10.,                       # training data (10x degrees of freedom)
    'nTrainMin': 10.,                       # don't train if there is less than 10x DoF training data
    'nValid': 1000,                         # number of validation samples (to avoid overtraining)
    'nTest': 5000,                          # number of test samples (to select best of several runs)
    'repeatTrain': 10,                      # train each sector 10x with different initial weights
    'checkInterval': 500,                   # stop if validation error does not improve for 500 epochs
    'maxEpochs': 10000,                     # stop training after 10000 epochs
    'nThreads': nthreads,                   # number of parallel threads
    'stopLoop': True,                       # stop event loop when there is enough training data
    # --- log level ---
    'logLevel': basf2.LogLevel.DEBUG,       # show some debug output
    'debugLevel': 50,
}
main.add_module('CDCTriggerNeuroTrainer', **trainer_options)

# debug output shows only the level and the message itself
debug_fields = basf2.LogInfo.LEVEL | basf2.LogInfo.MESSAGE
basf2.logging.set_info(basf2.LogLevel.DEBUG, debug_fields)
167
168
# Run the event loop over the assembled path
basf2.process(main)

# Afterwards print the collected call statistics
print(basf2.statistics)
static std::string findFile(const std::string &path, bool silent=false)
Search for given file or directory in local or central release directory, and return absolute path if...
Definition: FileSystem.cc:151