Belle II Software development
core.py
1#!/usr/bin/env python3
2
3
10
11# @cond SUPPRESS_DOXYGEN
12
13"""
14 The Full Event Interpretation Algorithm
15
16 Some basic facts:
17 - The algorithm will automatically reconstruct B mesons and calculate a signal probability for each candidate.
18 - It can be used for hadronic and semileptonic tagging.
19 - The algorithm has to be trained on MC, and can afterwards be applied on data.
20 - The training requires O(100) million MC events
21 - The weight files are stored in the Belle II Condition database
22
23 Read this file if you want to understand the technical details of the FEI.
24
25 The FEI follows a hierarchical approach.
26 There are 7 stages:
27 (Stage -1: Write out information about the provided data sample)
28 Stage 0: Final State Particles (FSP)
29 Stage 1: pi0, J/Psi, Lambda0
30 Stage 2: K_S0, Sigma+
31 Stage 3: D and Lambda_c mesons
32 Stage 4: D* mesons
33 Stage 5: B mesons
34 Stage 6: Finish
35
36 Most stages consists of:
37 - Create Particle Candidates
38 - Apply Cuts
39 - Do vertex Fitting
40 - Apply a multivariate classification method
41 - Apply more Cuts
42
43 The FEI will reconstruct these 7 stages during the training phase,
44 since the stages depend on one another, you have to run basf2 multiple (7) times on the same data
45 to train all the necessary multivariate classifiers.
46"""
47
48# Import basf2
49import basf2
50from basf2 import B2INFO, B2WARNING, B2ERROR
51import pybasf2
52import modularAnalysis as ma
53import b2bii
54
55# Should come after basf2 import
56import pdg
57from fei import config
58import basf2_mva
59
60# Standard python modules
61import collections
62import os
63import shutil
64import typing
65import pickle
66import re
67import functools
68import subprocess
69import multiprocessing
70
71# Simple object containing the output of fei
72FeiState = collections.namedtuple('FeiState', 'path, stage, plists, fsplists, excludelists')
73
74
75class TrainingDataInformation:
76 """
77 Contains the relevant information about the used training data.
78 Basically we write out the number of MC particles in the whole dataset.
79 This numbers we can use to calculate what fraction of candidates we have to write
80 out as TrainingData to get a reasonable amount of candidates to train on
81 (too few candidates will lead to heavy overtraining, too many won't fit into memory).
82 Secondly we can use this information for the generation of the monitoring pdfs,
83 where we calculate reconstruction efficiencies.
84 """
85
86 def __init__(self, particles: typing.Sequence[config.Particle], outputPath: str = ''):
87 """
88 Create a new TrainingData object
89 @param particles list of config.Particle objects
90 @param outputPath path to the output directory
91 """
92
93 self.particles = particles
94
95 self.filename = os.path.join(outputPath, 'mcParticlesCount.root')
96
97 def available(self) -> bool:
98 """
99 Check if the relevant information is already available
100 """
101 return os.path.isfile(self.filename)
102
103 def reconstruct(self) -> pybasf2.Path:
104 """
105 Returns pybasf2.Path which counts the number of MCParticles in each event.
106 @param particles list of config.Particle objects
107 """
108 # Unique absolute pdg-codes of all particles
109 pdgs = {abs(pdg.from_name(particle.name)) for particle in self.particles}
110
111 path = basf2.create_path()
112 module = basf2.register_module('VariablesToHistogram')
113 module.set_name("VariablesToHistogram_MCCount")
114 module.param('variables', [(f'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5) for pdg in pdgs])
115 module.param('fileName', self.filename)
116 module.param('ignoreCommandLineOverride', True)
117 path.add_module(module)
118 return path
119
120 def get_mc_counts(self):
121 """
122 Read out the number of MC particles from the file created by reconstruct
123 """
124 # Unique absolute pdg-codes of all particles
125 # Always avoid the top-level 'import ROOT'.
126 import ROOT # noqa
127 root_file = ROOT.TFile.Open(self.filename, 'read')
128 mc_counts = {}
129
130 for key in root_file.GetListOfKeys():
131 variable = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
132 pdg = abs(int(variable[len('NumberOfMCParticlesInEvent('):-len(")")]))
133 hist = key.ReadObj()
134 mc_counts[pdg] = {}
135 mc_counts[pdg]['sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
136 for bin in range(hist.GetNbinsX()))
137 mc_counts[pdg]['std'] = hist.GetStdDev()
138 mc_counts[pdg]['avg'] = hist.GetMean()
139 mc_counts[pdg]['max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
140 mc_counts[pdg]['min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
141
142 mc_counts[0] = {}
143 mc_counts[0]['sum'] = hist.GetEntries() # this is the total number of ALL events, does not matter which hist we take
144 root_file.Close()
145 return mc_counts
146
147
148class FSPLoader:
149 """
150 Steers the loading of FSP particles.
151 This does NOT include RootInput, Geometry or anything required before loading FSPs,
152 the user has to add this himself (because it depends on the MC campaign and if you want
153 to use Belle or Belle II).
154 """
155
156 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
157 """
158 Create a new FSPLoader object
159 @param particles list of config.Particle objects
160 @param config config.FeiConfiguration object
161 """
162
163 self.particles = particles
164
165 self.config = config
166
167 def get_fsp_lists(self) -> typing.List[str]:
168 """
169 Returns a list of FSP particle lists which are used in the FEI.
170 This is used to create the RootOutput module.
171 """
172 fsps = ['K+:FSP', 'pi+:FSP', 'e+:FSP', 'mu+:FSP', 'p+:FSP', 'gamma:FSP', 'K_S0:V0', 'Lambda0:V0', 'K_L0:FSP', 'gamma:V0']
173 if b2bii.isB2BII():
174 fsps += ['pi0:FSP']
175 return fsps
176
177 def reconstruct(self) -> pybasf2.Path:
178 """
179 Returns pybasf2.Path which loads the FSP Particles
180 """
181 path = basf2.create_path()
182
183 if b2bii.isB2BII():
184 ma.fillParticleLists([('K+:FSP', ''), ('pi+:FSP', ''), ('e+:FSP', ''),
185 ('mu+:FSP', ''), ('p+:FSP', '')], writeOut=True, path=path)
186 for outputList, inputList in [('gamma:FSP', 'gamma:mdst'), ('K_S0:V0', 'K_S0:mdst'),
187 ('Lambda0:V0', 'Lambda0:mdst'), ('K_L0:FSP', 'K_L0:mdst'),
188 ('pi0:FSP', 'pi0:mdst'), ('gamma:V0', 'gamma:v0mdst')]:
189 ma.copyParticles(outputList, inputList, writeOut=True, path=path)
190 else:
191 ma.fillParticleLists([('K+:FSP', ''), ('pi+:FSP', ''), ('e+:FSP', ''),
192 ('mu+:FSP', ''), ('gamma:FSP', ''),
193 ('p+:FSP', ''), ('K_L0:FSP', '')], writeOut=True, path=path)
194 ma.fillParticleList('K_S0:V0 -> pi+ pi-', '', writeOut=True, path=path)
195 ma.fillParticleList('Lambda0:V0 -> p+ pi-', '', writeOut=True, path=path)
196 ma.fillConvertedPhotonsList('gamma:V0 -> e+ e-', '', writeOut=True, path=path)
197
198 if self.config.monitor:
199 names = ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'K_S0', 'p+', 'K_L0', 'Lambda0', 'pi0']
200 filename = os.path.join(self.config.monitoring_path, 'Monitor_FSPLoader.root')
201 pdgs = {abs(pdg.from_name(name)) for name in names}
202 variables = [(f'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5) for pdg in pdgs]
203 ma.variablesToHistogram('', variables=variables, filename=filename, ignoreCommandLineOverride=True, path=path)
204 return path
205
206
207class TrainingData:
208 """
209 Steers the creation of the training data.
210 The training data is used to train a multivariate classifier for each channel.
211 The training of the FEI at its core is just generating this training data for each channel.
212 After we created the training data for a stage, we have to train the classifiers (see Teacher class further down).
213 """
214
215 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration,
216 mc_counts: typing.Mapping[int, typing.Mapping[str, float]]):
217 """
218 Create a new TrainingData object
219 @param particles list of config.Particle objects
220 @param config config.FeiConfiguration object
221 @param mc_counts containing number of MC Particles
222 """
223
224 self.particles = particles
225
226 self.config = config
227
228 self.mc_counts = mc_counts
229
230 def reconstruct(self) -> pybasf2.Path:
231 """
232 Returns pybasf2.Path which creates the training data for the given particles
233 """
234 import ROOT # noqa
235 path = basf2.create_path()
236
237 for particle in self.particles:
238 pdgcode = abs(pdg.from_name(particle.name))
239 nSignal = self.mc_counts[pdgcode]['sum']
240 print(f"FEI-core: TrainingData: nSignal for {particle.name}: {nSignal}")
241
242 # For D-Mesons we usually have a efficiency of 10^-3 including branching fraction
243 if pdgcode > 400:
244 nSignal /= 1000
245 # For B-Mesons we usually have a efficiency of 10^-4 including branching fraction
246 if pdgcode > 500:
247 nSignal /= 10000
248
249 for channel in particle.channels:
250 weightfile = f'{channel.label}.xml'
251 if basf2_mva.available(weightfile):
252 B2INFO(f"FEI-core: Skipping preparing Training Data for {weightfile}, already available")
253 continue
254 filename = 'training_input.root'
255
256 # nBackground = nEvents * nBestCandidates
257 nBackground = self.mc_counts[0]['sum'] * channel.preCutConfig.bestCandidateCut
258 inverseSamplingRates = {}
259 # For some very pure channels (Jpsi), this sampling can be too aggressive and training fails.
260 # It can therefore be disabled in the preCutConfig.
261 if nBackground > Teacher.MaximumNumberOfMVASamples and not channel.preCutConfig.noBackgroundSampling:
262 inverseSamplingRates[0] = max(
263 1, int((int(nBackground / Teacher.MaximumNumberOfMVASamples) + 1) * channel.preCutConfig.bkgSamplingFactor))
264 elif channel.preCutConfig.bkgSamplingFactor > 1:
265 inverseSamplingRates[0] = int(channel.preCutConfig.bkgSamplingFactor)
266
267 if nSignal > Teacher.MaximumNumberOfMVASamples and not channel.preCutConfig.noSignalSampling:
268 inverseSamplingRates[1] = int(nSignal / Teacher.MaximumNumberOfMVASamples) + 1
269
270 spectators = [channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
271 if channel.mvaConfig.sPlotVariable is not None:
272 spectators.append(channel.mvaConfig.sPlotVariable)
273
274 if self.config.monitor:
275 hist_variables = ['mcErrors', 'mcParticleStatus'] + channel.mvaConfig.variables + spectators
276 hist_variables_2d = [(x, channel.mvaConfig.target)
277 for x in channel.mvaConfig.variables + spectators if x is not channel.mvaConfig.target]
278 hist_filename = os.path.join(self.config.monitoring_path, 'Monitor_TrainingData.root')
279 ma.variablesToHistogram(channel.name, variables=config.variables2binnings(hist_variables),
280 variables_2d=config.variables2binnings_2d(hist_variables_2d),
281 filename=hist_filename,
282 ignoreCommandLineOverride=True,
283 directory=config.removeJPsiSlash(f'{channel.label}'), path=path)
284
285 teacher = basf2.register_module('VariablesToNtuple')
286 teacher.set_name(f'VariablesToNtuple_{channel.name}')
287 teacher.param('fileName', filename)
288 teacher.param('treeName', ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables'))
289 teacher.param('variables', channel.mvaConfig.variables + spectators)
290 teacher.param('particleList', channel.name)
291 teacher.param('sampling', (channel.mvaConfig.target, inverseSamplingRates))
292 teacher.param('ignoreCommandLineOverride', True)
293 path.add_module(teacher)
294 return path
295
296
297class PreReconstruction:
298 """
299 Steers the reconstruction phase before the mva method was applied
300 It Includes:
301 - The ParticleCombination (for each particle and channel we create candidates using
302 the daughter candidates from the previous stages)
303 - MC Matching
304 - Vertex Fitting (this is the slowest part of the whole FEI, KFit is used by default,
305 but you can use fastFit as a drop-in replacement https://github.com/thomaskeck/FastFit/,
306 this will speed up the whole FEI by a factor 2-3)
307 """
308
309 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
310 """
311 Create a new PreReconstruction object
312 @param particles list of config.Particle objects
313 @param config config.FeiConfiguration object
314 """
315
316 self.particles = particles
317
318 self.config = config
319
320 def reconstruct(self) -> pybasf2.Path:
321 """
322 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
323 """
324 path = basf2.create_path()
325
326 for particle in self.particles:
327 for channel in particle.channels:
328
329 if (len(channel.daughters) == 1) and (pdg.from_name(
330 channel.daughters[0].split(':')[0]) == pdg.from_name(particle.name)):
331 ma.cutAndCopyList(channel.name, channel.daughters[0], channel.preCutConfig.userCut, writeOut=True, path=path)
332 v2EI = basf2.register_module('VariablesToExtraInfo')
333 v2EI.set_name(f'VariablesToExtraInfo_{channel.name}')
334 v2EI.param('particleList', channel.name)
335 v2EI.param('variables', {f'constant({channel.decayModeID})': 'decayModeID'})
336 # suppress warning that decay mode ID won't be overwritten if it already exists
337 v2EI.set_log_level(basf2.logging.log_level.ERROR)
338 path.add_module(v2EI)
339 else:
340 ma.reconstructDecay(channel.decayString, channel.preCutConfig.userCut, channel.decayModeID,
341 writeOut=True, path=path)
342 if self.config.monitor:
343 ma.matchMCTruth(channel.name, path=path)
344 bc_variable = channel.preCutConfig.bestCandidateVariable
345 if self.config.monitor == 'simple':
346 hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
347 hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
348 else:
349 hist_variables = [bc_variable, 'mcErrors', 'mcParticleStatus',
350 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
351 hist_variables_2d = [(bc_variable, channel.mvaConfig.target),
352 (bc_variable, 'mcErrors'),
353 (bc_variable, 'mcParticleStatus')]
354 for specVar in channel.mvaConfig.spectators:
355 hist_variables_2d.append((bc_variable, specVar))
356 hist_variables_2d.append((channel.mvaConfig.target, specVar))
357 filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_BeforeRanking.root')
358 ma.variablesToHistogram(
359 channel.name,
360 variables=config.variables2binnings(hist_variables),
361 variables_2d=config.variables2binnings_2d(hist_variables_2d),
362 filename=filename,
363 ignoreCommandLineOverride=True,
364 directory=f'{channel.label}',
365 path=path)
366
367 if channel.preCutConfig.bestCandidateMode == 'lowest':
368 ma.rankByLowest(channel.name,
369 channel.preCutConfig.bestCandidateVariable,
370 channel.preCutConfig.bestCandidateCut,
371 'preCut_rank',
372 path=path)
373 elif channel.preCutConfig.bestCandidateMode == 'highest':
374 ma.rankByHighest(channel.name,
375 channel.preCutConfig.bestCandidateVariable,
376 channel.preCutConfig.bestCandidateCut,
377 'preCut_rank',
378 path=path)
379 else:
380 raise RuntimeError(f'Unknown bestCandidateMode {repr(channel.preCutConfig.bestCandidateMode)}')
381
382 if 'gamma' in channel.decayString and channel.pi0veto:
383 ma.buildRestOfEvent(channel.name, path=path)
384 Ddaughter_roe_path = basf2.Path()
385 deadEndPath = basf2.Path()
386 ma.signalSideParticleFilter(channel.name, '', Ddaughter_roe_path, deadEndPath)
387 ma.fillParticleList('gamma:roe', 'isInRestOfEvent == 1', path=Ddaughter_roe_path)
388
389 matches = list(re.finditer('gamma', channel.decayString))
390 pi0lists = []
391 for igamma in range(len(matches)):
392 start, end = matches[igamma-1].span()
393 tempString = f'{channel.decayString[:start]}^gamma{channel.decayString[end:]}'
394 ma.fillSignalSideParticleList(f'gamma:sig_{igamma}', tempString, path=Ddaughter_roe_path)
395 ma.reconstructDecay(f'pi0:veto_{igamma} -> gamma:sig_{igamma} gamma:roe', '', path=Ddaughter_roe_path)
396 pi0lists.append(f'pi0:veto_{igamma}')
397 ma.copyLists('pi0:veto', pi0lists, writeOut=False, path=Ddaughter_roe_path)
398 ma.rankByLowest('pi0:veto', 'abs(dM)', 1, path=Ddaughter_roe_path)
399 ma.matchMCTruth('pi0:veto', path=Ddaughter_roe_path)
400 ma.variableToSignalSideExtraInfo(
401 'pi0:veto',
402 {
403 'InvM': 'pi0vetoMass',
404 'formula((daughter(0,E)-daughter(1,E))/(daughter(0,E)+daughter(1,E)))': 'pi0vetoEnergyAsymmetry',
405 },
406 path=Ddaughter_roe_path
407 )
408 path.for_each('RestOfEvent', 'RestOfEvents', Ddaughter_roe_path)
409
410 if self.config.monitor:
411 filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_AfterRanking.root')
412 if self.config.monitor != 'simple':
413 hist_variables += ['extraInfo(preCut_rank)']
414 hist_variables_2d += [('extraInfo(preCut_rank)', channel.mvaConfig.target),
415 ('extraInfo(preCut_rank)', 'mcErrors'),
416 ('extraInfo(preCut_rank)', 'mcParticleStatus')]
417 for specVar in channel.mvaConfig.spectators:
418 hist_variables_2d.append(('extraInfo(preCut_rank)', specVar))
419 ma.variablesToHistogram(
420 channel.name,
421 variables=config.variables2binnings(hist_variables),
422 variables_2d=config.variables2binnings_2d(hist_variables_2d),
423 filename=filename,
424 ignoreCommandLineOverride=True,
425 directory=f'{channel.label}',
426 path=path)
427 # If we are not in monitor mode we do the mc matching now,
428 # otherwise we did it above already!
429 elif self.config.training:
430 ma.matchMCTruth(channel.name, path=path)
431
432 if b2bii.isB2BII() and particle.name in ['K_S0', 'Lambda0']:
433 pvfit = basf2.register_module('ParticleVertexFitter')
434 pvfit.set_name(f'ParticleVertexFitter_{channel.name}')
435 pvfit.param('listName', channel.name)
436 pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
437 pvfit.param('vertexFitter', 'KFit')
438 pvfit.param('fitType', 'vertex')
439 pvfit.set_log_level(basf2.logging.log_level.ERROR) # let's not produce gigabytes of uninteresting warnings
440 path.add_module(pvfit)
441 elif re.findall(r"[\w']+", channel.decayString).count('pi0') > 1 and particle.name != 'pi0':
442 basf2.B2INFO(f"Ignoring vertex fit for {channel.name} because multiple pi0 are not supported yet.")
443 elif len(channel.daughters) > 1:
444 pvfit = basf2.register_module('ParticleVertexFitter')
445 pvfit.set_name(f'ParticleVertexFitter_{channel.name}')
446 pvfit.param('listName', channel.name)
447 pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
448 pvfit.param('vertexFitter', 'KFit')
449 if particle.name in ['pi0']:
450 pvfit.param('fitType', 'mass')
451 else:
452 pvfit.param('fitType', 'vertex')
453 pvfit.set_log_level(basf2.logging.log_level.ERROR) # let's not produce gigabytes of uninteresting warnings
454 path.add_module(pvfit)
455
456 if self.config.monitor:
457 if self.config.monitor == 'simple':
458 hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
459 hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
460 else:
461 hist_variables = ['chiProb', 'mcErrors', 'mcParticleStatus',
462 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
463 hist_variables_2d = [('chiProb', channel.mvaConfig.target),
464 ('chiProb', 'mcErrors'),
465 ('chiProb', 'mcParticleStatus')]
466 for specVar in channel.mvaConfig.spectators:
467 hist_variables_2d.append(('chiProb', specVar))
468 hist_variables_2d.append((channel.mvaConfig.target, specVar))
469 filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_AfterVertex.root')
470 ma.variablesToHistogram(
471 channel.name,
472 variables=config.variables2binnings(hist_variables),
473 variables_2d=config.variables2binnings_2d(hist_variables_2d),
474 filename=filename,
475 ignoreCommandLineOverride=True,
476 directory=f'{channel.label}',
477 path=path)
478
479 return path
480
481
482class PostReconstruction:
483 """
484 Steers the reconstruction phase after the mva method was applied
485 It Includes:
486 - The application of the mva method itself.
487 - Copying all channel lists in a common one for each particle defined in particles
488 - Tag unique signal candidates, to avoid double counting of channels with overlap
489 """
490
491 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
492 """
493 Create a new PostReconstruction object
494 @param particles list of config.Particle objects
495 @param config config.FeiConfiguration object
496 """
497
498 self.particles = particles
499
500 self.config = config
501
502 def get_missing_channels(self) -> typing.Sequence[str]:
503 """
504 Returns all channels for which the weightfile is missing
505 """
506 missing = []
507 for particle in self.particles:
508 for channel in particle.channels:
509 # weightfile = self.config.prefix + '_' + channel.label
510 weightfile = f'{channel.label}.xml'
511 if not basf2_mva.available(weightfile):
512 missing += [channel.label]
513 return missing
514
515 def available(self) -> bool:
516 """
517 Check if the relevant information is already available
518 """
519 return len(self.get_missing_channels()) == 0
520
521 def reconstruct(self) -> pybasf2.Path:
522 """
523 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
524 """
525 import ROOT # noqa
526 path = basf2.create_path()
527
528 for particle in self.particles:
529 for channel in particle.channels:
530 expert = basf2.register_module('MVAExpert')
531 expert.set_name(f'MVAExpert_{channel.name}')
532 if self.config.training:
533 expert.param('identifier', f'{channel.label}.xml')
534 else:
535 expert.param('identifier', f'{self.config.prefix}_{channel.label}')
536 expert.param('extraInfoName', 'SignalProbability')
537 expert.param('listNames', [channel.name])
538 # suppress warning that signal probability won't be overwritten if it already exists
539 expert.set_log_level(basf2.logging.log_level.ERROR)
540 path.add_module(expert)
541
542 if self.config.monitor:
543 if self.config.monitor == 'simple':
544 hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
545 hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
546 else:
547 hist_variables = ['mcErrors',
548 'mcParticleStatus',
549 'extraInfo(SignalProbability)',
550 channel.mvaConfig.target,
551 'extraInfo(decayModeID)'] + list(channel.mvaConfig.spectators.keys())
552 hist_variables_2d = [('extraInfo(SignalProbability)', channel.mvaConfig.target),
553 ('extraInfo(SignalProbability)', 'mcErrors'),
554 ('extraInfo(SignalProbability)', 'mcParticleStatus'),
555 ('extraInfo(decayModeID)', channel.mvaConfig.target),
556 ('extraInfo(decayModeID)', 'mcErrors'),
557 ('extraInfo(decayModeID)', 'mcParticleStatus')]
558 for specVar in channel.mvaConfig.spectators:
559 hist_variables_2d.append(('extraInfo(SignalProbability)', specVar))
560 hist_variables_2d.append(('extraInfo(decayModeID)', specVar))
561 hist_variables_2d.append((channel.mvaConfig.target, specVar))
562 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_AfterMVA.root')
563 ma.variablesToHistogram(
564 channel.name,
565 variables=config.variables2binnings(hist_variables),
566 variables_2d=config.variables2binnings_2d(hist_variables_2d),
567 filename=filename,
568 ignoreCommandLineOverride=True,
569 directory=f'{channel.label}',
570 path=path)
571
572 cutstring = ''
573 if particle.postCutConfig.value > 0.0:
574 cutstring = f'{particle.postCutConfig.value} < extraInfo(SignalProbability)'
575
576 ma.mergeListsWithBestDuplicate(particle.identifier, [c.name for c in particle.channels],
577 variable='particleSource', writeOut=True, path=path)
578
579 if self.config.monitor:
580 if self.config.monitor == 'simple':
581 hist_variables = [particle.mvaConfig.target, 'extraInfo(decayModeID)']
582 hist_variables_2d = [(particle.mvaConfig.target, 'extraInfo(decayModeID)')]
583 else:
584 hist_variables = ['mcErrors',
585 'mcParticleStatus',
586 'extraInfo(SignalProbability)',
587 particle.mvaConfig.target,
588 'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
589 hist_variables_2d = [('extraInfo(decayModeID)', particle.mvaConfig.target),
590 ('extraInfo(decayModeID)', 'mcErrors'),
591 ('extraInfo(decayModeID)', 'mcParticleStatus')]
592 for specVar in particle.mvaConfig.spectators:
593 hist_variables_2d.append(('extraInfo(SignalProbability)', specVar))
594 hist_variables_2d.append(('extraInfo(decayModeID)', specVar))
595 hist_variables_2d.append((particle.mvaConfig.target, specVar))
596 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_BeforePostCut.root')
597 ma.variablesToHistogram(
598 particle.identifier,
599 variables=config.variables2binnings(hist_variables),
600 variables_2d=config.variables2binnings_2d(hist_variables_2d),
601 filename=filename,
602 ignoreCommandLineOverride=True,
603 directory=config.removeJPsiSlash(f'{particle.identifier}'),
604 path=path)
605
606 ma.applyCuts(particle.identifier, cutstring, path=path)
607
608 if self.config.monitor:
609 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_BeforeRanking.root')
610 ma.variablesToHistogram(
611 particle.identifier,
612 variables=config.variables2binnings(hist_variables),
613 variables_2d=config.variables2binnings_2d(hist_variables_2d),
614 filename=filename,
615 ignoreCommandLineOverride=True,
616 directory=config.removeJPsiSlash(f'{particle.identifier}'),
617 path=path)
618
619 ma.rankByHighest(particle.identifier, 'extraInfo(SignalProbability)',
620 particle.postCutConfig.bestCandidateCut, 'postCut_rank', path=path)
621
622 uniqueSignal = basf2.register_module('TagUniqueSignal')
623 uniqueSignal.param('particleList', particle.identifier)
624 uniqueSignal.param('target', particle.mvaConfig.target)
625 uniqueSignal.param('extraInfoName', 'uniqueSignal')
626 uniqueSignal.set_name(f'TagUniqueSignal_{particle.identifier}')
627 # suppress warning that unique signal extra info won't be overwritten if it already exists
628 uniqueSignal.set_log_level(basf2.logging.log_level.ERROR)
629 path.add_module(uniqueSignal)
630
631 if self.config.monitor:
632 if self.config.monitor != 'simple':
633 hist_variables += ['extraInfo(postCut_rank)']
634 hist_variables_2d += [('extraInfo(decayModeID)', 'extraInfo(postCut_rank)'),
635 (particle.mvaConfig.target, 'extraInfo(postCut_rank)'),
636 ('mcErrors', 'extraInfo(postCut_rank)'),
637 ('mcParticleStatus', 'extraInfo(postCut_rank)')]
638 for specVar in particle.mvaConfig.spectators:
639 hist_variables_2d.append(('extraInfo(postCut_rank)', specVar))
640 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_AfterRanking.root')
641 ma.variablesToHistogram(
642 particle.identifier,
643 variables=config.variables2binnings(hist_variables),
644 variables_2d=config.variables2binnings_2d(hist_variables_2d),
645 filename=filename,
646 ignoreCommandLineOverride=True,
647 directory=config.removeJPsiSlash(f'{particle.identifier}'),
648 path=path)
649
650 filename = os.path.join(self.config.monitoring_path, 'Monitor_Final.root')
651 if self.config.monitor == 'simple':
652 hist_variables = ['extraInfo(uniqueSignal)', 'extraInfo(decayModeID)']
653 hist_variables_2d = [('extraInfo(uniqueSignal)', 'extraInfo(decayModeID)')]
654 ma.variablesToHistogram(
655 particle.identifier,
656 variables=config.variables2binnings(hist_variables),
657 variables_2d=config.variables2binnings_2d(hist_variables_2d),
658 filename=filename,
659 ignoreCommandLineOverride=True,
660 directory=config.removeJPsiSlash(f'{particle.identifier}'),
661 path=path)
662 else:
663 variables = ['extraInfo(SignalProbability)', 'mcErrors', 'mcParticleStatus', particle.mvaConfig.target,
664 'extraInfo(uniqueSignal)', 'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
665
666 ma.variablesToNtuple(
667 particle.identifier,
668 variables,
669 treename=ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(
670 config.removeJPsiSlash(f'{particle.identifier} variables')),
671 filename=filename,
672 ignoreCommandLineOverride=True,
673 path=path)
674 return path
675
676
677class Teacher:
678 """
679 Performs all necessary trainings for all training data files which are
680 available but where there is no weight file available yet.
681 This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
682 The trainings are run in parallel using multi-threading of python.
683 Each training is done by a subprocess call, the training command (passed by config.externTeacher) can be either
684 * basf2_mva_teacher, the training will be done directly on the machine
685 * externClustTeacher, the training will be submitted to the batch system of KEKCC
686 """
687
689 MaximumNumberOfMVASamples = int(1e7)
690
692 MinimumNumberOfMVASamples = int(5e2)
693
694 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
695 """
696 Create a new Teacher object
697 @param particles list of config.Particle objects
698 @param config config.FeiConfiguration object
699 """
700
701 self.particles = particles
702
703 self.config = config
704
705 @staticmethod
706 def create_fake_weightfile(channel: str):
707 """
708 Create a fake weight file using the trivial method, it will always return 0.0
709 @param channel for which we create a fake weight file
710 """
711 content = f"""
712 <?xml version="1.0" encoding="utf-8"?>
713 <method>Trivial</method>
714 <weightfile>{channel}.xml</weightfile>
715 <treename>tree</treename>
716 <target_variable>isSignal</target_variable>
717 <weight_variable>__weight__</weight_variable>
718 <signal_class>1</signal_class>
719 <max_events>0</max_events>
720 <number_feature_variables>1</number_feature_variables>
721 <variable0>M</variable0>
722 <number_spectator_variables>0</number_spectator_variables>
723 <number_data_files>1</number_data_files>
724 <datafile0>train.root</datafile0>
725 <Trivial_version>1</Trivial_version>
726 <Trivial_output>0</Trivial_output>
727 <signal_fraction>0.066082567</signal_fraction>
728 """
729 with open(f'{channel}.xml', "w") as f:
730 f.write(content)
731
732 @staticmethod
733 def check_if_weightfile_is_fake(filename: str):
734 """
735 Checks if the provided filename is a fake-weight file or not
736 @param filename the filename of the weight file
737 """
738 try:
739 return '<method>Trivial</method>' in open(filename).readlines()[2]
740 except BaseException:
741 return True
742 return True
743
744 def upload(self, channel: str):
745 """
746 Upload the weight file into the condition database
747 @param channel whose weight file is uploaded
748 """
749 disk = f'{channel}.xml'
750 dbase = f'{self.config.prefix}_{channel}'
751 basf2_mva.upload(disk, dbase)
752 print(f"FEI-core: Uploading {dbase} to localdb")
753 return (disk, dbase)
754
755 def do_all_trainings(self):
756 """
757 Do all trainings for which we find training data
758 """
759 # Always avoid the top-level 'import ROOT'.
760 import ROOT # noqa
761 # FEI uses multi-threading for parallel execution of tasks therefore
762 # the ROOT gui-thread is disabled, which otherwise interferes sometimes
763 ROOT.PyConfig.StartGuiThread = False
764 job_list = []
765
766 all_stage_particles = get_stages_from_particles(self.particles)
767 if self.config.cache is None:
768 stagesToTrain = range(1, len(all_stage_particles)+1)
769 else:
770 stagesToTrain = [self.config.cache]
771
772 filename = 'training_input.root'
773 if os.path.isfile(filename):
774 f = ROOT.TFile.Open(filename, 'read')
775 if f.IsZombie():
776 B2WARNING(f'Training of MVC failed: {filename}. ROOT file corrupt. No weight files will be provided.')
777 elif len([k.GetName() for k in f.GetListOfKeys()]) == 0:
778 B2WARNING(
779 f'Training of MVC failed: {filename}. ROOT file has no trees. No weight files will be provided.')
780 else:
781 for istage in stagesToTrain:
782 for particle in all_stage_particles[istage-1]:
783 for channel in particle.channels:
784 weightfile = f'{channel.label}.xml'
785 if basf2_mva.available(weightfile):
786 B2INFO(f"FEI-core: Skipping {weightfile}, already available")
787 continue
788 else:
789 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
790 keys = [m for m in f.GetListOfKeys() if treeName in m.GetName()]
791 if not keys:
792 B2WARNING("Training of MVC failed. "
793 f"Couldn't find tree for channel {channel}. Ignoring channel.")
794 continue
795 elif len(keys) > 1:
796 B2WARNING(f"Found more than one tree for channel {channel}. Taking first tree from: {keys}")
797 tree = keys[0].ReadObj()
798 total_entries = tree.GetEntries()
799 nSig = tree.GetEntries(f'{channel.mvaConfig.target}==1.0')
800 nBg = tree.GetEntries(f'{channel.mvaConfig.target}==0.0')
801 B2INFO(
802 f'FEI-core: Number of events for channel: {channel.label}, '
803 f'Total: {total_entries}, Signal: {nSig}, Background: {nBg}')
804 if nSig < Teacher.MinimumNumberOfMVASamples:
805 B2WARNING("Training of MVC failed. "
806 f"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
807 self.create_fake_weightfile(channel.label)
808 self.upload(channel.label)
809 continue
810 if nBg < Teacher.MinimumNumberOfMVASamples:
811 B2WARNING("Training of MVC failed. "
812 f"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
813 self.create_fake_weightfile(channel.label)
814 self.upload(channel.label)
815 continue
816 variable_str = "' '".join(channel.mvaConfig.variables)
817
818 spectators = list(channel.mvaConfig.spectators.keys())
819 if channel.mvaConfig.sPlotVariable is not None:
820 spectators.append(channel.mvaConfig.sPlotVariable)
821 spectators_str = "' '".join(spectators)
822
823 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
824 command = (f"{self.config.externTeacher}"
825 f" --method '{channel.mvaConfig.method}'"
826 f" --target_variable '{channel.mvaConfig.target}'"
827 f" --treename '{treeName}'"
828 f" --datafile 'training_input.root'"
829 f" --signal_class 1"
830 f" --variables '{variable_str}'"
831 f" --identifier '{weightfile}'")
832 if len(spectators) > 0:
833 command += f" --spectators '{spectators_str}'"
834 command += f" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1"
835 B2INFO(f"Used following command to invoke teacher: \n {command}")
836 job_list.append((channel.label, command))
837 f.Close()
838
839 if len(job_list) > 0:
840 p = multiprocessing.Pool(None, maxtasksperchild=1)
841 func = functools.partial(subprocess.call, shell=True)
842 p.map(func, [c for _, c in job_list])
843 p.close()
844 p.join()
845 weightfiles = []
846 for name, _ in job_list:
847 if not basf2_mva.available(f'{name}.xml'):
848 B2WARNING("Training of MVC failed. For unknown reasons, check the logfile", f'{name}.log')
849 self.create_fake_weightfile(name)
850 weightfiles.append(self.upload(name))
851 return weightfiles
852
853
854def convert_legacy_training(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
855 """
856 Convert an old FEI training into the new format.
857 The old format used hashes for the weight files, the hashes can be converted to the new naming scheme
858 using the Summary.pickle file outputted by the FEIv3. This file must be passes by the parameter configuration.legacy.
859 @param particles list of config.Particle objects
860 @param config config.FeiConfiguration object
861 """
862 summary = pickle.load(open(configuration.legacy, 'rb'))
863 channel2lists = {k: v[2] for k, v in summary['channel2lists'].items()}
864
865 teacher = Teacher(particles, configuration)
866
867 for particle in particles:
868 for channel in particle.channels:
869 new_weightfile = f'{configuration.prefix}_{channel.label}'
870 old_weightfile = f'{configuration.prefix}_{channel2lists[channel.label.replace("Jpsi", "J/psi")]}'
871 if not basf2_mva.available(new_weightfile):
872 if old_weightfile is None or not basf2_mva.available(old_weightfile):
873 Teacher.create_fake_weightfile(channel.label)
874 teacher.upload(channel.label)
875 else:
876 basf2_mva.download(old_weightfile, f'{channel.label}.xml')
877 teacher.upload(channel.label)
878
879
880def get_stages_from_particles(particles: typing.Sequence[typing.Union[config.Particle, str]]):
881 """
882 Returns the hierarchical structure of the FEI.
883 Each stage depends on the particles in the previous stage.
884 The final stage is empty (meaning everything is done, and the training is finished at this point).
885 @param particles list of config.Particle or string objects
886 """
887 def get_pname(p):
888 return p.split(":")[0] if isinstance(p, str) else p.name
889
890 def get_plabel(p):
891 return (p.split(":")[1] if isinstance(p, str) else p.label).lower()
892
893 stages = [
894 [p for p in particles if get_pname(p) in ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'p+', 'K_L0']],
895 [p for p in particles if get_pname(p) in ['pi0', 'J/psi', 'Lambda0']],
896 [p for p in particles if get_pname(p) in ['K_S0', 'Sigma+']],
897 [p for p in particles if get_pname(p) in ['D+', 'D0', 'D_s+', 'Lambda_c+'] and 'tag' not in get_plabel(p)],
898 [p for p in particles if get_pname(p) in ['D*+', 'D*0', 'D_s*+'] and 'tag' not in get_plabel(p)],
899 [p for p in particles if get_pname(p) in ['B0', 'B+', 'B_s0'] or 'tag' in get_plabel(p)],
900 []
901 ]
902
903 for p in particles:
904 pname = get_pname(p)
905 if pname not in [pname for stage in stages for p in stage]:
906 raise RuntimeError(f"Unknown particle {pname}: Not implemented in FEI")
907
908 return stages
909
910
911def do_trainings(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
912 """
913 Performs the training of mva classifiers for all available training data,
914 this function must be either called by the user after each stage of the FEI during training,
915 or (more likely) is called by the distributed.py script after merging the outputs of all jobs,
916 @param particles list of config.Particle objects
917 @param config config.FeiConfiguration object
918 @return list of tuple with weight file on disk and identifier in database for all trained classifiers
919 """
920 teacher = Teacher(particles, configuration)
921 return teacher.do_all_trainings()
922
923
924def save_summary(particles: typing.Sequence[config.Particle],
925 configuration: config.FeiConfiguration,
926 cache: int,
927 roundMode: int = None,
928 pickleName: str = 'Summary.pickle'):
929 """
930 Creates the Summary.pickle, which is used to keep track of the stage during the training,
931 and can be used later to investigate which configuration was used exactly to create the training.
932 @param particles list of config.Particle objects
933 @param config config.FeiConfiguration object
934 @param cache current cache level
935 @param roundMode mode of current round of training
936 @param pickleName name of the pickle file
937 """
938 if roundMode is None:
939 roundMode = configuration.roundMode
940 configuration = configuration._replace(cache=cache, roundMode=roundMode)
941 # Backup existing Summary.pickle files
942 for i in range(8, -1, -1):
943 if os.path.isfile(f'{pickleName}.backup_{i}'):
944 shutil.copyfile(f'{pickleName}.backup_{i}', f'{pickleName}.backup_{i+1}')
945 if os.path.isfile(pickleName):
946 shutil.copyfile(pickleName, f'{pickleName}.backup_0')
947 pickle.dump((particles, configuration), open(pickleName, 'wb'))
948
949
950def get_path(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration) -> FeiState:
951 """
952 The most important function of the FEI.
953 This creates the FEI path for training/fitting (both terms are equal), and application/inference (both terms are equal).
954 The whole FEI is defined by the particles which are reconstructed (see default_channels.py)
955 and the configuration (see config.py).
956
957 TRAINING
958 For training this function is called multiple times, each time the FEI reconstructs one more stage in the hierarchical structure
959 i.e. we start with FSP, pi0, KS_0, D, D*, and with B mesons. You have to set configuration.training to True for training mode.
960 All weight files created during the training will be stored in your local database.
961 If you want to use the FEI training everywhere without copying this database by hand, you have to upload your local database
962 to the central database first (see documentation for the Belle2 Condition Database).
963
964 APPLICATION
965 For application you call this function once, and it returns the whole path which will reconstruct B mesons
966 with an associated signal probability. You have to set configuration.training to False for application mode.
967
968 MONITORING
969 You can always turn on the monitoring (configuration.monitor != False),
970 to write out ROOT Histograms of many quantities for each stage,
971 using these histograms you can use the printReporting.py or latexReporting.py scripts to automatically create pdf files.
972
973 LEGACY
974 This function can also use old FEI trainings (version 3), just pass the Summary.pickle file of the old training,
975 and the weight files will be automatically converted to the new naming scheme.
976
977 @param particles list of config.Particle objects
978 @param config config.FeiConfiguration object
979 """
980 print(r"""
981 ____ _ _ _ _ ____ _ _ ____ _ _ ___ _ _ _ ___ ____ ____ ___ ____ ____ ___ ____ ___ _ ____ _ _
982 |___ | | | | |___ | | |___ |\ | | | |\ | | |___ |__/ |__] |__/ |___ | |__| | | | | |\ |
983 | |__| |___ |___ |___ \/ |___ | \| | | | \| | |___ | \ | | \ |___ | | | | | |__| | \|
984
985 Author: Thomas Keck 2014 - 2017
986 Please cite my PhD thesis
987 """)
988
989 # The cache parameter of the configuration object is used during training to keep track,
990 # which reconstruction steps are already performed.
991 # For fitting/training we start by default with -1, meaning we still have to create the TrainingDataInformation,
992 # which is used to determine the number of candidates we have to write out for the FSP trainings in stage 0.
993 # For inference/application we start by default with 0, because we don't need the TrainingDataInformation in stage 0.
994 # RoundMode plays a similar role as cache,
995 # it is used to keep track in which phase within a stage the basf2 execution stops, relevant only for training.
996 # During the training we save the particles and configuration (including the current cache stage) in the Summary.pickle object.
997 if configuration.training and (configuration.monitor and (configuration.monitoring_path != '')):
998 B2ERROR("FEI-core: Custom Monitoring path is not allowed during training!")
999
1000 if configuration.cache is None:
1001 pickleName = 'Summary.pickle'
1002 if configuration.monitor:
1003 pickleName = os.path.join(configuration.monitoring_path, pickleName)
1004
1005 if os.path.isfile(pickleName):
1006 particles_bkp, config_bkp = pickle.load(open(pickleName, 'rb'))
1007 # check if configuration changed
1008 for fd in configuration._fields:
1009 if fd == 'cache' or fd == 'roundMode':
1010 continue
1011 if getattr(configuration, fd) != getattr(config_bkp, fd):
1012 B2WARNING(
1013 f"FEI-core: Configuration changed: {fd} from {getattr(config_bkp, fd)} to {getattr(configuration, fd)}")
1014
1015 configuration = config_bkp
1016 cache = configuration.cache
1017 print("Cache: Replaced particles from steering and configuration from Summary.pickle: ", cache, configuration.roundMode)
1018 else:
1019 if configuration.training:
1020 cache = -1
1021 else:
1022 cache = 0
1023 else:
1024 cache = configuration.cache
1025
1026 # Now we start building the training or application path
1027 path = basf2.create_path()
1028
1029 # There are in total 7 stages.
1030 # For training we start with -1 and go to 7 one stage at a time
1031 # For application we can run stage 0 to 7 at once
1032 stages = get_stages_from_particles(particles)
1033
1034 # If the user provided a Summary.pickle file of a FEIv3 training we
1035 # convert the old weight files (with hashes), to the new naming scheme.
1036 # Afterwards the algorithm runs as usual
1037 if configuration.legacy is not None:
1038 convert_legacy_training(particles, configuration)
1039
1040 # During the training we require the number of MC particles in the whole processed
1041 # data sample, because we don't want to write out billions of e.g. pion candidates.
1042 # Knowing the total amount of MC particles we can write out only every e.g. 10th candidate
1043 # That's why we have to write out the TrainingDataInformation before doing anything during the training phase.
1044 # During application we only need this if we run in monitor mode, and want to write out a summary in the end,
1045 # the summary contains efficiency, and the efficiency calculation requires the total number of MC particles.
1046 training_data_information = TrainingDataInformation(particles, outputPath=configuration.monitoring_path)
1047 if cache < 0 and configuration.training:
1048 print("Stage 0: Run over all files to count the number of events and McParticles")
1049 path.add_path(training_data_information.reconstruct())
1050 if configuration.training:
1051 save_summary(particles, configuration, 0)
1052 return FeiState(path, 0, [], [], [])
1053 elif not configuration.training and configuration.monitor:
1054 path.add_path(training_data_information.reconstruct())
1055
1056 # We load the Final State particles
1057 # It is assumed that the user takes care of adding RootInput, Geometry, and everything
1058 # which is required to read in data, so we directly start to load the FSP particles
1059 # used by the FEI.
1060 loader = FSPLoader(particles, configuration)
1061 if cache < 1:
1062 print("Stage 0: Load FSP particles")
1063 path.add_path(loader.reconstruct())
1064
1065 # Now we reconstruct each stage one after another.
1066 # Each stage consists of two parts:
1067 # PreReconstruction (before the mva method was applied):
1068 # - Particle combination
1069 # - Do vertex fitting
1070 # - Some simple cuts and best candidate selection
1071 # PostReconstruction (after the mva method was applied):
1072 # - Apply the mva method
1073 # - Apply cuts on the mva output and best candidate selection
1074 #
1075 # If the weight files for the PostReconstruction are not available for the current stage and we are in training mode,
1076 # we have to create the training data. The training itself is done by the do_trainings function which is called
1077 # either by the user after each step, or by the distributed.py script
1078 #
1079 # If the weight files for the PostReconstruction are not available for the current stage and we are not in training mode,
1080 # we keep going, as soon as the user will call process on the produced path he will get an error message that the
1081 # weight files are missing.
1082 #
1083 # Finally we keep track of the ParticleLists we use, so the user can run the RemoveParticles module to reduce the size of the
1084 # intermediate output of RootOutput.
1085 used_lists = []
1086 for stage, stage_particles in enumerate(stages):
1087 if len(stage_particles) == 0:
1088 print(f"Stage {stage}: No particles to reconstruct in this stage, skipping!")
1089 continue
1090
1091 pre_reconstruction = PreReconstruction(stage_particles, configuration)
1092 post_reconstruction = PostReconstruction(stage_particles, configuration)
1093
1094 if stage >= cache:
1095 print(f"Stage {stage}: PreReconstruct particles: ", [p.name for p in stage_particles])
1096 path.add_path(pre_reconstruction.reconstruct())
1097 if configuration.training and not (post_reconstruction.available() and configuration.roundMode == 0):
1098 print(f"Stage {stage}: Create training data for particles: ", [p.name for p in stage_particles])
1099 mc_counts = training_data_information.get_mc_counts()
1100 training_data = TrainingData(stage_particles, configuration, mc_counts)
1101 path.add_path(training_data.reconstruct())
1102 used_lists += [channel.name for particle in stage_particles for channel in particle.channels]
1103 break
1104
1105 used_lists += [particle.identifier for particle in stage_particles]
1106 if (stage >= cache - 1) and not ((configuration.roundMode == 1) and configuration.training):
1107 if (configuration.roundMode == 3) and configuration.training:
1108 print(f"Stage {stage}: BDTs already applied for particles, no postReco needed: ", [p.name for p in stage_particles])
1109 else:
1110 print(f"Stage {stage}: Apply BDT for particles: ", [p.name for p in stage_particles])
1111 if configuration.training and not post_reconstruction.available():
1112 raise RuntimeError("FEI-core: training of current stage was not successful, please retrain!")
1113 path.add_path(post_reconstruction.reconstruct())
1114 if (((configuration.roundMode == 2) or (configuration.roundMode == 3)) and configuration.training):
1115 break
1116 fsps_of_all_stages = [fsp for sublist in get_stages_from_particles(loader.get_fsp_lists()) for fsp in sublist]
1117
1118 excludelists = []
1119 if configuration.training and (configuration.roundMode == 3):
1120 dontRemove = used_lists + fsps_of_all_stages
1121 # cleanup higher stages
1122 cleanup = basf2.register_module('RemoveParticlesNotInLists')
1123 print("FEI-REtrain: pruning basf2_input.root of higher stages")
1124 cleanup.param('particleLists', dontRemove)
1125 path.add_module(cleanup)
1126
1127 # check which lists we have to exclude from the output
1128 import ROOT # noqa
1129 excludedParticlesNonConjugated = [p.identifier for p in particles if p.identifier not in dontRemove]
1130 excludedParticles = [
1131 str(name) for name in list(
1132 ROOT.Belle2.ParticleListName.addAntiParticleLists(excludedParticlesNonConjugated))]
1133 root_file = ROOT.TFile.Open('basf2_input.root', "READ")
1134 tree = root_file.Get('tree')
1135 for branch in tree.GetListOfBranches():
1136 branchName = branch.GetName()
1137 if any(exParticle in branchName for exParticle in excludedParticles):
1138 excludelists.append(branchName)
1139 print("Exclude lists from output: ", excludelists)
1140
1141 # If we run in monitor mode we are interested in the ModuleStatistics,
1142 # these statistics contain the runtime for each module which was run.
1143 if configuration.monitor:
1144 print("Add ModuleStatistics")
1145 output = basf2.register_module('RootOutput')
1146 output.param('outputFileName', os.path.join(configuration.monitoring_path, 'Monitor_ModuleStatistics.root'))
1147 output.param('branchNames', ['EventMetaData']) # cannot be removed, but of only small effect on file size
1148 output.param('branchNamesPersistent', ['ProcessStatistics'])
1149 output.param('ignoreCommandLineOverride', True)
1150 path.add_module(output)
1151
1152 # As mentioned above the FEI keeps track of the stages which are already reconstructed during the training
1153 # so we write out the Summary.pickle here, and increase the stage by one.
1154 if configuration.training or configuration.monitor:
1155 print("Save Summary.pickle")
1156 save_summary(particles, configuration, stage+1, pickleName=os.path.join(configuration.monitoring_path, 'Summary.pickle'))
1157
1158 # Finally we return the path, the stage and the used lists to the user.
1159 return FeiState(path, stage+1, plists=used_lists, fsplists=fsps_of_all_stages, excludelists=excludelists)
1160
1161# @endcond
isB2BII()
Definition b2bii.py:14
from_name(name)
Definition pdg.py:63