Belle II Software development
core.py
1#!/usr/bin/env python3
2
3
10
11"""
12 The Full Event Interpretation Algorithm
13
14 Some basic facts:
15 - The algorithm will automatically reconstruct B mesons and calculate a signal probability for each candidate.
16 - It can be used for hadronic and semileptonic tagging.
17 - The algorithm has to be trained on MC and can afterwards be applied to data.
18 - The training requires O(100) million MC events.
19 - The weight files are stored in the Belle II Conditions Database.
20
21 Read this file if you want to understand the technical details of the FEI.
22
23 The FEI follows a hierarchical approach.
24 There are 7 stages:
25 (Stage -1: Write out information about the provided data sample)
26 Stage 0: Final State Particles (FSP)
27 Stage 1: pi0, J/Psi, Lambda0
28 Stage 2: K_S0, Sigma+
29 Stage 3: D and Lambda_c mesons
30 Stage 4: D* mesons
31 Stage 5: B mesons
32 Stage 6: Finish
33
34 Most stages consist of:
35 - Create Particle Candidates
36 - Apply Cuts
37 - Do vertex Fitting
38 - Apply a multivariate classification method
39 - Apply more Cuts
40
41 The FEI reconstructs these 7 stages during the training phase.
42 Since the stages depend on one another, you have to run basf2 multiple (7) times on the same data
43 to train all the necessary multivariate classifiers.
44"""
45
46# Import basf2
47import basf2
48from basf2 import B2INFO, B2WARNING, B2ERROR
49import pybasf2
50import modularAnalysis as ma
51import b2bii
52
53# Should come after basf2 import
54import pdg
55from fei import config
56import basf2_mva
57
58# Standard python modules
59import collections
60import os
61import shutil
62import typing
63import pickle
64import re
65import functools
66import subprocess
67import multiprocessing
68
69# Simple object containing the output of fei
70FeiState = collections.namedtuple('FeiState', 'path, stage, plists, fsplists, excludelists')
71
72
74 """
75 Contains the relevant information about the used training data.
76 Basically we write out the number of MC particles in the whole dataset.
77 This numbers we can use to calculate what fraction of candidates we have to write
78 out as TrainingData to get a reasonable amount of candidates to train on
79 (too few candidates will lead to heavy overtraining, too many won't fit into memory).
80 Secondly we can use this information for the generation of the monitoring pdfs,
81 where we calculate reconstruction efficiencies.
82 """
83
84 def __init__(self, particles: typing.Sequence[config.Particle], outputPath: str = ''):
85 """
86 Create a new TrainingData object
87 @param particles list of config.Particle objects
88 @param outputPath path to the output directory
89 """
90
91 self.particles = particles
92
93 self.filename = os.path.join(outputPath, 'mcParticlesCount.root')
94
95 def available(self) -> bool:
96 """
97 Check if the relevant information is already available
98 """
99 return os.path.isfile(self.filename)
100
101 def reconstruct(self) -> pybasf2.Path:
102 """
103 Returns a pybasf2.Path which counts the number of MCParticles in each event
104 for the particles configured in this object.
105 """
106 # Unique absolute pdg-codes of all particles
107 pdgs = {abs(pdg.from_name(particle.name)) for particle in self.particles}
108
109 path = basf2.create_path()
110 module = basf2.register_module('VariablesToHistogram')
111 module.set_name("VariablesToHistogram_MCCount")
112 module.param('variables', [(f'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5) for pdg in pdgs])
113 module.param('fileName', self.filename)
114 module.param('ignoreCommandLineOverride', True)
115 path.add_module(module)
116 return path
117
118 def get_mc_counts(self):
119 """
120 Read out the number of MC particles from the file created by reconstruct
121 """
122 # The histogram names encode the absolute pdg-codes of all particles
123 # Always avoid the top-level 'import ROOT'.
124 import ROOT # noqa
125 root_file = ROOT.TFile.Open(self.filename, 'read')
126 mc_counts = {}
127
128 for key in root_file.GetListOfKeys():
129 variable = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
130 pdg = abs(int(variable[len('NumberOfMCParticlesInEvent('):-len(")")]))
131 hist = key.ReadObj()
132 mc_counts[pdg] = {}
133 mc_counts[pdg]['sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
134 for bin in range(hist.GetNbinsX()))
135 mc_counts[pdg]['std'] = hist.GetStdDev()
136 mc_counts[pdg]['avg'] = hist.GetMean()
137 mc_counts[pdg]['max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
138 mc_counts[pdg]['min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
139
140 mc_counts[0] = {}
141 mc_counts[0]['sum'] = hist.GetEntries() # this is the total number of ALL events, does not matter which hist we take
142 root_file.Close()
143 return mc_counts
144
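    # Editor's note (not part of the original code): the mapping returned by get_mc_counts
    # has one entry per absolute pdg code plus the special key 0, e.g. (numbers made up):
    #   {
    #       521: {'sum': 1.2e8, 'std': 1.3, 'avg': 1.1, 'max': 4.0, 'min': 0.0},
    #       411: {...},
    #       0:   {'sum': 1.0e8},   # total number of processed events
    #   }
    # TrainingData uses mc_counts[pdg]['sum'] to derive the sampling rates, and the
    # monitoring uses it to compute reconstruction efficiencies.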
145
147 """
148 Steers the loading of FSP particles.
149 This does NOT include RootInput, Geometry or anything required before loading FSPs,
150 the user has to add this himself (because it depends on the MC campaign and if you want
151 to use Belle or Belle II).
152 """
153
154 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
155 """
156 Create a new FSPLoader object
157 @param particles list of config.Particle objects
158 @param config config.FeiConfiguration object
159 """
160
161 self.particles = particles
162
163 self.config = config
164
165 def get_fsp_lists(self) -> typing.List[str]:
166 """
167 Returns a list of FSP particle lists which are used in the FEI.
168 This is used to create the RootOutput module.
169 """
170 fsps = ['K+:FSP', 'pi+:FSP', 'e+:FSP', 'mu+:FSP', 'p+:FSP', 'gamma:FSP', 'K_S0:V0', 'Lambda0:V0', 'K_L0:FSP', 'gamma:V0']
171 if b2bii.isB2BII():
172 fsps += ['pi0:FSP']
173 return fsps
174
175 def reconstruct(self) -> pybasf2.Path:
176 """
177 Returns pybasf2.Path which loads the FSP Particles
178 """
179 path = basf2.create_path()
180
181 if b2bii.isB2BII():
182 ma.fillParticleLists([('K+:FSP', ''), ('pi+:FSP', ''), ('e+:FSP', ''),
183 ('mu+:FSP', ''), ('p+:FSP', '')], writeOut=True, path=path)
184 for outputList, inputList in [('gamma:FSP', 'gamma:mdst'), ('K_S0:V0', 'K_S0:mdst'),
185 ('Lambda0:V0', 'Lambda0:mdst'), ('K_L0:FSP', 'K_L0:mdst'),
186 ('pi0:FSP', 'pi0:mdst'), ('gamma:V0', 'gamma:v0mdst')]:
187 ma.copyParticles(outputList, inputList, writeOut=True, path=path)
188 else:
189 ma.fillParticleLists([('K+:FSP', ''), ('pi+:FSP', ''), ('e+:FSP', ''),
190 ('mu+:FSP', ''), ('gamma:FSP', ''),
191 ('p+:FSP', ''), ('K_L0:FSP', '')], writeOut=True, path=path)
192 ma.fillParticleList('K_S0:V0 -> pi+ pi-', '', writeOut=True, path=path)
193 ma.fillParticleList('Lambda0:V0 -> p+ pi-', '', writeOut=True, path=path)
194 ma.fillConvertedPhotonsList('gamma:V0 -> e+ e-', '', writeOut=True, path=path)
195
196 if self.config.monitor:
197 names = ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'K_S0', 'p+', 'K_L0', 'Lambda0', 'pi0']
198 filename = os.path.join(self.config.monitoring_path, 'Monitor_FSPLoader.root')
199 pdgs = {abs(pdg.from_name(name)) for name in names}
200 variables = [(f'NumberOfMCParticlesInEvent({pdg})', 100, -0.5, 99.5) for pdg in pdgs]
201 ma.variablesToHistogram('', variables=variables, filename=filename, ignoreCommandLineOverride=True, path=path)
202 return path
203
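# Editor's sketch (not part of the original code): FSPLoader assumes that event data is
# already available on the path, so a steering file has to set up the input first.
# For Belle II mdst files this is typically done with inputMdstList (its exact signature
# differs slightly between basf2 releases); for Belle data the b2bii conversion has to be
# run instead, which also sets the flag queried by b2bii.isB2BII() above.
def _example_input_setup(input_files):
    import basf2
    import modularAnalysis as ma

    path = basf2.create_path()
    ma.inputMdstList(input_files, path=path)  # hypothetical list of mdst file names
    return path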
204
206 """
207 Steers the creation of the training data.
208 The training data is used to train a multivariate classifier for each channel.
209 The training of the FEI at its core is just generating this training data for each channel.
210 After we created the training data for a stage, we have to train the classifiers (see Teacher class further down).
211 """
212
213 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration,
214 mc_counts: typing.Mapping[int, typing.Mapping[str, float]]):
215 """
216 Create a new TrainingData object
217 @param particles list of config.Particle objects
218 @param config config.FeiConfiguration object
219 @param mc_counts containing number of MC Particles
220 """
221
222 self.particles = particles
223
224 self.config = config
225
226 self.mc_counts = mc_counts
227
228 def reconstruct(self) -> pybasf2.Path:
229 """
230 Returns pybasf2.Path which creates the training data for the given particles
231 """
232 import ROOT # noqa
233 path = basf2.create_path()
234
235 for particle in self.particles:
236 pdgcode = abs(pdg.from_name(particle.name))
237 nSignal = self.mc_counts[pdgcode]['sum']
238 print(f"FEI-core: TrainingData: nSignal for {particle.name}: {nSignal}")
239
240 # For D-Mesons we usually have an efficiency of 10^-3 including branching fraction
241 if pdgcode > 400:
242 nSignal /= 1000
243 # For B-Mesons we usually have an efficiency of 10^-4 including branching fraction
244 if pdgcode > 500:
245 nSignal /= 10000
246
247 for channel in particle.channels:
248 weightfile = f'{channel.label}.xml'
249 if basf2_mva.available(weightfile):
250 B2INFO(f"FEI-core: Skipping preparing Training Data for {weightfile}, already available")
251 continue
252 filename = 'training_input.root'
253
254 # nBackground = nEvents * nBestCandidates
255 nBackground = self.mc_counts[0]['sum'] * channel.preCutConfig.bestCandidateCut
256 inverseSamplingRates = {}
257 # For some very pure channels (Jpsi), this sampling can be too aggressive and training fails.
258 # It can therefore be disabled in the preCutConfig.
259 if nBackground > Teacher.MaximumNumberOfMVASamples and not channel.preCutConfig.noBackgroundSampling:
260 inverseSamplingRates[0] = max(
261 1, int((int(nBackground / Teacher.MaximumNumberOfMVASamples) + 1) * channel.preCutConfig.bkgSamplingFactor))
262 elif channel.preCutConfig.bkgSamplingFactor > 1:
263 inverseSamplingRates[0] = int(channel.preCutConfig.bkgSamplingFactor)
264
265 if nSignal > Teacher.MaximumNumberOfMVASamples and not channel.preCutConfig.noSignalSampling:
266 inverseSamplingRates[1] = int(nSignal / Teacher.MaximumNumberOfMVASamples) + 1
267
268 spectators = [channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
269 if channel.mvaConfig.sPlotVariable is not None:
270 spectators.append(channel.mvaConfig.sPlotVariable)
271
272 if self.config.monitor:
273 hist_variables = ['mcErrors', 'mcParticleStatus'] + channel.mvaConfig.variables + spectators
274 hist_variables_2d = [(x, channel.mvaConfig.target)
275 for x in channel.mvaConfig.variables + spectators if x is not channel.mvaConfig.target]
276 hist_filename = os.path.join(self.config.monitoring_path, 'Monitor_TrainingData.root')
277 ma.variablesToHistogram(channel.name, variables=config.variables2binnings(hist_variables),
278 variables_2d=config.variables2binnings_2d(hist_variables_2d),
279 filename=hist_filename,
280 ignoreCommandLineOverride=True,
281 directory=config.removeJPsiSlash(f'{channel.label}'), path=path)
282
283 teacher = basf2.register_module('VariablesToNtuple')
284 teacher.set_name(f'VariablesToNtuple_{channel.name}')
285 teacher.param('fileName', filename)
286 teacher.param('treeName', ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables'))
287 teacher.param('variables', channel.mvaConfig.variables + spectators)
288 teacher.param('particleList', channel.name)
289 teacher.param('sampling', (channel.mvaConfig.target, inverseSamplingRates))
290 teacher.param('ignoreCommandLineOverride', True)
291 path.add_module(teacher)
292 return path
293
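# Editor's note (not part of the original code): a worked example of the sampling logic
# above, with made-up numbers. Assume 1e8 MC events, a D channel with
# bestCandidateCut = 20, bkgSamplingFactor = 1, and mc_counts giving 2e8 true D mesons:
#
#   nSignal     = 2e8 / 1000 = 2e5          (efficiency factor for D mesons)
#   nBackground = 1e8 * 20   = 2e9 candidates
#
#   nBackground exceeds Teacher.MaximumNumberOfMVASamples (1e7), so
#   inverseSamplingRates[0] = max(1, int((int(2e9 / 1e7) + 1) * 1)) = 201,
#   i.e. only every 201st background candidate is written to training_input.root,
#   while the signal (2e5 < 1e7) is kept completely.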
294
296 """
297 Steers the reconstruction phase before the mva method was applied
298 It Includes:
299 - The ParticleCombination (for each particle and channel we create candidates using
300 the daughter candidates from the previous stages)
301 - MC Matching
302 - Vertex Fitting (this is the slowest part of the whole FEI, KFit is used by default,
303 but you can use fastFit as a drop-in replacement https://github.com/thomaskeck/FastFit/,
304 this will speed up the whole FEI by a factor 2-3)
305 """
306
307 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
308 """
309 Create a new PreReconstruction object
310 @param particles list of config.Particle objects
311 @param config config.FeiConfiguration object
312 """
313
314 self.particles = particles
315
316 self.config = config
317
318 def reconstruct(self) -> pybasf2.Path:
319 """
320 Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary
321 """
322 path = basf2.create_path()
323
324 for particle in self.particles:
325 for channel in particle.channels:
326
327 if (len(channel.daughters) == 1) and (pdg.from_name(
328 channel.daughters[0].split(':')[0]) == pdg.from_name(particle.name)):
329 ma.cutAndCopyList(channel.name, channel.daughters[0], channel.preCutConfig.userCut, writeOut=True, path=path)
330 v2EI = basf2.register_module('VariablesToExtraInfo')
331 v2EI.set_name(f'VariablesToExtraInfo_{channel.name}')
332 v2EI.param('particleList', channel.name)
333 v2EI.param('variables', {f'constant({channel.decayModeID})': 'decayModeID'})
334 # suppress warning that decay mode ID won't be overwritten if it already exists
335 v2EI.set_log_level(basf2.logging.log_level.ERROR)
336 path.add_module(v2EI)
337 else:
338 ma.reconstructDecay(channel.decayString, channel.preCutConfig.userCut, channel.decayModeID,
339 writeOut=True, path=path)
340 if self.config.monitor:
341 if "tag" in (channel.name).lower():
342 ma.matchTagTruth(channel.name, path=path)
343 else:
344 ma.matchMCTruth(channel.name, path=path)
345 bc_variable = channel.preCutConfig.bestCandidateVariable
346 if self.config.monitor == 'simple':
347 hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
348 hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
349 else:
350 hist_variables = [bc_variable, 'mcErrors', 'mcParticleStatus',
351 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
352 hist_variables_2d = [(bc_variable, channel.mvaConfig.target),
353 (bc_variable, 'mcErrors'),
354 (bc_variable, 'mcParticleStatus')]
355 for specVar in channel.mvaConfig.spectators:
356 hist_variables_2d.append((bc_variable, specVar))
357 hist_variables_2d.append((channel.mvaConfig.target, specVar))
358 filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_BeforeRanking.root')
359 ma.variablesToHistogram(
360 channel.name,
361 variables=config.variables2binnings(hist_variables),
362 variables_2d=config.variables2binnings_2d(hist_variables_2d),
363 filename=filename,
364 ignoreCommandLineOverride=True,
365 directory=f'{channel.label}',
366 path=path)
367
368 if channel.preCutConfig.bestCandidateMode == 'lowest':
369 ma.rankByLowest(channel.name,
370 channel.preCutConfig.bestCandidateVariable,
371 channel.preCutConfig.bestCandidateCut,
372 'preCut_rank',
373 path=path)
374 elif channel.preCutConfig.bestCandidateMode == 'highest':
375 ma.rankByHighest(channel.name,
376 channel.preCutConfig.bestCandidateVariable,
377 channel.preCutConfig.bestCandidateCut,
378 'preCut_rank',
379 path=path)
380 else:
381 raise RuntimeError(f'Unknown bestCandidateMode {repr(channel.preCutConfig.bestCandidateMode)}')
382
383 if 'gamma' in channel.decayString and channel.pi0veto:
384 ma.buildRestOfEvent(channel.name, path=path)
385 Ddaughter_roe_path = basf2.Path()
386 deadEndPath = basf2.Path()
387 ma.signalSideParticleFilter(channel.name, '', Ddaughter_roe_path, deadEndPath)
388 ma.fillParticleList('gamma:roe', 'isInRestOfEvent == 1', path=Ddaughter_roe_path)
389
390 matches = list(re.finditer('gamma', channel.decayString))
391 pi0lists = []
392 for igamma in range(len(matches)):
393 start, end = matches[igamma-1].span()
394 tempString = f'{channel.decayString[:start]}^gamma{channel.decayString[end:]}'
395 ma.fillSignalSideParticleList(f'gamma:sig_{igamma}', tempString, path=Ddaughter_roe_path)
396 ma.reconstructDecay(f'pi0:veto_{igamma} -> gamma:sig_{igamma} gamma:roe', '', path=Ddaughter_roe_path)
397 pi0lists.append(f'pi0:veto_{igamma}')
398 ma.copyLists('pi0:veto', pi0lists, writeOut=False, path=Ddaughter_roe_path)
399 ma.rankByLowest('pi0:veto', 'abs(dM)', 1, path=Ddaughter_roe_path)
400 ma.matchMCTruth('pi0:veto', path=Ddaughter_roe_path)
401 ma.variableToSignalSideExtraInfo(
402 'pi0:veto',
403 {
404 'InvM': 'pi0vetoMass',
405 'formula((daughter(0,E)-daughter(1,E))/(daughter(0,E)+daughter(1,E)))': 'pi0vetoEnergyAsymmetry',
406 },
407 path=Ddaughter_roe_path
408 )
409 path.for_each('RestOfEvent', 'RestOfEvents', Ddaughter_roe_path)
410
411 if self.config.monitor:
412 filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_AfterRanking.root')
413 if self.config.monitor != 'simple':
414 hist_variables += ['extraInfo(preCut_rank)']
415 hist_variables_2d += [('extraInfo(preCut_rank)', channel.mvaConfig.target),
416 ('extraInfo(preCut_rank)', 'mcErrors'),
417 ('extraInfo(preCut_rank)', 'mcParticleStatus')]
418 for specVar in channel.mvaConfig.spectators:
419 hist_variables_2d.append(('extraInfo(preCut_rank)', specVar))
420 ma.variablesToHistogram(
421 channel.name,
422 variables=config.variables2binnings(hist_variables),
423 variables_2d=config.variables2binnings_2d(hist_variables_2d),
424 filename=filename,
425 ignoreCommandLineOverride=True,
426 directory=f'{channel.label}',
427 path=path)
428 # If we are not in monitor mode we do the mc matching now,
429 # otherwise we did it above already!
430 elif self.config.training:
431 if "tag" in (channel.name).lower():
432 ma.matchTagTruth(channel.name, path=path)
433 else:
434 ma.matchMCTruth(channel.name, path=path)
435
436 if b2bii.isB2BII() and particle.name in ['K_S0', 'Lambda0']:
437 pvfit = basf2.register_module('ParticleVertexFitter')
438 pvfit.set_name(f'ParticleVertexFitter_{channel.name}')
439 pvfit.param('listName', channel.name)
440 pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
441 pvfit.param('vertexFitter', 'KFit')
442 pvfit.param('fitType', 'vertex')
443 pvfit.set_log_level(basf2.logging.log_level.ERROR) # let's not produce gigabytes of uninteresting warnings
444 path.add_module(pvfit)
445 elif re.findall(r"[\w']+", channel.decayString).count('pi0') > 1 and particle.name != 'pi0':
446 basf2.B2INFO(f"Ignoring vertex fit for {channel.name} because multiple pi0 are not supported yet.")
447 elif len(channel.daughters) > 1:
448 pvfit = basf2.register_module('ParticleVertexFitter')
449 pvfit.set_name(f'ParticleVertexFitter_{channel.name}')
450 pvfit.param('listName', channel.name)
451 pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
452 pvfit.param('vertexFitter', 'KFit')
453 if particle.name in ['pi0']:
454 pvfit.param('fitType', 'mass')
455 else:
456 pvfit.param('fitType', 'vertex')
457 pvfit.set_log_level(basf2.logging.log_level.ERROR) # let's not produce gigabytes of uninteresting warnings
458 path.add_module(pvfit)
459
460 if self.config.monitor:
461 if self.config.monitor == 'simple':
462 hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
463 hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
464 else:
465 hist_variables = ['chiProb', 'mcErrors', 'mcParticleStatus',
466 channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
467 hist_variables_2d = [('chiProb', channel.mvaConfig.target),
468 ('chiProb', 'mcErrors'),
469 ('chiProb', 'mcParticleStatus')]
470 for specVar in channel.mvaConfig.spectators:
471 hist_variables_2d.append(('chiProb', specVar))
472 hist_variables_2d.append((channel.mvaConfig.target, specVar))
473 filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_AfterVertex.root')
474 ma.variablesToHistogram(
475 channel.name,
476 variables=config.variables2binnings(hist_variables),
477 variables_2d=config.variables2binnings_2d(hist_variables_2d),
478 filename=filename,
479 ignoreCommandLineOverride=True,
480 directory=f'{channel.label}',
481 path=path)
482
483 return path
484
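# Editor's sketch (not part of the original code): how the pi0-veto block above marks one
# photon at a time in the decay string. For a hypothetical channel
# 'D0:ch0 -> K-:loose gamma:FSP' the regex span of the matched 'gamma' is used to insert
# the '^' selector that fillSignalSideParticleList needs:
def _example_pi0veto_marking():
    import re
    decay_string = 'D0:ch0 -> K-:loose gamma:FSP'  # hypothetical channel
    match = list(re.finditer('gamma', decay_string))[0]
    start, end = match.span()
    marked = f'{decay_string[:start]}^gamma{decay_string[end:]}'
    assert marked == 'D0:ch0 -> K-:loose ^gamma:FSP'
    return marked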
485
487 """
488 Steers the reconstruction phase after the mva method was applied
489 It Includes:
490 - The application of the mva method itself.
491 - Copying all channel lists in a common one for each particle defined in particles
492 - Tag unique signal candidates, to avoid double counting of channels with overlap
493 """
494
495 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
496 """
497 Create a new PostReconstruction object
498 @param particles list of config.Particle objects
499 @param config config.FeiConfiguration object
500 """
501
502 self.particles = particles
503
504 self.config = config
505
506 def get_missing_channels(self) -> typing.Sequence[str]:
507 """
508 Returns all channels for which the weightfile is missing
509 """
510 missing = []
511 for particle in self.particles:
512 for channel in particle.channels:
513 # weightfile = self.config.prefix + '_' + channel.label
514 weightfile = f'{channel.label}.xml'
515 if not basf2_mva.available(weightfile):
516 missing += [channel.label]
517 return missing
518
519 def available(self) -> bool:
520 """
521 Check if the relevant information is already available
522 """
523 return len(self.get_missing_channels()) == 0
524
525 def reconstruct(self) -> pybasf2.Path:
526 """
527 Returns a pybasf2.Path which applies the mva method to all channels and merges the channel lists of each particle.
528 """
529 import ROOT # noqa
530 path = basf2.create_path()
531
532 for particle in self.particles:
533 for channel in particle.channels:
534 expert = basf2.register_module('MVAExpert')
535 expert.set_name(f'MVAExpert_{channel.name}')
536 if self.config.training:
537 expert.param('identifier', f'{channel.label}.xml')
538 else:
539 expert.param('identifier', f'{self.config.prefix}_{channel.label}')
540 expert.param('extraInfoName', 'SignalProbability')
541 expert.param('listNames', [channel.name])
542 # suppress warning that signal probability won't be overwritten if it already exists
543 expert.set_log_level(basf2.logging.log_level.ERROR)
544 path.add_module(expert)
545
546 if self.config.monitor:
547 if self.config.monitor == 'simple':
548 hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
549 hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
550 else:
551 hist_variables = ['mcErrors',
552 'mcParticleStatus',
553 'extraInfo(SignalProbability)',
554 channel.mvaConfig.target,
555 'extraInfo(decayModeID)'] + list(channel.mvaConfig.spectators.keys())
556 hist_variables_2d = [('extraInfo(SignalProbability)', channel.mvaConfig.target),
557 ('extraInfo(SignalProbability)', 'mcErrors'),
558 ('extraInfo(SignalProbability)', 'mcParticleStatus'),
559 ('extraInfo(decayModeID)', channel.mvaConfig.target),
560 ('extraInfo(decayModeID)', 'mcErrors'),
561 ('extraInfo(decayModeID)', 'mcParticleStatus')]
562 for specVar in channel.mvaConfig.spectators:
563 hist_variables_2d.append(('extraInfo(SignalProbability)', specVar))
564 hist_variables_2d.append(('extraInfo(decayModeID)', specVar))
565 hist_variables_2d.append((channel.mvaConfig.target, specVar))
566 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_AfterMVA.root')
567 ma.variablesToHistogram(
568 channel.name,
569 variables=config.variables2binnings(hist_variables),
570 variables_2d=config.variables2binnings_2d(hist_variables_2d),
571 filename=filename,
572 ignoreCommandLineOverride=True,
573 directory=f'{channel.label}',
574 path=path)
575
576 cutstring = ''
577 if particle.postCutConfig.value > 0.0:
578 cutstring = f'{particle.postCutConfig.value} < extraInfo(SignalProbability)'
579
580 ma.mergeListsWithBestDuplicate(particle.identifier, [c.name for c in particle.channels],
581 variable='particleSource', writeOut=True, path=path)
582
583 if self.config.monitor:
584 if self.config.monitor == 'simple':
585 hist_variables = [particle.mvaConfig.target, 'extraInfo(decayModeID)']
586 hist_variables_2d = [(particle.mvaConfig.target, 'extraInfo(decayModeID)')]
587 else:
588 hist_variables = ['mcErrors',
589 'mcParticleStatus',
590 'extraInfo(SignalProbability)',
591 particle.mvaConfig.target,
592 'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
593 hist_variables_2d = [('extraInfo(decayModeID)', particle.mvaConfig.target),
594 ('extraInfo(decayModeID)', 'mcErrors'),
595 ('extraInfo(decayModeID)', 'mcParticleStatus')]
596 for specVar in particle.mvaConfig.spectators:
597 hist_variables_2d.append(('extraInfo(SignalProbability)', specVar))
598 hist_variables_2d.append(('extraInfo(decayModeID)', specVar))
599 hist_variables_2d.append((particle.mvaConfig.target, specVar))
600 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_BeforePostCut.root')
601 ma.variablesToHistogram(
602 particle.identifier,
603 variables=config.variables2binnings(hist_variables),
604 variables_2d=config.variables2binnings_2d(hist_variables_2d),
605 filename=filename,
606 ignoreCommandLineOverride=True,
607 directory=config.removeJPsiSlash(f'{particle.identifier}'),
608 path=path)
609
610 ma.applyCuts(particle.identifier, cutstring, path=path)
611
612 if self.config.monitor:
613 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_BeforeRanking.root')
614 ma.variablesToHistogram(
615 particle.identifier,
616 variables=config.variables2binnings(hist_variables),
617 variables_2d=config.variables2binnings_2d(hist_variables_2d),
618 filename=filename,
619 ignoreCommandLineOverride=True,
620 directory=config.removeJPsiSlash(f'{particle.identifier}'),
621 path=path)
622
623 ma.rankByHighest(particle.identifier, 'extraInfo(SignalProbability)',
624 particle.postCutConfig.bestCandidateCut, 'postCut_rank', path=path)
625
626 uniqueSignal = basf2.register_module('TagUniqueSignal')
627 uniqueSignal.param('particleList', particle.identifier)
628 uniqueSignal.param('target', particle.mvaConfig.target)
629 uniqueSignal.param('extraInfoName', 'uniqueSignal')
630 uniqueSignal.set_name(f'TagUniqueSignal_{particle.identifier}')
631 # suppress warning that unique signal extra info won't be overwritten if it already exists
632 uniqueSignal.set_log_level(basf2.logging.log_level.ERROR)
633 path.add_module(uniqueSignal)
634
635 if self.config.monitor:
636 if self.config.monitor != 'simple':
637 hist_variables += ['extraInfo(postCut_rank)']
638 hist_variables_2d += [('extraInfo(decayModeID)', 'extraInfo(postCut_rank)'),
639 (particle.mvaConfig.target, 'extraInfo(postCut_rank)'),
640 ('mcErrors', 'extraInfo(postCut_rank)'),
641 ('mcParticleStatus', 'extraInfo(postCut_rank)')]
642 for specVar in particle.mvaConfig.spectators:
643 hist_variables_2d.append(('extraInfo(postCut_rank)', specVar))
644 filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_AfterRanking.root')
645 ma.variablesToHistogram(
646 particle.identifier,
647 variables=config.variables2binnings(hist_variables),
648 variables_2d=config.variables2binnings_2d(hist_variables_2d),
649 filename=filename,
650 ignoreCommandLineOverride=True,
651 directory=config.removeJPsiSlash(f'{particle.identifier}'),
652 path=path)
653
654 filename = os.path.join(self.config.monitoring_path, 'Monitor_Final.root')
655 if self.config.monitor == 'simple':
656 hist_variables = ['extraInfo(uniqueSignal)', 'extraInfo(decayModeID)']
657 hist_variables_2d = [('extraInfo(uniqueSignal)', 'extraInfo(decayModeID)')]
658 ma.variablesToHistogram(
659 particle.identifier,
660 variables=config.variables2binnings(hist_variables),
661 variables_2d=config.variables2binnings_2d(hist_variables_2d),
662 filename=filename,
663 ignoreCommandLineOverride=True,
664 directory=config.removeJPsiSlash(f'{particle.identifier}'),
665 path=path)
666 else:
667 variables = ['extraInfo(SignalProbability)', 'mcErrors', 'mcParticleStatus', particle.mvaConfig.target,
668 'extraInfo(uniqueSignal)', 'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
669
670 ma.variablesToNtuple(
671 particle.identifier,
672 variables,
673 treename=ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(
674 config.removeJPsiSlash(f'{particle.identifier} variables')),
675 filename=filename,
676 ignoreCommandLineOverride=True,
677 path=path)
678 return path
679
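# Editor's sketch (not part of the original code): how an analysis typically uses the lists
# produced by PostReconstruction after applying the FEI. The extraInfo fields written above
# (SignalProbability, decayModeID, uniqueSignal) are available on the merged lists, e.g.
# 'B+:generic' (the actual list name depends on the configured particles):
def _example_use_fei_output(path):
    import modularAnalysis as ma
    # keep only candidates with a reasonably high signal probability
    ma.applyCuts('B+:generic', 'extraInfo(SignalProbability) > 0.01', path=path)
    # keep the best candidate per event, ranked by the classifier output
    ma.rankByHighest('B+:generic', 'extraInfo(SignalProbability)',
                     numBest=1, outputVariable='FEIProbabilityRank', path=path)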
680
682 """
683 Performs all necessary trainings for all training data files which are
684 available but where there is no weight file available yet.
685 This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
686 The trainings are run in parallel using multi-threading of python.
687 Each training is done by a subprocess call, the training command (passed by config.externTeacher) can be either
688 * basf2_mva_teacher, the training will be done directly on the machine
689 * externClustTeacher, the training will be submitted to the batch system of KEKCC
690 """
691
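692 # Maximum number of MVA samples per class; larger training samples are downsampled (see TrainingData.reconstruct)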
693 MaximumNumberOfMVASamples = int(1e7)
694
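695 # Minimum number of signal/background samples required to train a channel; below this a fake weight file is used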
696 MinimumNumberOfMVASamples = int(5e2)
697
698 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
699 """
700 Create a new Teacher object
701 @param particles list of config.Particle objects
702 @param config config.FeiConfiguration object
703 """
704
705 self.particles = particles
706
707 self.config = config
708
709 @staticmethod
710 def create_fake_weightfile(channel: str):
711 """
712 Create a fake weight file using the Trivial method; it will always return 0.0
713 @param channel for which we create a fake weight file
714 """
715 content = f"""
716 <?xml version="1.0" encoding="utf-8"?>
717 <method>Trivial</method>
718 <weightfile>{channel}.xml</weightfile>
719 <treename>tree</treename>
720 <target_variable>isSignal</target_variable>
721 <weight_variable>__weight__</weight_variable>
722 <signal_class>1</signal_class>
723 <max_events>0</max_events>
724 <number_feature_variables>1</number_feature_variables>
725 <variable0>M</variable0>
726 <number_spectator_variables>0</number_spectator_variables>
727 <number_data_files>1</number_data_files>
728 <datafile0>train.root</datafile0>
729 <Trivial_version>1</Trivial_version>
730 <Trivial_output>0</Trivial_output>
731 <signal_fraction>0.066082567</signal_fraction>
732 """
733 with open(f'{channel}.xml', "w") as f:
734 f.write(content)
735
736 @staticmethod
737 def check_if_weightfile_is_fake(filename: str):
738 """
739 Checks if the provided filename is a fake-weight file or not
740 @param filename the filename of the weight file
741 """
742 try:
743 return '<method>Trivial</method>' in open(filename).readlines()[2]
744 except BaseException:
745 return True
746 return True
747
748 def upload(self, channel: str):
749 """
750 Upload the weight file into the condition database
751 @param channel whose weight file is uploaded
752 """
753 disk = f'{channel}.xml'
754 dbase = f'{self.config.prefix}_{channel}'
755 basf2_mva.upload(disk, dbase)
756 print(f"FEI-core: Uploading {dbase} to localdb")
757 return (disk, dbase)
758
760 """
761 Do all trainings for which we find training data
762 """
763 # Always avoid the top-level 'import ROOT'.
764 import ROOT # noqa
765 # FEI uses multiprocessing for parallel execution of tasks, therefore
766 # the ROOT gui-thread is disabled, which otherwise sometimes interferes
767 ROOT.PyConfig.StartGuiThread = False
768 job_list = []
769
770 all_stage_particles = get_stages_from_particles(self.particles)
771 if self.config.cache is None:
772 stagesToTrain = range(1, len(all_stage_particles)+1)
773 else:
774 stagesToTrain = [self.config.cache]
775
776 filename = 'training_input.root'
777 if os.path.isfile(filename):
778 f = ROOT.TFile.Open(filename, 'read')
779 if f.IsZombie():
780 B2WARNING(f'Training of MVC failed: {filename}. ROOT file corrupt. No weight files will be provided.')
781 elif len([k.GetName() for k in f.GetListOfKeys()]) == 0:
782 B2WARNING(
783 f'Training of MVC failed: {filename}. ROOT file has no trees. No weight files will be provided.')
784 else:
785 for istage in stagesToTrain:
786 for particle in all_stage_particles[istage-1]:
787 for channel in particle.channels:
788 weightfile = f'{channel.label}.xml'
789 if basf2_mva.available(weightfile):
790 B2INFO(f"FEI-core: Skipping {weightfile}, already available")
791 continue
792 else:
793 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
794 keys = [m for m in f.GetListOfKeys() if treeName in m.GetName()]
795 if not keys:
796 B2WARNING("Training of MVC failed. "
797 f"Couldn't find tree for channel {channel}. Ignoring channel.")
798 continue
799 elif len(keys) > 1:
800 B2WARNING(f"Found more than one tree for channel {channel}. Taking first tree from: {keys}")
801 tree = keys[0].ReadObj()
802 total_entries = tree.GetEntries()
803 nSig = tree.GetEntries(f'{channel.mvaConfig.target}==1.0')
804 nBg = tree.GetEntries(f'{channel.mvaConfig.target}==0.0')
805 B2INFO(
806 f'FEI-core: Number of events for channel: {channel.label}, '
807 f'Total: {total_entries}, Signal: {nSig}, Background: {nBg}')
808 if nSig < Teacher.MinimumNumberOfMVASamples:
809 B2WARNING("Training of MVC failed. "
810 f"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
811 self.create_fake_weightfile(channel.label)
812 self.upload(channel.label)
813 continue
814 if nBg < Teacher.MinimumNumberOfMVASamples:
815 B2WARNING("Training of MVC failed. "
816 f"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
817 self.create_fake_weightfile(channel.label)
818 self.upload(channel.label)
819 continue
820 variable_str = "' '".join(channel.mvaConfig.variables)
821
822 spectators = list(channel.mvaConfig.spectators.keys())
823 if channel.mvaConfig.sPlotVariable is not None:
824 spectators.append(channel.mvaConfig.sPlotVariable)
825 spectators_str = "' '".join(spectators)
826
827 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
828 command = (f"{self.config.externTeacher}"
829 f" --method '{channel.mvaConfig.method}'"
830 f" --target_variable '{channel.mvaConfig.target}'"
831 f" --treename '{treeName}'"
832 f" --datafile 'training_input.root'"
833 f" --signal_class 1"
834 f" --variables '{variable_str}'"
835 f" --identifier '{weightfile}'")
836 if len(spectators) > 0:
837 command += f" --spectators '{spectators_str}'"
838 command += f" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1"
839 B2INFO(f"Used following command to invoke teacher: \n {command}")
840 job_list.append((channel.label, command))
841 f.Close()
842
843 if len(job_list) > 0:
844 p = multiprocessing.Pool(None, maxtasksperchild=1)
845 func = functools.partial(subprocess.call, shell=True)
846 p.map(func, [c for _, c in job_list])
847 p.close()
848 p.join()
849 weightfiles = []
850 for name, _ in job_list:
851 if not basf2_mva.available(f'{name}.xml'):
852 B2WARNING("Training of MVC failed. For unknown reasons, check the logfile", f'{name}.log')
853 self.create_fake_weightfile(name)
854 weightfiles.append(self.upload(name))
855 return weightfiles
856
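# Editor's note (not part of the original code): for a channel with label <label> the
# command assembled in do_all_trainings looks roughly like
#
#   basf2_mva_teacher --method 'FastBDT' --target_variable 'isSignal' \
#       --treename '<label> variables' --datafile 'training_input.root' --signal_class 1 \
#       --variables '<var1>' '<var2>' ... --identifier '<label>.xml' <mvaConfig.config> \
#       > '<label>.log' 2>&1
#
# (the tree name is additionally passed through MakeROOTCompatible, which replaces
# characters that ROOT cannot handle in object names).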
857
858def convert_legacy_training(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
859 """
860 Convert an old FEI training into the new format.
861 The old format used hashes for the weight files; the hashes can be converted to the new naming scheme
862 using the Summary.pickle file produced by FEIv3. This file must be passed via the parameter configuration.legacy.
863 @param particles list of config.Particle objects
864 @param configuration config.FeiConfiguration object
865 """
866 summary = pickle.load(open(configuration.legacy, 'rb'))
867 channel2lists = {k: v[2] for k, v in summary['channel2lists'].items()}
868
869 teacher = Teacher(particles, configuration)
870
871 for particle in particles:
872 for channel in particle.channels:
873 new_weightfile = f'{configuration.prefix}_{channel.label}'
874 old_weightfile = f'{configuration.prefix}_{channel2lists[channel.label.replace("Jpsi", "J/psi")]}'
875 if not basf2_mva.available(new_weightfile):
876 if old_weightfile is None or not basf2_mva.available(old_weightfile):
877 Teacher.create_fake_weightfile(channel.label)
878 teacher.upload(channel.label)
879 else:
880 basf2_mva.download(old_weightfile, f'{channel.label}.xml')
881 teacher.upload(channel.label)
882
883
884def get_stages_from_particles(particles: typing.Sequence[typing.Union[config.Particle, str]]):
885 """
886 Returns the hierarchical structure of the FEI.
887 Each stage depends on the particles in the previous stage.
888 The final stage is empty (meaning everything is done, and the training is finished at this point).
889 @param particles list of config.Particle or string objects
890 """
891 def get_pname(p):
892 return p.split(":")[0] if isinstance(p, str) else p.name
893
894 def get_plabel(p):
895 return (p.split(":")[1] if isinstance(p, str) else p.label).lower()
896
897 stages = [
898 [p for p in particles if get_pname(p) in ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'p+', 'K_L0']],
899 [p for p in particles if get_pname(p) in ['pi0', 'J/psi', 'Lambda0']],
900 [p for p in particles if get_pname(p) in ['K_S0', 'Sigma+']],
901 [p for p in particles if get_pname(p) in ['D+', 'D0', 'D_s+', 'Lambda_c+'] and 'tag' not in get_plabel(p)],
902 [p for p in particles if get_pname(p) in ['D*+', 'D*0', 'D_s*+'] and 'tag' not in get_plabel(p)],
903 [p for p in particles if get_pname(p) in ['B0', 'B+', 'B_s0'] or 'tag' in get_plabel(p)],
904 []
905 ]
906
907 for p in particles:
908 pname = get_pname(p)
909 if pname not in [get_pname(q) for stage in stages for q in stage]:
910 raise RuntimeError(f"Unknown particle {pname}: Not implemented in FEI")
911
912 return stages
913
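# Editor's note (not part of the original code): get_stages_from_particles also accepts
# plain list names, which is how it is used for the FSP lists further down, e.g.:
#
#   stages = get_stages_from_particles(['e+:FSP', 'pi0:FSP', 'D0:x', 'B0:x'])
#   # stages[0] == ['e+:FSP'], stages[1] == ['pi0:FSP'],
#   # stages[3] == ['D0:x'],   stages[5] == ['B0:x'], all other stages are empty.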
914
915def do_trainings(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
916 """
917 Performs the training of mva classifiers for all available training data.
918 This function must either be called by the user after each stage of the FEI during training,
919 or (more likely) it is called by the distributed.py script after merging the outputs of all jobs.
920 @param particles list of config.Particle objects
921 @param configuration config.FeiConfiguration object
922 @return list of tuple with weight file on disk and identifier in database for all trained classifiers
923 """
924 teacher = Teacher(particles, configuration)
925 return teacher.do_all_trainings()
926
927
928def save_summary(particles: typing.Sequence[config.Particle],
929 configuration: config.FeiConfiguration,
930 cache: int,
931 roundMode: int = None,
932 pickleName: str = 'Summary.pickle'):
933 """
934 Creates the Summary.pickle, which is used to keep track of the stage during the training,
935 and can be used later to investigate which configuration was used exactly to create the training.
936 @param particles list of config.Particle objects
937 @param configuration config.FeiConfiguration object
938 @param cache current cache level
939 @param roundMode mode of current round of training
940 @param pickleName name of the pickle file
941 """
942 if roundMode is None:
943 roundMode = configuration.roundMode
944 configuration = configuration._replace(cache=cache, roundMode=roundMode)
945 # Backup existing Summary.pickle files
946 for i in range(8, -1, -1):
947 if os.path.isfile(f'{pickleName}.backup_{i}'):
948 shutil.copyfile(f'{pickleName}.backup_{i}', f'{pickleName}.backup_{i+1}')
949 if os.path.isfile(pickleName):
950 shutil.copyfile(pickleName, f'{pickleName}.backup_0')
951 pickle.dump((particles, configuration), open(pickleName, 'wb'))
952
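# Editor's sketch (not part of the original code): the Summary.pickle written above can be
# inspected later to see which particles and configuration a training used:
def _example_read_summary(pickle_name='Summary.pickle'):
    import pickle
    with open(pickle_name, 'rb') as f:
        particles, configuration = pickle.load(f)
    print(configuration.prefix, configuration.cache, configuration.roundMode)
    print([p.identifier for p in particles])
    return particles, configuration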
953
954def get_path(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration) -> FeiState:
955 """
956 The most important function of the FEI.
957 This creates the FEI path for training/fitting and for application/inference (the terms in each pair are used interchangeably).
958 The whole FEI is defined by the particles which are reconstructed (see default_channels.py)
959 and the configuration (see config.py).
960
961 TRAINING
962 For training this function is called multiple times; each time the FEI reconstructs one more stage in the hierarchical structure,
963 i.e. we start with FSPs, then pi0, K_S0, D, D* and finally B mesons. You have to set configuration.training to True for training mode.
964 All weight files created during the training will be stored in your local database.
965 If you want to use the FEI training everywhere without copying this database by hand, you have to upload your local database
966 to the central database first (see the documentation of the Belle II Conditions Database).
967
968 APPLICATION
969 For application you call this function once, and it returns the whole path which will reconstruct B mesons
970 with an associated signal probability. You have to set configuration.training to False for application mode.
971
972 MONITORING
973 You can always turn on the monitoring (configuration.monitor != False),
974 to write out ROOT Histograms of many quantities for each stage,
975 from these histograms the printReporting.py or latexReporting.py scripts can automatically create pdf files.
976
977 LEGACY
978 This function can also use old FEI trainings (version 3), just pass the Summary.pickle file of the old training,
979 and the weight files will be automatically converted to the new naming scheme.
980
981 @param particles list of config.Particle objects
982 @param configuration config.FeiConfiguration object
983 """
984 print(r"""
985 ____ _ _ _ _ ____ _ _ ____ _ _ ___ _ _ _ ___ ____ ____ ___ ____ ____ ___ ____ ___ _ ____ _ _
986 |___ | | | | |___ | | |___ |\ | | | |\ | | |___ |__/ |__] |__/ |___ | |__| | | | | |\ |
987 | |__| |___ |___ |___ \/ |___ | \| | | | \| | |___ | \ | | \ |___ | | | | | |__| | \|
988
989 Author: Thomas Keck 2014 - 2017
990 Please cite my PhD thesis
991 """)
992
993 # The cache parameter of the configuration object is used during training to keep track,
994 # which reconstruction steps are already performed.
995 # For fitting/training we start by default with -1, meaning we still have to create the TrainingDataInformation,
996 # which is used to determine the number of candidates we have to write out for the FSP trainings in stage 0.
997 # For inference/application we start by default with 0, because we don't need the TrainingDataInformation in stage 0.
998 # RoundMode plays a similar role as cache,
999 # it is used to keep track of the phase within a stage at which the basf2 execution stops; it is relevant only for training.
1000 # During the training we save the particles and configuration (including the current cache stage) in the Summary.pickle object.
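    # Editor's note (not part of the original code), summarising how roundMode is used
    # further down in this function during training:
    #   roundMode == 0 : default behaviour, stages are processed normally
    #   roundMode == 1 : stop before the post-reconstruction (BDT application) of a stage
    #   roundMode == 2 : apply the trained BDTs of the current stage and stop
    #   roundMode == 3 : BDTs of this stage already applied; only prune the intermediate output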
1001 if configuration.training and (configuration.monitor and (configuration.monitoring_path != '')):
1002 B2ERROR("FEI-core: Custom Monitoring path is not allowed during training!")
1003
1004 if configuration.cache is None:
1005 pickleName = 'Summary.pickle'
1006 if configuration.monitor:
1007 pickleName = os.path.join(configuration.monitoring_path, pickleName)
1008
1009 if os.path.isfile(pickleName):
1010 particles_bkp, config_bkp = pickle.load(open(pickleName, 'rb'))
1011 # check if configuration changed
1012 for fd in configuration._fields:
1013 if fd == 'cache' or fd == 'roundMode':
1014 continue
1015 if getattr(configuration, fd) != getattr(config_bkp, fd):
1016 B2WARNING(
1017 f"FEI-core: Configuration changed: {fd} from {getattr(config_bkp, fd)} to {getattr(configuration, fd)}")
1018
1019 configuration = config_bkp
1020 cache = configuration.cache
1021 print("Cache: Replaced particles from steering and configuration from Summary.pickle: ", cache, configuration.roundMode)
1022 else:
1023 if configuration.training:
1024 cache = -1
1025 else:
1026 cache = 0
1027 else:
1028 cache = configuration.cache
1029
1030 # Now we start building the training or application path
1031 path = basf2.create_path()
1032
1033 # There are in total 7 stages.
1034 # For training we start with -1 and go to 7 one stage at a time
1035 # For application we can run stage 0 to 7 at once
1036 stages = get_stages_from_particles(particles)
1037
1038 # If the user provided a Summary.pickle file of a FEIv3 training we
1039 # convert the old weight files (with hashes), to the new naming scheme.
1040 # Afterwards the algorithm runs as usual
1041 if configuration.legacy is not None:
1042 convert_legacy_training(particles, configuration)
1043
1044 # During the training we require the number of MC particles in the whole processed
1045 # data sample, because we don't want to write out billions of e.g. pion candidates.
1046 # Knowing the total amount of MC particles we can write out only every e.g. 10th candidate
1047 # That's why we have to write out the TrainingDataInformation before doing anything during the training phase.
1048 # During application we only need this if we run in monitor mode, and want to write out a summary in the end,
1049 # the summary contains efficiency, and the efficiency calculation requires the total number of MC particles.
1050 training_data_information = TrainingDataInformation(particles, outputPath=configuration.monitoring_path)
1051 if cache < 0 and configuration.training:
1052 print("Stage 0: Run over all files to count the number of events and McParticles")
1053 path.add_path(training_data_information.reconstruct())
1054 if configuration.training:
1055 save_summary(particles, configuration, 0)
1056 return FeiState(path, 0, [], [], [])
1057 elif not configuration.training and configuration.monitor:
1058 path.add_path(training_data_information.reconstruct())
1059
1060 # We load the Final State particles
1061 # It is assumed that the user takes care of adding RootInput, Geometry, and everything
1062 # which is required to read in data, so we directly start to load the FSP particles
1063 # used by the FEI.
1064 loader = FSPLoader(particles, configuration)
1065 if cache < 1:
1066 print("Stage 0: Load FSP particles")
1067 path.add_path(loader.reconstruct())
1068
1069 # Now we reconstruct each stage one after another.
1070 # Each stage consists of two parts:
1071 # PreReconstruction (before the mva method was applied):
1072 # - Particle combination
1073 # - Do vertex fitting
1074 # - Some simple cuts and best candidate selection
1075 # PostReconstruction (after the mva method was applied):
1076 # - Apply the mva method
1077 # - Apply cuts on the mva output and best candidate selection
1078 #
1079 # If the weight files for the PostReconstruction are not available for the current stage and we are in training mode,
1080 # we have to create the training data. The training itself is done by the do_trainings function which is called
1081 # either by the user after each step, or by the distributed.py script
1082 #
1083 # If the weight files for the PostReconstruction are not available for the current stage and we are not in training mode,
1084 # we keep going, as soon as the user will call process on the produced path he will get an error message that the
1085 # weight files are missing.
1086 #
1087 # Finally we keep track of the ParticleLists we use, so the user can run the RemoveParticles module to reduce the size of the
1088 # intermediate output of RootOutput.
1089 used_lists = []
1090 for stage, stage_particles in enumerate(stages):
1091 if len(stage_particles) == 0:
1092 print(f"Stage {stage}: No particles to reconstruct in this stage, skipping!")
1093 continue
1094
1095 pre_reconstruction = PreReconstruction(stage_particles, configuration)
1096 post_reconstruction = PostReconstruction(stage_particles, configuration)
1097
1098 if stage >= cache:
1099 print(f"Stage {stage}: PreReconstruct particles: ", [p.name for p in stage_particles])
1100 path.add_path(pre_reconstruction.reconstruct())
1101 if configuration.training and not (post_reconstruction.available() and configuration.roundMode == 0):
1102 print(f"Stage {stage}: Create training data for particles: ", [p.name for p in stage_particles])
1103 mc_counts = training_data_information.get_mc_counts()
1104 training_data = TrainingData(stage_particles, configuration, mc_counts)
1105 path.add_path(training_data.reconstruct())
1106 used_lists += [channel.name for particle in stage_particles for channel in particle.channels]
1107 break
1108
1109 used_lists += [particle.identifier for particle in stage_particles]
1110 if (stage >= cache - 1) and not ((configuration.roundMode == 1) and configuration.training):
1111 if (configuration.roundMode == 3) and configuration.training:
1112 print(f"Stage {stage}: BDTs already applied for particles, no postReco needed: ", [p.name for p in stage_particles])
1113 else:
1114 print(f"Stage {stage}: Apply BDT for particles: ", [p.name for p in stage_particles])
1115 if configuration.training and not post_reconstruction.available():
1116 raise RuntimeError("FEI-core: training of current stage was not successful, please retrain!")
1117 path.add_path(post_reconstruction.reconstruct())
1118 if (((configuration.roundMode == 2) or (configuration.roundMode == 3)) and configuration.training):
1119 break
1120 fsps_of_all_stages = [fsp for sublist in get_stages_from_particles(loader.get_fsp_lists()) for fsp in sublist]
1121
1122 excludelists = []
1123 if configuration.training and (configuration.roundMode == 3):
1124 dontRemove = used_lists + fsps_of_all_stages
1125 # cleanup higher stages
1126 cleanup = basf2.register_module('RemoveParticlesNotInLists')
1127 print("FEI-REtrain: pruning basf2_input.root of higher stages")
1128 cleanup.param('particleLists', dontRemove)
1129 path.add_module(cleanup)
1130
1131 # check which lists we have to exclude from the output
1132 import ROOT # noqa
1133 excludedParticlesNonConjugated = [p.identifier for p in particles if p.identifier not in dontRemove]
1134 excludedParticles = [
1135 str(name) for name in list(
1136 ROOT.Belle2.ParticleListName.addAntiParticleLists(excludedParticlesNonConjugated))]
1137 root_file = ROOT.TFile.Open('basf2_input.root', "READ")
1138 tree = root_file.Get('tree')
1139 for branch in tree.GetListOfBranches():
1140 branchName = branch.GetName()
1141 if any(exParticle in branchName for exParticle in excludedParticles):
1142 excludelists.append(branchName)
1143 print("Exclude lists from output: ", excludelists)
1144
1145 # If we run in monitor mode we are interested in the ModuleStatistics,
1146 # these statistics contain the runtime for each module which was run.
1147 if configuration.monitor:
1148 print("Add ModuleStatistics")
1149 output = basf2.register_module('RootOutput')
1150 output.param('outputFileName', os.path.join(configuration.monitoring_path, 'Monitor_ModuleStatistics.root'))
1151 output.param('branchNames', ['EventMetaData']) # cannot be removed, but of only small effect on file size
1152 output.param('branchNamesPersistent', ['ProcessStatistics'])
1153 output.param('ignoreCommandLineOverride', True)
1154 path.add_module(output)
1155
1156 # As mentioned above the FEI keeps track of the stages which are already reconstructed during the training
1157 # so we write out the Summary.pickle here, and increase the stage by one.
1158 if configuration.training or configuration.monitor:
1159 print("Save Summary.pickle")
1160 save_summary(particles, configuration, stage+1, pickleName=os.path.join(configuration.monitoring_path, 'Summary.pickle'))
1161
1162 # Finally we return the path, the stage and the used lists to the user.
1163 return FeiState(path, stage+1, plists=used_lists, fsplists=fsps_of_all_stages, excludelists=excludelists)
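# Editor's sketch (not part of the original core.py): how the FeiState returned above is
# typically consumed by a training steering file to slim the intermediate output that is
# passed to the next training round (file names are placeholders):
def _example_write_intermediate_output(feistate):
    import basf2
    # drop all candidates that are not needed by later stages
    cleanup = basf2.register_module('RemoveParticlesNotInLists')
    cleanup.param('particleLists', feistate.plists + feistate.fsplists)
    feistate.path.add_module(cleanup)
    # write the slimmed event to the input file of the next round
    output = basf2.register_module('RootOutput')
    output.param('outputFileName', 'basf2_output.root')
    output.param('excludeBranchNames', feistate.excludelists)
    feistate.path.add_module(output)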