Belle II Software development
core.py
1#!/usr/bin/env python3
2
3
10
11"""
12 The Full Event Interpretation Algorithm
13
14 Some basic facts:
15 - The algorithm will automatically reconstruct B mesons and calculate a signal probability for each candidate.
16 - It can be used for hadronic and semileptonic tagging.
17 - The algorithm has to be trained on MC, and can afterwards be applied on data.
18 - The training requires O(100) million MC events
19 - The weight files are stored in the Belle II Condition database
20
21 Read this file if you want to understand the technical details of the FEI.
22
23 The FEI follows a hierarchical approach.
24 There are 7 stages:
25 (Stage -1: Write out information about the provided data sample)
26 Stage 0: Final State Particles (FSP)
27 Stage 1: pi0, J/Psi, Lambda0
28 Stage 2: K_S0, Sigma+
29 Stage 3: D and Lambda_c mesons
30 Stage 4: D* mesons
31 Stage 5: B mesons
32 Stage 6: Finish
33
34 Most stages consist of:
35 - Create Particle Candidates
36 - Apply Cuts
37 - Do vertex Fitting
38 - Apply a multivariate classification method
39 - Apply more Cuts
40
41 The FEI will reconstruct these 7 stages during the training phase,
42 since the stages depend on one another, you have to run basf2 multiple (7) times on the same data
43 to train all the necessary multivariate classifiers.
44"""
45
46# Import basf2
47import basf2
48from basf2 import B2INFO, B2WARNING, B2ERROR
49import pybasf2
50import modularAnalysis as ma
51import b2bii
52
53# Should come after basf2 import
54import pdg
55from fei import config
56import basf2_mva
57
58# Standard python modules
59import collections
60import os
61import shutil
62import typing
63import pickle
64import re
65import functools
66import subprocess
67import multiprocessing
68
# Lightweight, immutable record holding what the FEI hands back to its caller
FeiState = collections.namedtuple(
    'FeiState',
    ['path', 'stage', 'plists', 'fsplists', 'excludelists'],
)
71
72
74 """
75 Contains the relevant information about the used training data.
76 Basically we write out the number of MC particles in the whole dataset.
77 These numbers we can use to calculate what fraction of candidates we have to write
78 out as TrainingData to get a reasonable amount of candidates to train on
79 (too few candidates will lead to heavy overtraining, too many won't fit into memory).
80 Secondly we can use this information for the generation of the monitoring pdfs,
81 where we calculate reconstruction efficiencies.
82 """
83
def __init__(self, particles: typing.Sequence[config.Particle], outputPath: str = ''):
    """
    Set up the bookkeeping of the MC particle counts for the given particles
    @param particles list of config.Particle objects
    @param outputPath path to the output directory
    """
    # ROOT file holding the MC particle counts, located inside outputPath
    self.filename = os.path.join(outputPath, 'mcParticlesCount.root')
    # list of config.Particle objects
    self.particles = particles
94
def available(self) -> bool:
    """
    Tell whether the file with the MC particle counts exists already
    @return True if self.filename is an existing regular file
    """
    counts_file_exists = os.path.isfile(self.filename)
    return counts_file_exists
100
def reconstruct(self) -> pybasf2.Path:
    """
    Build the path which histograms the number of MCParticles in each event.
    One histogram is booked per distinct absolute pdg-code found in self.particles,
    the output is written to self.filename.
    @return pybasf2.Path containing a single VariablesToHistogram module
    """
    # abs() plus a set: every absolute pdg-code is booked exactly once
    unique_codes = set()
    for particle in self.particles:
        unique_codes.add(abs(pdg.from_name(particle.name)))

    path = basf2.create_path()
    counter = basf2.register_module('VariablesToHistogram')
    counter.set_name("VariablesToHistogram_MCCount")
    # 100 unit-width bins centred on the integers 0..99
    histograms = [(f'NumberOfMCParticlesInEvent({code})', 100, -0.5, 99.5) for code in unique_codes]
    counter.param('variables', histograms)
    counter.param('fileName', self.filename)
    counter.param('ignoreCommandLineOverride', True)
    path.add_module(counter)
    return path
117
def get_mc_counts(self):
    """
    Read out the number of MC particles from the file created by reconstruct
    @return dictionary mapping each absolute pdg-code to a dictionary with the keys
            'sum', 'std', 'avg', 'max' and 'min'; the additional key 0 holds
            {'sum': number of entries of a histogram}, i.e. the total number of events
    @raise RuntimeError if the file cannot be opened or does not contain any histogram
    """
    # Always avoid the top-level 'import ROOT'.
    import ROOT  # noqa
    root_file = ROOT.TFile.Open(self.filename, 'read')
    if not root_file or root_file.IsZombie():
        raise RuntimeError(f'Could not open MC particle count file {self.filename}')

    prefix_length = len('NumberOfMCParticlesInEvent(')
    mc_counts = {}
    hist = None
    try:
        for key in root_file.GetListOfKeys():
            variable = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
            # Strip the variable name and the closing bracket, what remains is the pdg-code
            pdgcode = abs(int(variable[prefix_length:-len(")")]))
            hist = key.ReadObj()
            xaxis = hist.GetXaxis()
            mc_counts[pdgcode] = {
                # ROOT bins are 1-based, bin 0 is the underflow bin
                'sum': sum(xaxis.GetBinCenter(ibin) * hist.GetBinContent(ibin)
                           for ibin in range(1, hist.GetNbinsX() + 1)),
                'std': hist.GetStdDev(),
                'avg': hist.GetMean(),
                'max': xaxis.GetBinCenter(hist.FindLastBinAbove(0.0)),
                'min': xaxis.GetBinCenter(hist.FindFirstBinAbove(0.0)),
            }

        if hist is None:
            # Without this check the access below would fail with an obscure unbound-variable error
            raise RuntimeError(f'No MC particle count histograms found in {self.filename}')

        # this is the total number of ALL events, does not matter which hist we take
        mc_counts[0] = {'sum': hist.GetEntries()}
    finally:
        # Release the file also if reading one of the histograms failed
        root_file.Close()
    return mc_counts
144
145
147 """
148 Steers the loading of FSP particles.
149 This does NOT include RootInput, Geometry or anything required before loading FSPs,
150 the user has to add this himself (because it depends on the MC campaign and if you want
151 to use Belle or Belle II).
152 """
153
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Set up the loader of the final state particles
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    # config.FeiConfiguration object
    self.config = config
    # list of config.Particle objects
    self.particles = particles
164
def get_fsp_lists(self) -> typing.List[str]:
    """
    Names of all FSP particle lists which are used in the FEI.
    This is used to create the RootOutput module.
    @return list of particle list names, extended by 'pi0:FSP' if b2bii.isB2BII() is true
    """
    b2bii_only = ['pi0:FSP'] if b2bii.isB2BII() else []
    return ['K+:FSP', 'pi+:FSP', 'e+:FSP', 'mu+:FSP', 'p+:FSP', 'gamma:FSP',
            'K_S0:V0', 'Lambda0:V0', 'K_L0:FSP', 'gamma:V0'] + b2bii_only
174
def reconstruct(self) -> pybasf2.Path:
    """
    Build the path which fills all FSP particle lists used by the FEI.
    If b2bii.isB2BII() is true only the charged lists are filled directly and the
    remaining lists are copied from the corresponding mdst lists, otherwise all lists are filled.
    If monitoring is enabled the MC particle multiplicities are histogrammed in addition.
    @return pybasf2.Path which loads the FSP Particles
    """
    path = basf2.create_path()

    if not b2bii.isB2BII():
        filled_lists = ['K+:FSP', 'pi+:FSP', 'e+:FSP', 'mu+:FSP', 'gamma:FSP', 'p+:FSP', 'K_L0:FSP']
        ma.fillParticleLists([(listName, '') for listName in filled_lists], writeOut=True, path=path)
        ma.fillParticleList('K_S0:V0 -> pi+ pi-', '', writeOut=True, path=path)
        ma.fillParticleList('Lambda0:V0 -> p+ pi-', '', writeOut=True, path=path)
        ma.fillConvertedPhotonsList('gamma:V0 -> e+ e-', '', writeOut=True, path=path)
    else:
        charged_lists = ['K+:FSP', 'pi+:FSP', 'e+:FSP', 'mu+:FSP', 'p+:FSP']
        ma.fillParticleLists([(listName, '') for listName in charged_lists], writeOut=True, path=path)
        # (list used by the FEI, already existing list it is copied from)
        copied_lists = (
            ('gamma:FSP', 'gamma:mdst'),
            ('K_S0:V0', 'K_S0:mdst'),
            ('Lambda0:V0', 'Lambda0:mdst'),
            ('K_L0:FSP', 'K_L0:mdst'),
            ('pi0:FSP', 'pi0:mdst'),
            ('gamma:V0', 'gamma:v0mdst'),
        )
        for target_list, source_list in copied_lists:
            ma.copyParticles(target_list, source_list, writeOut=True, path=path)

    if not self.config.monitor:
        return path

    monitored_names = ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'K_S0', 'p+', 'K_L0', 'Lambda0', 'pi0']
    filename = os.path.join(self.config.monitoring_path, 'Monitor_FSPLoader.root')
    unique_codes = {abs(pdg.from_name(name)) for name in monitored_names}
    variables = [(f'NumberOfMCParticlesInEvent({code})', 100, -0.5, 99.5) for code in unique_codes]
    ma.variablesToHistogram('', variables=variables, filename=filename, ignoreCommandLineOverride=True, path=path)
    return path
203
204
206 """
207 Steers the creation of the training data.
208 The training data is used to train a multivariate classifier for each channel.
209 The training of the FEI at its core is just generating this training data for each channel.
210 After we created the training data for a stage, we have to train the classifiers (see Teacher class further down).
211 """
212
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration,
             mc_counts: typing.Mapping[int, typing.Mapping[str, float]]):
    """
    Set up the creation of the training data
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    @param mc_counts containing number of MC Particles
    """
    # containing number of MC Particles
    self.mc_counts = mc_counts
    # config.FeiConfiguration object
    self.config = config
    # list of config.Particle objects
    self.particles = particles
227
def reconstruct(self) -> pybasf2.Path:
    """
    Returns pybasf2.Path which creates the training data for the given particles.
    For every channel without an available weight file a VariablesToNtuple module is added,
    which writes the mva variables and spectators of the channel into 'training_input.root'.
    Signal and background candidates are down-sampled if the expected numbers exceed
    Teacher.MaximumNumberOfMVASamples. If monitoring is enabled, histograms of all written
    variables are added as well.
    @return pybasf2.Path writing the training data
    """
    import ROOT  # noqa
    path = basf2.create_path()
    # All channels write their tree into the same file
    filename = 'training_input.root'

    for particle in self.particles:
        pdgcode = abs(pdg.from_name(particle.name))
        nSignal = self.mc_counts[pdgcode]['sum']
        print(f"FEI-core: TrainingData: nSignal for {particle.name}: {nSignal}")

        # For D-Mesons we usually have an efficiency of 10^-3 including branching fraction
        if pdgcode > 400:
            nSignal /= 1000
        # For B-Mesons we usually have an efficiency of 10^-4 including branching fraction
        # NOTE(review): both conditions hold for pdgcode > 500, so the factors multiply to 10^-7
        # there, which does not match the 10^-4 stated above - confirm which one is intended.
        if pdgcode > 500:
            nSignal /= 10000

        for channel in particle.channels:
            weightfile = f'{channel.label}.xml'
            if basf2_mva.available(weightfile):
                B2INFO(f"FEI-core: Skipping preparing Training Data for {weightfile}, already available")
                continue

            # nBackground = nEvents * nBestCandidates
            nBackground = self.mc_counts[0]['sum'] * channel.preCutConfig.bestCandidateCut
            # Maps the target value (0 = background, 1 = signal) to its inverse sampling rate
            inverseSamplingRates = {}
            # For some very pure channels (Jpsi), this sampling can be too aggressive and training fails.
            # It can therefore be disabled in the preCutConfig.
            if nBackground > Teacher.MaximumNumberOfMVASamples and not channel.preCutConfig.noBackgroundSampling:
                inverseSamplingRates[0] = max(
                    1, int((int(nBackground / Teacher.MaximumNumberOfMVASamples) + 1) * channel.preCutConfig.bkgSamplingFactor))
            elif channel.preCutConfig.bkgSamplingFactor > 1:
                inverseSamplingRates[0] = int(channel.preCutConfig.bkgSamplingFactor)

            if nSignal > Teacher.MaximumNumberOfMVASamples and not channel.preCutConfig.noSignalSampling:
                inverseSamplingRates[1] = int(nSignal / Teacher.MaximumNumberOfMVASamples) + 1

            # The target is stored as first spectator, so it ends up in the ntuple as well
            spectators = [channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
            if channel.mvaConfig.sPlotVariable is not None:
                spectators.append(channel.mvaConfig.sPlotVariable)
            ntuple_variables = channel.mvaConfig.variables + spectators

            if self.config.monitor:
                hist_variables = ['mcErrors', 'mcParticleStatus'] + ntuple_variables
                # Compare by value: identity ('is not') between strings is an implementation detail
                hist_variables_2d = [(x, channel.mvaConfig.target)
                                     for x in ntuple_variables if x != channel.mvaConfig.target]
                hist_filename = os.path.join(self.config.monitoring_path, 'Monitor_TrainingData.root')
                ma.variablesToHistogram(channel.name, variables=config.variables2binnings(hist_variables),
                                        variables_2d=config.variables2binnings_2d(hist_variables_2d),
                                        filename=hist_filename,
                                        ignoreCommandLineOverride=True,
                                        directory=config.removeJPsiSlash(f'{channel.label}'), path=path)

            teacher = basf2.register_module('VariablesToNtuple')
            teacher.set_name(f'VariablesToNtuple_{channel.name}')
            teacher.param('fileName', filename)
            teacher.param('treeName', ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables'))
            teacher.param('variables', ntuple_variables)
            teacher.param('particleList', channel.name)
            teacher.param('sampling', (channel.mvaConfig.target, inverseSamplingRates))
            teacher.param('ignoreCommandLineOverride', True)
            path.add_module(teacher)
    return path
293
294
296 """
297 Steers the reconstruction phase before the mva method was applied
298 It Includes:
299 - The ParticleCombination (for each particle and channel we create candidates using
300 the daughter candidates from the previous stages)
301 - MC Matching
302 - Vertex Fitting (this is the slowest part of the whole FEI, KFit is used by default,
303 but you can use fastFit as a drop-in replacement https://github.com/thomaskeck/FastFit/,
304 this will speed up the whole FEI by a factor 2-3)
305 """
306
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Set up the reconstruction phase which runs before the mva method is applied
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    # config.FeiConfiguration object
    self.config = config
    # list of config.Particle objects
    self.particles = particles
317
def reconstruct(self) -> pybasf2.Path:
    """
    Returns pybasf2.Path which reconstructs the particles and does the vertex fitting if necessary.
    For every channel of every particle the following steps are added in this order:
    candidate creation, best-candidate ranking, optional pi0 veto information,
    MC matching (if monitoring or training) and the vertex / mass fit.
    If monitoring is enabled, histograms are written before ranking, after ranking and after the fit.
    @return pybasf2.Path containing all modules described above
    @raise RuntimeError if a channel uses a bestCandidateMode other than 'lowest' or 'highest'
    """
    path = basf2.create_path()

    for particle in self.particles:
        for channel in particle.channels:

            # --- Candidate creation ---
            # A channel with a single daughter of the same pdg-code as the particle is only a
            # cut-and-copy of the daughter list, everything else is a real particle combination.
            if (len(channel.daughters) == 1) and (pdg.from_name(
                    channel.daughters[0].split(':')[0]) == pdg.from_name(particle.name)):
                ma.cutAndCopyList(channel.name, channel.daughters[0], channel.preCutConfig.userCut, writeOut=True, path=path)
                # Store the decay mode ID as extra info of the copied candidates
                v2EI = basf2.register_module('VariablesToExtraInfo')
                v2EI.set_name(f'VariablesToExtraInfo_{channel.name}')
                v2EI.param('particleList', channel.name)
                v2EI.param('variables', {f'constant({channel.decayModeID})': 'decayModeID'})
                # suppress warning that decay mode ID won't be overwritten if it already exists
                v2EI.set_log_level(basf2.logging.log_level.ERROR)
                path.add_module(v2EI)
            else:
                ma.reconstructDecay(channel.decayString, channel.preCutConfig.userCut, channel.decayModeID,
                                    writeOut=True, path=path)

            # --- Monitoring before the best-candidate ranking ---
            if self.config.monitor:
                # The monitoring histograms contain the target, therefore MC matching is done here already
                ma.matchMCTruth(channel.name, path=path)
                bc_variable = channel.preCutConfig.bestCandidateVariable
                if self.config.monitor == 'simple':
                    hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
                    hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
                else:
                    hist_variables = [bc_variable, 'mcErrors', 'mcParticleStatus',
                                      channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
                    hist_variables_2d = [(bc_variable, channel.mvaConfig.target),
                                         (bc_variable, 'mcErrors'),
                                         (bc_variable, 'mcParticleStatus')]
                    for specVar in channel.mvaConfig.spectators:
                        hist_variables_2d.append((bc_variable, specVar))
                        hist_variables_2d.append((channel.mvaConfig.target, specVar))
                filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_BeforeRanking.root')
                ma.variablesToHistogram(
                    channel.name,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=filename,
                    ignoreCommandLineOverride=True,
                    directory=f'{channel.label}',
                    path=path)

            # --- Best-candidate ranking, the rank is stored as extra info 'preCut_rank' ---
            if channel.preCutConfig.bestCandidateMode == 'lowest':
                ma.rankByLowest(channel.name,
                                channel.preCutConfig.bestCandidateVariable,
                                channel.preCutConfig.bestCandidateCut,
                                'preCut_rank',
                                path=path)
            elif channel.preCutConfig.bestCandidateMode == 'highest':
                ma.rankByHighest(channel.name,
                                 channel.preCutConfig.bestCandidateVariable,
                                 channel.preCutConfig.bestCandidateCut,
                                 'preCut_rank',
                                 path=path)
            else:
                raise RuntimeError(f'Unknown bestCandidateMode {repr(channel.preCutConfig.bestCandidateMode)}')

            # --- pi0 veto: combine each signal-side gamma with the gammas of the rest of event ---
            if 'gamma' in channel.decayString and channel.pi0veto:
                ma.buildRestOfEvent(channel.name, path=path)
                Ddaughter_roe_path = basf2.Path()
                deadEndPath = basf2.Path()
                ma.signalSideParticleFilter(channel.name, '', Ddaughter_roe_path, deadEndPath)
                ma.fillParticleList('gamma:roe', 'isInRestOfEvent == 1', path=Ddaughter_roe_path)

                matches = list(re.finditer('gamma', channel.decayString))
                pi0lists = []
                for igamma in range(len(matches)):
                    # NOTE(review): index igamma-1 wraps around to the last match for igamma == 0,
                    # so the list suffixes are rotated with respect to the order in the decay string;
                    # every occurrence of 'gamma' is still selected exactly once.
                    start, end = matches[igamma-1].span()
                    # Prefix this occurrence of 'gamma' with '^' in a copy of the decay string
                    tempString = f'{channel.decayString[:start]}^gamma{channel.decayString[end:]}'
                    ma.fillSignalSideParticleList(f'gamma:sig_{igamma}', tempString, path=Ddaughter_roe_path)
                    ma.reconstructDecay(f'pi0:veto_{igamma} -> gamma:sig_{igamma} gamma:roe', '', path=Ddaughter_roe_path)
                    pi0lists.append(f'pi0:veto_{igamma}')
                ma.copyLists('pi0:veto', pi0lists, writeOut=False, path=Ddaughter_roe_path)
                # Keep only the candidate with the lowest abs(dM)
                ma.rankByLowest('pi0:veto', 'abs(dM)', 1, path=Ddaughter_roe_path)
                ma.matchMCTruth('pi0:veto', path=Ddaughter_roe_path)
                ma.variableToSignalSideExtraInfo(
                    'pi0:veto',
                    {
                        'InvM': 'pi0vetoMass',
                        'formula((daughter(0,E)-daughter(1,E))/(daughter(0,E)+daughter(1,E)))': 'pi0vetoEnergyAsymmetry',
                    },
                    path=Ddaughter_roe_path
                )
                path.for_each('RestOfEvent', 'RestOfEvents', Ddaughter_roe_path)

            # --- Monitoring after the best-candidate ranking ---
            if self.config.monitor:
                filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_AfterRanking.root')
                # Extends the histogram lists which were built before the ranking
                if self.config.monitor != 'simple':
                    hist_variables += ['extraInfo(preCut_rank)']
                    hist_variables_2d += [('extraInfo(preCut_rank)', channel.mvaConfig.target),
                                          ('extraInfo(preCut_rank)', 'mcErrors'),
                                          ('extraInfo(preCut_rank)', 'mcParticleStatus')]
                    for specVar in channel.mvaConfig.spectators:
                        hist_variables_2d.append(('extraInfo(preCut_rank)', specVar))
                ma.variablesToHistogram(
                    channel.name,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=filename,
                    ignoreCommandLineOverride=True,
                    directory=f'{channel.label}',
                    path=path)
            # If we are not in monitor mode we do the mc matching now,
            # otherwise we did it above already!
            elif self.config.training:
                ma.matchMCTruth(channel.name, path=path)

            # --- Vertex fit (mass fit for pi0) ---
            if b2bii.isB2BII() and particle.name in ['K_S0', 'Lambda0']:
                pvfit = basf2.register_module('ParticleVertexFitter')
                pvfit.set_name(f'ParticleVertexFitter_{channel.name}')
                pvfit.param('listName', channel.name)
                pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
                pvfit.param('vertexFitter', 'KFit')
                pvfit.param('fitType', 'vertex')
                pvfit.set_log_level(basf2.logging.log_level.ERROR)  # let's not produce gigabytes of uninteresting warnings
                path.add_module(pvfit)
            elif re.findall(r"[\w']+", channel.decayString).count('pi0') > 1 and particle.name != 'pi0':
                basf2.B2INFO(f"Ignoring vertex fit for {channel.name} because multiple pi0 are not supported yet.")
            elif len(channel.daughters) > 1:
                pvfit = basf2.register_module('ParticleVertexFitter')
                pvfit.set_name(f'ParticleVertexFitter_{channel.name}')
                pvfit.param('listName', channel.name)
                pvfit.param('confidenceLevel', channel.preCutConfig.vertexCut)
                pvfit.param('vertexFitter', 'KFit')
                if particle.name in ['pi0']:
                    pvfit.param('fitType', 'mass')
                else:
                    pvfit.param('fitType', 'vertex')
                pvfit.set_log_level(basf2.logging.log_level.ERROR)  # let's not produce gigabytes of uninteresting warnings
                path.add_module(pvfit)

            # --- Monitoring after the vertex fit ---
            if self.config.monitor:
                if self.config.monitor == 'simple':
                    hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
                    hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
                else:
                    hist_variables = ['chiProb', 'mcErrors', 'mcParticleStatus',
                                      channel.mvaConfig.target] + list(channel.mvaConfig.spectators.keys())
                    hist_variables_2d = [('chiProb', channel.mvaConfig.target),
                                         ('chiProb', 'mcErrors'),
                                         ('chiProb', 'mcParticleStatus')]
                    for specVar in channel.mvaConfig.spectators:
                        hist_variables_2d.append(('chiProb', specVar))
                        hist_variables_2d.append((channel.mvaConfig.target, specVar))
                filename = os.path.join(self.config.monitoring_path, 'Monitor_PreReconstruction_AfterVertex.root')
                ma.variablesToHistogram(
                    channel.name,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=filename,
                    ignoreCommandLineOverride=True,
                    directory=f'{channel.label}',
                    path=path)

    return path
478
479
481 """
482 Steers the reconstruction phase after the mva method was applied
483 It Includes:
484 - The application of the mva method itself.
485 - Copying all channel lists in a common one for each particle defined in particles
486 - Tag unique signal candidates, to avoid double counting of channels with overlap
487 """
488
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Set up the reconstruction phase which runs after the mva method was applied
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    # config.FeiConfiguration object
    self.config = config
    # list of config.Particle objects
    self.particles = particles
499
def get_missing_channels(self) -> typing.Sequence[str]:
    """
    Collect the labels of all channels whose weightfile '<label>.xml' is not available
    @return list of channel labels, in the order of self.particles and their channels
    """
    return [
        channel.label
        for particle in self.particles
        for channel in particle.channels
        if not basf2_mva.available(f'{channel.label}.xml')
    ]
512
def available(self) -> bool:
    """
    Tell whether the weightfiles of all channels are available
    @return True if get_missing_channels reports no channel
    """
    return not self.get_missing_channels()
518
def reconstruct(self) -> pybasf2.Path:
    """
    Returns pybasf2.Path which applies the mva method and builds the final list of each particle.
    For every channel an MVAExpert module writes the extra info 'SignalProbability'.
    Afterwards the channel lists of a particle are merged into particle.identifier,
    the post cut is applied, the candidates are ranked by their signal probability
    and unique signal candidates are tagged.
    If monitoring is enabled, histograms (or a final ntuple) are written after each of these steps.
    @return pybasf2.Path containing all modules described above
    """
    import ROOT  # noqa
    path = basf2.create_path()

    for particle in self.particles:
        # --- Apply the classifier of each channel ---
        for channel in particle.channels:
            expert = basf2.register_module('MVAExpert')
            expert.set_name(f'MVAExpert_{channel.name}')
            # During training the local xml file is used, otherwise the identifier carrying the configured prefix
            if self.config.training:
                expert.param('identifier', f'{channel.label}.xml')
            else:
                expert.param('identifier', f'{self.config.prefix}_{channel.label}')
            expert.param('extraInfoName', 'SignalProbability')
            expert.param('listNames', [channel.name])
            # suppress warning that signal probability won't be overwritten if it already exists
            expert.set_log_level(basf2.logging.log_level.ERROR)
            path.add_module(expert)

            if self.config.monitor:
                if self.config.monitor == 'simple':
                    hist_variables = [channel.mvaConfig.target, 'extraInfo(decayModeID)']
                    hist_variables_2d = [(channel.mvaConfig.target, 'extraInfo(decayModeID)')]
                else:
                    hist_variables = ['mcErrors',
                                      'mcParticleStatus',
                                      'extraInfo(SignalProbability)',
                                      channel.mvaConfig.target,
                                      'extraInfo(decayModeID)'] + list(channel.mvaConfig.spectators.keys())
                    hist_variables_2d = [('extraInfo(SignalProbability)', channel.mvaConfig.target),
                                         ('extraInfo(SignalProbability)', 'mcErrors'),
                                         ('extraInfo(SignalProbability)', 'mcParticleStatus'),
                                         ('extraInfo(decayModeID)', channel.mvaConfig.target),
                                         ('extraInfo(decayModeID)', 'mcErrors'),
                                         ('extraInfo(decayModeID)', 'mcParticleStatus')]
                    for specVar in channel.mvaConfig.spectators:
                        hist_variables_2d.append(('extraInfo(SignalProbability)', specVar))
                        hist_variables_2d.append(('extraInfo(decayModeID)', specVar))
                        hist_variables_2d.append((channel.mvaConfig.target, specVar))
                filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_AfterMVA.root')
                ma.variablesToHistogram(
                    channel.name,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=filename,
                    ignoreCommandLineOverride=True,
                    directory=f'{channel.label}',
                    path=path)

        # An empty cut string is used if the post cut value is not positive
        cutstring = ''
        if particle.postCutConfig.value > 0.0:
            cutstring = f'{particle.postCutConfig.value} < extraInfo(SignalProbability)'

        # --- Merge all channel lists into the common list of the particle ---
        ma.mergeListsWithBestDuplicate(particle.identifier, [c.name for c in particle.channels],
                                       variable='particleSource', writeOut=True, path=path)

        if self.config.monitor:
            if self.config.monitor == 'simple':
                hist_variables = [particle.mvaConfig.target, 'extraInfo(decayModeID)']
                hist_variables_2d = [(particle.mvaConfig.target, 'extraInfo(decayModeID)')]
            else:
                hist_variables = ['mcErrors',
                                  'mcParticleStatus',
                                  'extraInfo(SignalProbability)',
                                  particle.mvaConfig.target,
                                  'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())
                hist_variables_2d = [('extraInfo(decayModeID)', particle.mvaConfig.target),
                                     ('extraInfo(decayModeID)', 'mcErrors'),
                                     ('extraInfo(decayModeID)', 'mcParticleStatus')]
                for specVar in particle.mvaConfig.spectators:
                    hist_variables_2d.append(('extraInfo(SignalProbability)', specVar))
                    hist_variables_2d.append(('extraInfo(decayModeID)', specVar))
                    hist_variables_2d.append((particle.mvaConfig.target, specVar))
            filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_BeforePostCut.root')
            ma.variablesToHistogram(
                particle.identifier,
                variables=config.variables2binnings(hist_variables),
                variables_2d=config.variables2binnings_2d(hist_variables_2d),
                filename=filename,
                ignoreCommandLineOverride=True,
                directory=config.removeJPsiSlash(f'{particle.identifier}'),
                path=path)

        # --- Post cut on the signal probability ---
        ma.applyCuts(particle.identifier, cutstring, path=path)

        if self.config.monitor:
            # Same histograms as before the post cut, written to a different file
            filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_BeforeRanking.root')
            ma.variablesToHistogram(
                particle.identifier,
                variables=config.variables2binnings(hist_variables),
                variables_2d=config.variables2binnings_2d(hist_variables_2d),
                filename=filename,
                ignoreCommandLineOverride=True,
                directory=config.removeJPsiSlash(f'{particle.identifier}'),
                path=path)

        # --- Ranking by signal probability, the rank is stored as extra info 'postCut_rank' ---
        ma.rankByHighest(particle.identifier, 'extraInfo(SignalProbability)',
                         particle.postCutConfig.bestCandidateCut, 'postCut_rank', path=path)

        # --- Tag unique signal candidates, result is stored as extra info 'uniqueSignal' ---
        uniqueSignal = basf2.register_module('TagUniqueSignal')
        uniqueSignal.param('particleList', particle.identifier)
        uniqueSignal.param('target', particle.mvaConfig.target)
        uniqueSignal.param('extraInfoName', 'uniqueSignal')
        uniqueSignal.set_name(f'TagUniqueSignal_{particle.identifier}')
        # suppress warning that unique signal extra info won't be overwritten if it already exists
        uniqueSignal.set_log_level(basf2.logging.log_level.ERROR)
        path.add_module(uniqueSignal)

        if self.config.monitor:
            # Extends the histogram lists which were built before the post cut
            if self.config.monitor != 'simple':
                hist_variables += ['extraInfo(postCut_rank)']
                hist_variables_2d += [('extraInfo(decayModeID)', 'extraInfo(postCut_rank)'),
                                      (particle.mvaConfig.target, 'extraInfo(postCut_rank)'),
                                      ('mcErrors', 'extraInfo(postCut_rank)'),
                                      ('mcParticleStatus', 'extraInfo(postCut_rank)')]
                for specVar in particle.mvaConfig.spectators:
                    hist_variables_2d.append(('extraInfo(postCut_rank)', specVar))
            filename = os.path.join(self.config.monitoring_path, 'Monitor_PostReconstruction_AfterRanking.root')
            ma.variablesToHistogram(
                particle.identifier,
                variables=config.variables2binnings(hist_variables),
                variables_2d=config.variables2binnings_2d(hist_variables_2d),
                filename=filename,
                ignoreCommandLineOverride=True,
                directory=config.removeJPsiSlash(f'{particle.identifier}'),
                path=path)

            # --- Final monitoring output: histograms in 'simple' mode, an ntuple otherwise ---
            filename = os.path.join(self.config.monitoring_path, 'Monitor_Final.root')
            if self.config.monitor == 'simple':
                hist_variables = ['extraInfo(uniqueSignal)', 'extraInfo(decayModeID)']
                hist_variables_2d = [('extraInfo(uniqueSignal)', 'extraInfo(decayModeID)')]
                ma.variablesToHistogram(
                    particle.identifier,
                    variables=config.variables2binnings(hist_variables),
                    variables_2d=config.variables2binnings_2d(hist_variables_2d),
                    filename=filename,
                    ignoreCommandLineOverride=True,
                    directory=config.removeJPsiSlash(f'{particle.identifier}'),
                    path=path)
            else:
                variables = ['extraInfo(SignalProbability)', 'mcErrors', 'mcParticleStatus', particle.mvaConfig.target,
                             'extraInfo(uniqueSignal)', 'extraInfo(decayModeID)'] + list(particle.mvaConfig.spectators.keys())

                ma.variablesToNtuple(
                    particle.identifier,
                    variables,
                    treename=ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(
                        config.removeJPsiSlash(f'{particle.identifier} variables')),
                    filename=filename,
                    ignoreCommandLineOverride=True,
                    path=path)
    return path
673
674
676 """
677 Performs all necessary trainings for all training data files which are
678 available but where there is no weight file available yet.
679 This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
680 The trainings are run in parallel using multi-threading of python.
681 Each training is done by a subprocess call, the training command (passed by config.externTeacher) can be either
682 * basf2_mva_teacher, the training will be done directly on the machine
683 * externClustTeacher, the training will be submitted to the batch system of KEKCC
684 """
685
# Upper limit on the number of samples of one class; larger expected samples are down-sampled
MaximumNumberOfMVASamples = 10_000_000

# Lower limit on the number of signal and of background samples required to run a training
MinimumNumberOfMVASamples = 500
691
def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Set up the teacher which performs the trainings
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    # config.FeiConfiguration object
    self.config = config
    # list of config.Particle objects
    self.particles = particles
702
703 @staticmethod
704 def create_fake_weightfile(channel: str):
705 """
706 Create a fake weight file using the trivial method, it will always return 0.0
707 @param channel for which we create a fake weight file
708 """
709 content = f"""
710 <?xml version="1.0" encoding="utf-8"?>
711 <method>Trivial</method>
712 <weightfile>{channel}.xml</weightfile>
713 <treename>tree</treename>
714 <target_variable>isSignal</target_variable>
715 <weight_variable>__weight__</weight_variable>
716 <signal_class>1</signal_class>
717 <max_events>0</max_events>
718 <number_feature_variables>1</number_feature_variables>
719 <variable0>M</variable0>
720 <number_spectator_variables>0</number_spectator_variables>
721 <number_data_files>1</number_data_files>
722 <datafile0>train.root</datafile0>
723 <Trivial_version>1</Trivial_version>
724 <Trivial_output>0</Trivial_output>
725 <signal_fraction>0.066082567</signal_fraction>
726 """
727 with open(f'{channel}.xml', "w") as f:
728 f.write(content)
729
730 @staticmethod
731 def check_if_weightfile_is_fake(filename: str):
732 """
733 Checks if the provided filename is a fake-weight file or not
734 @param filename the filename of the weight file
735 """
736 try:
737 return '<method>Trivial</method>' in open(filename).readlines()[2]
738 except BaseException:
739 return True
740 return True
741
def upload(self, channel: str):
    """
    Hand the weight file '<channel>.xml' to basf2_mva.upload under the identifier '<prefix>_<channel>'
    @param channel whose weight file is uploaded
    @return tuple (name of the weight file on disk, identifier used for the upload)
    """
    local_file, db_identifier = f'{channel}.xml', f'{self.config.prefix}_{channel}'
    basf2_mva.upload(local_file, db_identifier)
    print(f"FEI-core: Uploading {db_identifier} to localdb")
    return local_file, db_identifier
752
754 """
755 Do all trainings for which we find training data
756 """
757 # Always avoid the top-level 'import ROOT'.
758 import ROOT # noqa
759 # FEI uses multi-threading for parallel execution of tasks therefore
760 # the ROOT gui-thread is disabled, which otherwise interferes sometimes
761 ROOT.PyConfig.StartGuiThread = False
762 job_list = []
763
764 all_stage_particles = get_stages_from_particles(self.particles)
765 if self.config.cache is None:
766 stagesToTrain = range(1, len(all_stage_particles)+1)
767 else:
768 stagesToTrain = [self.config.cache]
769
770 filename = 'training_input.root'
771 if os.path.isfile(filename):
772 f = ROOT.TFile.Open(filename, 'read')
773 if f.IsZombie():
774 B2WARNING(f'Training of MVC failed: {filename}. ROOT file corrupt. No weight files will be provided.')
775 elif len([k.GetName() for k in f.GetListOfKeys()]) == 0:
776 B2WARNING(
777 f'Training of MVC failed: {filename}. ROOT file has no trees. No weight files will be provided.')
778 else:
779 for istage in stagesToTrain:
780 for particle in all_stage_particles[istage-1]:
781 for channel in particle.channels:
782 weightfile = f'{channel.label}.xml'
783 if basf2_mva.available(weightfile):
784 B2INFO(f"FEI-core: Skipping {weightfile}, already available")
785 continue
786 else:
787 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
788 keys = [m for m in f.GetListOfKeys() if treeName in m.GetName()]
789 if not keys:
790 B2WARNING("Training of MVC failed. "
791 f"Couldn't find tree for channel {channel}. Ignoring channel.")
792 continue
793 elif len(keys) > 1:
794 B2WARNING(f"Found more than one tree for channel {channel}. Taking first tree from: {keys}")
795 tree = keys[0].ReadObj()
796 total_entries = tree.GetEntries()
797 nSig = tree.GetEntries(f'{channel.mvaConfig.target}==1.0')
798 nBg = tree.GetEntries(f'{channel.mvaConfig.target}==0.0')
799 B2INFO(
800 f'FEI-core: Number of events for channel: {channel.label}, '
801 f'Total: {total_entries}, Signal: {nSig}, Background: {nBg}')
802 if nSig < Teacher.MinimumNumberOfMVASamples:
803 B2WARNING("Training of MVC failed. "
804 f"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
805 self.create_fake_weightfile(channel.label)
806 self.upload(channel.label)
807 continue
808 if nBg < Teacher.MinimumNumberOfMVASamples:
809 B2WARNING("Training of MVC failed. "
810 f"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
811 self.create_fake_weightfile(channel.label)
812 self.upload(channel.label)
813 continue
814 variable_str = "' '".join(channel.mvaConfig.variables)
815
816 spectators = list(channel.mvaConfig.spectators.keys())
817 if channel.mvaConfig.sPlotVariable is not None:
818 spectators.append(channel.mvaConfig.sPlotVariable)
819 spectators_str = "' '".join(spectators)
820
821 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
822 command = (f"{self.config.externTeacher}"
823 f" --method '{channel.mvaConfig.method}'"
824 f" --target_variable '{channel.mvaConfig.target}'"
825 f" --treename '{treeName}'"
826 f" --datafile 'training_input.root'"
827 f" --signal_class 1"
828 f" --variables '{variable_str}'"
829 f" --identifier '{weightfile}'")
830 if len(spectators) > 0:
831 command += f" --spectators '{spectators_str}'"
832 command += f" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1"
833 B2INFO(f"Used following command to invoke teacher: \n {command}")
834 job_list.append((channel.label, command))
835 f.Close()
836
837 if len(job_list) > 0:
838 p = multiprocessing.Pool(None, maxtasksperchild=1)
839 func = functools.partial(subprocess.call, shell=True)
840 p.map(func, [c for _, c in job_list])
841 p.close()
842 p.join()
843 weightfiles = []
844 for name, _ in job_list:
845 if not basf2_mva.available(f'{name}.xml'):
846 B2WARNING("Training of MVC failed. For unknown reasons, check the logfile", f'{name}.log')
847 self.create_fake_weightfile(name)
848 weightfiles.append(self.upload(name))
849 return weightfiles
850
851
def convert_legacy_training(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    """
    Convert an old FEI training into the new format.
    The old format used hashes for the weight files, the hashes can be converted to the new naming scheme
    using the Summary.pickle file outputted by the FEIv3. This file must be passed via the parameter configuration.legacy.
    For every channel without a weight file under the new name, the old weight file is downloaded
    (or a fake weight file is created if the old one is not available) and uploaded under the new name.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    """
    # NOTE(review): unpickling executes arbitrary code, only use Summary.pickle files from a trusted source.
    with open(configuration.legacy, 'rb') as summary_file:
        summary = pickle.load(summary_file)
    channel2lists = {k: v[2] for k, v in summary['channel2lists'].items()}

    teacher = Teacher(particles, configuration)

    for particle in particles:
        for channel in particle.channels:
            new_weightfile = f'{configuration.prefix}_{channel.label}'
            # The legacy summary is keyed with 'J/psi' where the channel labels use 'Jpsi';
            # a channel missing in the summary raises a KeyError here.
            old_weightfile = f'{configuration.prefix}_{channel2lists[channel.label.replace("Jpsi", "J/psi")]}'
            if basf2_mva.available(new_weightfile):
                continue
            if basf2_mva.available(old_weightfile):
                basf2_mva.download(old_weightfile, f'{channel.label}.xml')
            else:
                Teacher.create_fake_weightfile(channel.label)
            teacher.upload(channel.label)
876
877
def get_stages_from_particles(particles: typing.Sequence[typing.Union[config.Particle, str]]):
    """
    Returns the hierarchical structure of the FEI.
    Each stage depends on the particles in the previous stage.
    The final stage is empty (meaning everything is done, and the training is finished at this point).
    Strings are interpreted as 'name:label', for config.Particle objects the attributes name and label are used.
    @param particles list of config.Particle or string objects
    @return list of 7 lists, one per stage, containing the given particles in their original order
    @raise RuntimeError if the name of a particle is not part of any stage
    """
    def get_pname(p):
        return p.split(":")[0] if isinstance(p, str) else p.name

    def get_plabel(p):
        return (p.split(":")[1] if isinstance(p, str) else p.label).lower()

    stages = [
        [p for p in particles if get_pname(p) in ['e+', 'K+', 'pi+', 'mu+', 'gamma', 'p+', 'K_L0']],
        [p for p in particles if get_pname(p) in ['pi0', 'J/psi', 'Lambda0']],
        [p for p in particles if get_pname(p) in ['K_S0', 'Sigma+']],
        [p for p in particles if get_pname(p) in ['D+', 'D0', 'D_s+', 'Lambda_c+'] and 'tag' not in get_plabel(p)],
        [p for p in particles if get_pname(p) in ['D*+', 'D*0', 'D_s*+'] and 'tag' not in get_plabel(p)],
        # Everything labelled as tag is reconstructed together with the B mesons
        [p for p in particles if get_pname(p) in ['B0', 'B+', 'B_s0'] or 'tag' in get_plabel(p)],
        []
    ]

    # Names which actually ended up in a stage. The names have to be taken from the staged
    # particles themselves, otherwise the check below compares a name only with itself.
    staged_names = {get_pname(staged) for stage in stages for staged in stage}
    for p in particles:
        pname = get_pname(p)
        if pname not in staged_names:
            raise RuntimeError(f"Unknown particle {pname}: Not implemented in FEI")

    return stages
907
908
def do_trainings(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration):
    """
    Runs the training of the mva classifiers for all training data which is available.
    Either the user calls this function after each stage of the FEI during the training,
    or (more likely) the distributed.py script calls it after merging the outputs of all jobs.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @return list of tuple with weight file on disk and identifier in database for all trained classifiers
    """
    # The Teacher does the actual work, this function only wires up its inputs
    return Teacher(particles, configuration).do_all_trainings()
920
921
def save_summary(particles: typing.Sequence[config.Particle],
                 configuration: config.FeiConfiguration,
                 cache: int,
                 roundMode: typing.Optional[int] = None,
                 pickleName: str = 'Summary.pickle'):
    """
    Creates the Summary.pickle, which is used to keep track of the stage during the training,
    and can be used later to investigate which configuration was used exactly to create the training.
    Already existing summary files are kept as '<pickleName>.backup_<i>', backup_0 being the most recent one.
    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @param cache current cache level
    @param roundMode mode of current round of training, defaults to configuration.roundMode if None
    @param pickleName name of the pickle file
    """
    if roundMode is None:
        roundMode = configuration.roundMode
    configuration = configuration._replace(cache=cache, roundMode=roundMode)
    # Backup existing Summary.pickle files.
    # Start with the oldest backup, so no backup is overwritten before it was shifted itself.
    for i in range(8, -1, -1):
        backup = f'{pickleName}.backup_{i}'
        if os.path.isfile(backup):
            shutil.copyfile(backup, f'{pickleName}.backup_{i+1}')
    if os.path.isfile(pickleName):
        shutil.copyfile(pickleName, f'{pickleName}.backup_0')
    # The context manager guarantees that the summary is flushed and closed before we return
    with open(pickleName, 'wb') as summary_file:
        pickle.dump((particles, configuration), summary_file)
946
947
def get_path(particles: typing.Sequence[config.Particle], configuration: config.FeiConfiguration) -> FeiState:
    """
    The most important function of the FEI.
    This creates the FEI path for training/fitting (both terms are equal), and application/inference (both terms are equal).
    The whole FEI is defined by the particles which are reconstructed (see default_channels.py)
    and the configuration (see config.py).

    TRAINING
    For training this function is called multiple times, each time the FEI reconstructs one more stage in the hierarchical structure
    i.e. we start with FSP, pi0, K_S0, D, D*, and with B mesons. You have to set configuration.training to True for training mode.
    All weight files created during the training will be stored in your local database.
    If you want to use the FEI training everywhere without copying this database by hand, you have to upload your local database
    to the central database first (see documentation for the Belle2 Condition Database).

    APPLICATION
    For application you call this function once, and it returns the whole path which will reconstruct B mesons
    with an associated signal probability. You have to set configuration.training to False for application mode.

    MONITORING
    You can always turn on the monitoring (configuration.monitor != False),
    to write out ROOT Histograms of many quantities for each stage,
    using these histograms you can use the printReporting.py or latexReporting.py scripts to automatically create pdf files.

    LEGACY
    This function can also use old FEI trainings (version 3), just pass the Summary.pickle file of the old training,
    and the weight files will be automatically converted to the new naming scheme.

    @param particles list of config.Particle objects
    @param configuration config.FeiConfiguration object
    @return FeiState with the created path, the next stage, the used particle lists, the FSP lists
            and the branches which should be excluded from the output
    """
    print(r"""
    ____ _ _ _ _ ____ _ _ ____ _ _ ___ _ _ _ ___ ____ ____ ___ ____ ____ ___ ____ ___ _ ____ _ _
    |___ | | | | |___ | | |___ |\ | | | |\ | | |___ |__/ |__] |__/ |___ | |__| | | | | |\ |
    | |__| |___ |___ |___ \/ |___ | \| | | | \| | |___ | \ | | \ |___ | | | | | |__| | \|

    Author: Thomas Keck 2014 - 2017
    Please cite my PhD thesis
    """)

    # The cache parameter of the configuration object is used during training to keep track,
    # which reconstruction steps are already performed.
    # For fitting/training we start by default with -1, meaning we still have to create the TrainingDataInformation,
    # which is used to determine the number of candidates we have to write out for the FSP trainings in stage 0.
    # For inference/application we start by default with 0, because we don't need the TrainingDataInformation in stage 0.
    # RoundMode plays a similar role as cache,
    # it is used to keep track in which phase within a stage the basf2 execution stops, relevant only for training.
    # During the training we save the particles and configuration (including the current cache stage) in the Summary.pickle object.
    if configuration.training and (configuration.monitor and (configuration.monitoring_path != '')):
        B2ERROR("FEI-core: Custom Monitoring path is not allowed during training!")

    if configuration.cache is None:
        pickleName = 'Summary.pickle'
        if configuration.monitor:
            pickleName = os.path.join(configuration.monitoring_path, pickleName)

        if os.path.isfile(pickleName):
            # Only the stored configuration is used, the particles still come from the steering file
            with open(pickleName, 'rb') as summary_file:
                _, config_bkp = pickle.load(summary_file)
            # check if configuration changed
            for fd in configuration._fields:
                if fd == 'cache' or fd == 'roundMode':
                    continue
                if getattr(configuration, fd) != getattr(config_bkp, fd):
                    B2WARNING(
                        f"FEI-core: Configuration changed: {fd} from {getattr(config_bkp, fd)} to {getattr(configuration, fd)}")

            configuration = config_bkp
            cache = configuration.cache
            print("Cache: Replaced particles from steering and configuration from Summary.pickle: ", cache, configuration.roundMode)
        else:
            if configuration.training:
                cache = -1
            else:
                cache = 0
    else:
        cache = configuration.cache

    # Now we start building the training or application path
    path = basf2.create_path()

    # There are in total 7 stages.
    # For training we start with -1 and go to 7 one stage at a time
    # For application we can run stage 0 to 7 at once
    stages = get_stages_from_particles(particles)

    # If the user provided a Summary.pickle file of a FEIv3 training we
    # convert the old weight files (with hashes), to the new naming scheme.
    # Afterwards the algorithm runs as usual
    if configuration.legacy is not None:
        convert_legacy_training(particles, configuration)

    # During the training we require the number of MC particles in the whole processed
    # data sample, because we don't want to write out billions of e.g. pion candidates.
    # Knowing the total amount of MC particles we can write out only every e.g. 10th candidate
    # That's why we have to write out the TrainingDataInformation before doing anything during the training phase.
    # During application we only need this if we run in monitor mode, and want to write out a summary in the end,
    # the summary contains efficiency, and the efficiency calculation requires the total number of MC particles.
    training_data_information = TrainingDataInformation(particles, outputPath=configuration.monitoring_path)
    if cache < 0 and configuration.training:
        print("Stage 0: Run over all files to count the number of events and McParticles")
        path.add_path(training_data_information.reconstruct())
        save_summary(particles, configuration, 0)
        return FeiState(path, 0, [], [], [])
    elif not configuration.training and configuration.monitor:
        path.add_path(training_data_information.reconstruct())

    # We load the Final State particles
    # It is assumed that the user takes care of adding RootInput, Geometry, and everything
    # which is required to read in data, so we directly start to load the FSP particles
    # used by the FEI.
    loader = FSPLoader(particles, configuration)
    if cache < 1:
        print("Stage 0: Load FSP particles")
        path.add_path(loader.reconstruct())

    # Now we reconstruct each stage one after another.
    # Each stage consists of two parts:
    # PreReconstruction (before the mva method was applied):
    #   - Particle combination
    #   - Do vertex fitting
    #   - Some simple cuts and best candidate selection
    # PostReconstruction (after the mva method was applied):
    #   - Apply the mva method
    #   - Apply cuts on the mva output and best candidate selection
    #
    # If the weight files for the PostReconstruction are not available for the current stage and we are in training mode,
    # we have to create the training data. The training itself is done by the do_trainings function which is called
    # either by the user after each step, or by the distributed.py script
    #
    # If the weight files for the PostReconstruction are not available for the current stage and we are not in training mode,
    # we keep going, as soon as the user will call process on the produced path he will get an error message that the
    # weight files are missing.
    #
    # Finally we keep track of the ParticleLists we use, so the user can run the RemoveParticles module to reduce the size of the
    # intermediate output of RootOutput.
    used_lists = []
    for stage, stage_particles in enumerate(stages):
        if not stage_particles:
            print(f"Stage {stage}: No particles to reconstruct in this stage, skipping!")
            continue

        pre_reconstruction = PreReconstruction(stage_particles, configuration)
        post_reconstruction = PostReconstruction(stage_particles, configuration)

        if stage >= cache:
            print(f"Stage {stage}: PreReconstruct particles: ", [p.name for p in stage_particles])
            path.add_path(pre_reconstruction.reconstruct())
            if configuration.training and not (post_reconstruction.available() and configuration.roundMode == 0):
                print(f"Stage {stage}: Create training data for particles: ", [p.name for p in stage_particles])
                mc_counts = training_data_information.get_mc_counts()
                training_data = TrainingData(stage_particles, configuration, mc_counts)
                path.add_path(training_data.reconstruct())
                used_lists += [channel.name for particle in stage_particles for channel in particle.channels]
                # The training data of this stage has to be processed before any later stage can be built
                break

        used_lists += [particle.identifier for particle in stage_particles]
        if (stage >= cache - 1) and not ((configuration.roundMode == 1) and configuration.training):
            if (configuration.roundMode == 3) and configuration.training:
                print(f"Stage {stage}: BDTs already applied for particles, no postReco needed: ", [p.name for p in stage_particles])
            else:
                print(f"Stage {stage}: Apply BDT for particles: ", [p.name for p in stage_particles])
                if configuration.training and not post_reconstruction.available():
                    raise RuntimeError("FEI-core: training of current stage was not successful, please retrain!")
                path.add_path(post_reconstruction.reconstruct())
            if (((configuration.roundMode == 2) or (configuration.roundMode == 3)) and configuration.training):
                break
    fsps_of_all_stages = [fsp for sublist in get_stages_from_particles(loader.get_fsp_lists()) for fsp in sublist]

    excludelists = []
    if configuration.training and (configuration.roundMode == 3):
        dontRemove = used_lists + fsps_of_all_stages
        # cleanup higher stages
        cleanup = basf2.register_module('RemoveParticlesNotInLists')
        print("FEI-REtrain: pruning basf2_input.root of higher stages")
        cleanup.param('particleLists', dontRemove)
        path.add_module(cleanup)

        # check which lists we have to exclude from the output
        import ROOT  # noqa
        excludedParticlesNonConjugated = [p.identifier for p in particles if p.identifier not in dontRemove]
        excludedParticles = [
            str(name) for name in list(
                ROOT.Belle2.ParticleListName.addAntiParticleLists(excludedParticlesNonConjugated))]
        root_file = ROOT.TFile.Open('basf2_input.root', "READ")
        try:
            tree = root_file.Get('tree')
            for branch in tree.GetListOfBranches():
                branchName = branch.GetName()
                if any(exParticle in branchName for exParticle in excludedParticles):
                    excludelists.append(branchName)
        finally:
            # Only the branch names are needed, so the input file is released again right away
            if root_file:
                root_file.Close()
        print("Exclude lists from output: ", excludelists)

    # If we run in monitor mode we are interested in the ModuleStatistics,
    # these statistics contain the runtime for each module which was run.
    if configuration.monitor:
        print("Add ModuleStatistics")
        output = basf2.register_module('RootOutput')
        output.param('outputFileName', os.path.join(configuration.monitoring_path, 'Monitor_ModuleStatistics.root'))
        output.param('branchNames', ['EventMetaData'])  # cannot be removed, but of only small effect on file size
        output.param('branchNamesPersistent', ['ProcessStatistics'])
        output.param('ignoreCommandLineOverride', True)
        path.add_module(output)

    # As mentioned above the FEI keeps track of the stages which are already reconstructed during the training
    # so we write out the Summary.pickle here, and increase the stage by one.
    if configuration.training or configuration.monitor:
        print("Save Summary.pickle")
        save_summary(particles, configuration, stage+1, pickleName=os.path.join(configuration.monitoring_path, 'Summary.pickle'))

    # Finally we return the path, the stage and the used lists to the user.
    return FeiState(path, stage+1, plists=used_lists, fsplists=fsps_of_all_stages, excludelists=excludelists)
isB2BII()
Definition b2bii.py:14
pybasf2.Path reconstruct(self)
Definition core.py:175
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
Definition core.py:154
typing.List[str] get_fsp_lists(self)
Definition core.py:165
config
config.FeiConfiguration object
Definition core.py:163
particles
list of config.Particle objects
Definition core.py:161
pybasf2.Path reconstruct(self)
Definition core.py:519
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
Definition core.py:489
typing.Sequence[str] get_missing_channels(self)
Definition core.py:500
config
config.FeiConfiguration object
Definition core.py:498
particles
list of config.Particle objects
Definition core.py:496
pybasf2.Path reconstruct(self)
Definition core.py:318
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
Definition core.py:307
config
config.FeiConfiguration object
Definition core.py:316
particles
list of config.Particle objects
Definition core.py:314
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
Definition core.py:692
upload(self, str channel)
Definition core.py:742
create_fake_weightfile(str channel)
Definition core.py:704
do_all_trainings(self)
Definition core.py:753
config
config.FeiConfiguration object
Definition core.py:701
particles
list of config.Particle objects
Definition core.py:699
check_if_weightfile_is_fake(str filename)
Definition core.py:731
pybasf2.Path reconstruct(self)
Definition core.py:101
__init__(self, typing.Sequence[config.Particle] particles, str outputPath='')
Definition core.py:84
particles
list of config.Particle objects
Definition core.py:91
pybasf2.Path reconstruct(self)
Definition core.py:228
mc_counts
containing number of MC Particles
Definition core.py:226
config
config.FeiConfiguration object
Definition core.py:224
__init__(self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config, typing.Mapping[int, typing.Mapping[str, float]] mc_counts)
Definition core.py:214
particles
list of config.Particle objects
Definition core.py:222
from_name(name)
Definition pdg.py:63