14 Contains classes to read in the monitoring output
15 and some simple plotting routines.
17 This is used by printReporting.py and latexReporting.py
18 to create summaries for a FEI training or application.
22 from generators
import get_default_decayfile
23except ModuleNotFoundError:
24 print(
"MonitoringBranchingFractions won't work.")
25from basf2_mva_evaluation
import plotting
36def removeJPsiSlash(string):
37 """ Remove slashes in a string, which is not allowed for filenames. """
38 return string.replace(
'/',
'')
42 """ Load the FEI configuration from the Summary.pickle file. """
43 if not os.path.isfile(
'Summary.pickle'):
44 raise RuntimeError(
"""Could not find Summary.pickle!
45 This file is automatically created by the FEI training.
46 But you can also create it yourself using:
47 pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))""")
48 return pickle.load(open(
'Summary.pickle',
'rb'))
53 This class provides the efficiency, purity and other quantities for a
54 given number of true signal candidates, signal candidates and background candidates
57 def __init__(self, nTrueSig, nSig, nBg):
59 Create a new Statistic object
60 @param nTrueSig the number of true signal particles
61 @param nSig the number of reconstructed signal candidates
62 @param nBg the number of reconstructed background candidates
65 self._nTrueSig = nTrueSig
73 """ Returns the number of reconstructed signal candidates. """
78 """ Returns the number of reconstructed true signal candidates. """
83 """ Returns the number of reconstructed background candidates. """
88 """ Returns total number of reconstructed candidates. """
89 return self._nSig + self._nBg
93 """ Returns the purity of the reconstructed candidates. """
98 return self._nSig / float(self.nTotal)
101 def efficiency(self):
102 """ Returns the efficiency of the reconstructed signal candidates with respect to the number of true signal particles. """
105 if self._nTrueSig == 0:
107 return self._nSig / float(self._nTrueSig)
110 def purityError(self):
111 """ Returns the uncertainty of the purity. """
114 return self.calcStandardDeviation(self._nSig, self.nTotal)
117 def efficiencyError(self):
119 Returns the uncertainty of the efficiency.
120 For an efficiency eps = self._nSig/self._nTrueSig, this function calculates the
121 standard deviation according to http://arxiv.org/abs/physics/0701199 .
123 if self._nTrueSig == 0:
125 return self.calcStandardDeviation(self._nSig, self._nTrueSig)
127 def calcStandardDeviation(self, k, n):
128 """ Helper method to calculate the standard deviation for efficiencies. """
131 variance = (k + 1) * (k + 2) / ((n + 2) * (n + 3)) - (k + 1) ** 2 / ((n + 2) ** 2)
134 return math.sqrt(variance)
137 """ Returns a string representation of a Statistic object. """
138 o = f
"nTrueSig {self.nTrueSig} nSig {self.nSig} nBg {self.nBg}\n"
139 o += f
"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n"
140 o += f
"Purity {self.purity:.3f} ({self.purityError:.3f})\n"
143 def __add__(self, a):
144 """ Adds two Statistics objects and returns a new object. """
145 return Statistic(self.nTrueSig, self.nSig + a.nSig, self.nBg + a.nBg)
147 def __radd__(self, a):
149 Returns a new Statistic object if the current one is added to zero.
150 Necessary to apply sum-function to Statistic objects.
153 return NotImplemented
154 return Statistic(self.nTrueSig, self.nSig, self.nBg)
159 Reads all TH1F and TH2F from a ROOT file
160 and puts them into a more accessible format.
163 def __init__(self, filename, dirname):
165 Reads histograms from the given file
166 @param filename the name of the ROOT file
175 self.two_dimensional = {}
179 self.valid = os.path.isfile(filename)
184 f = ROOT.TFile.Open(filename,
'read')
185 d = f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(dirname))
187 for key
in d.GetListOfKeys():
188 name = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
190 if not (isinstance(hist, ROOT.TH1D)
or isinstance(hist, ROOT.TH1F)
or
191 isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)):
193 self.two_dimensional[name] = isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)
194 if self.two_dimensional[name]:
195 nbins = (hist.GetNbinsX(), hist.GetNbinsY())
196 self.centers[name] = [[hist.GetXaxis().GetBinCenter(i)
for i
in range(nbins[0] + 2)],
197 [hist.GetYaxis().GetBinCenter(i)
for i
in range(nbins[1] + 2)]]
198 self.values[name] = [[hist.GetBinContent(i, j)
for i
in range(nbins[0] + 2)]
for j
in range(nbins[1] + 2)]
199 self.nbins[name] = nbins
201 nbins = hist.GetNbinsX()
202 self.centers[name] = np.array([hist.GetBinCenter(i)
for i
in range(nbins + 2)])
203 self.values[name] = np.array([hist.GetBinContent(i)
for i
in range(nbins + 2)])
204 self.nbins[name] = nbins
208 Calculates the sum of a given histogram (== sum of all entries)
209 @param name key of the histogram
211 if name
not in self.centers:
213 if self.two_dimensional[name]:
215 for i
in range(len(self.values[name])):
216 for j
in range(len(self.values[name][i])):
217 tempsum += self.values[name][i][j]
219 return np.sum(self.values[name])
221 def mean(self, name):
223 Calculates the mean of a given histogram
224 @param name key of the histogram
226 if name
not in self.centers:
228 if self.two_dimensional[name]:
230 for i
in range(len(self.values[name])):
231 for j
in range(len(self.values[name][i])):
232 tempsum += self.centers[name][i][j] * self.values[name][i][j]
233 return tempsum / self.sum(name)
234 return np.average(self.centers[name], weights=self.values[name])
238 Calculates the standard deviation of a given histogram
239 @param name key of the histogram
241 if name
not in self.centers:
243 if self.two_dimensional[name]:
244 avg = self.mean(name)
246 for i
in range(len(self.values[name])):
247 for j
in range(len(self.values[name][i])):
248 tempsum += self.values[name][i][j] * (self.centers[name][i][j] - avg)**2
249 return np.sqrt(tempsum / self.sum(name))
250 avg = np.average(self.centers[name], weights=self.values[name])
251 return np.sqrt(np.average((self.centers[name] - avg)**2, weights=self.values[name]))
255 Calculates the minimum of a given histogram
256 @param name key of the histogram
258 if name
not in self.centers:
260 if self.two_dimensional[name]:
262 for i
in range(len(self.values[name])):
263 for j
in range(len(self.values[name][i])):
264 if self.values[name][i][j] < tempmin:
265 tempmin = self.centers[name][i][j]
267 nonzero = np.nonzero(self.values[name])[0]
268 if len(nonzero) == 0:
270 return self.centers[name][nonzero[0]]
274 Calculates the maximum of a given histogram
275 @param name key of the histogram
277 if name
not in self.centers:
279 if self.two_dimensional[name]:
281 for i
in range(len(self.values[name])):
282 for j
in range(len(self.values[name][i])):
283 if self.values[name][i][j] > tempmax:
284 tempmax = self.centers[name][i][j]
286 nonzero = np.nonzero(self.values[name])[0]
287 if len(nonzero) == 0:
289 return self.centers[name][nonzero[-1]]
292class MonitoringNTuple:
294 Reads the ntuple named variables from a ROOT file
297 def __init__(self, filename, treenameprefix):
299 Reads ntuple from the given file
300 @param filename the name of the ROOT file
305 self.valid = os.path.isfile(filename)
306 print(f
'FEI-monitoring: Looking for {filename}')
308 raise RuntimeError(f
"Could not find {filename}: current dir is {os.getcwd()}")
311 self.f = ROOT.TFile.Open(filename,
'read')
312 print(f
'FEI-monitoring: Found {filename}')
314 self.tree = self.f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{treenameprefix} variables'))
315 print(f
'FEI-monitoring: Found {treenameprefix} variables')
317 self.filename = filename
320class MonitoringModuleStatistics:
322 Reads the module statistics for a single particle from the outputted root file
323 and puts them into a more accessible format
326 def __init__(self, particle):
328 Reads the module statistics from the file named Monitor_ModuleStatistics.root
329 @param particle the particle for which the statistics are read
333 root_file = ROOT.TFile.Open(
'Monitor_ModuleStatistics.root',
'read')
334 persistentTree = root_file.Get(
'persistent')
335 persistentTree.GetEntry(0)
337 stats = persistentTree.ProcessStatistics.Clone()
340 numEntries = persistentTree.GetEntriesFast()
341 for i
in range(1, numEntries):
342 persistentTree.GetEntry(i)
343 stats.merge(persistentTree.ProcessStatistics)
346 statistic = {m.getName(): m.getTimeSum(m.c_Event) / 1e9
for m
in stats.getAll()}
349 self.channel_time = {}
351 self.channel_time_per_module = {}
352 for channel
in particle.channels:
353 if channel.label
not in self.channel_time:
354 self.channel_time[channel.label] = 0.0
355 self.channel_time_per_module[channel.label] = {
'ParticleCombiner': 0.0,
356 'BestCandidateSelection': 0.0,
357 'PListCutAndCopy': 0.0,
358 'VariablesToExtraInfo': 0.0,
360 'ParticleSelector': 0.0,
362 'ParticleVertexFitter': 0.0,
363 'TagUniqueSignal': 0.0,
364 'VariablesToHistogram': 0.0,
365 'VariablesToNtuple': 0.0}
366 for key, time
in statistic.items():
367 if (channel.decayString
in key
or channel.name
in key):
368 self.channel_time[channel.label] += time
369 for k
in self.channel_time_per_module[channel.label]:
371 self.channel_time_per_module[channel.label][k] += time
374 self.particle_time = 0
375 for key, time
in statistic.items():
376 if particle.identifier
in key:
377 self.particle_time += time
380def MonitorSigProbPlot(particle, filename):
381 """ Creates a Signal probability plot using ROOT. """
382 if not particle.final_ntuple.valid:
385 [
'extraInfo__bouniqueSignal__bc',
386 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
387 [
'unique',
'probability',
'signal'], max_entries=int(1e8))
390 common = (df[
'probability'] >= 0) & (df[
'probability'] <= 1)
392 p.add(df,
'probability', (df[
'signal'] == 1), label=
"Signal")
393 p.add(df,
'probability', (df[
'signal'] == 0), label=
"Background")
395 p.axis.set_title(
"Signal probability")
396 p.axis.set_xlabel(
"Probability")
397 p.save(filename +
'.png')
400def MonitorSpectatorPlot(particle, spectator, filename, range=(
None,
None)):
401 """ Creates a spectator plot using ROOT. """
402 if not particle.final_ntuple.valid:
405 [
'extraInfo__bouniqueSignal__bc', spectator,
406 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
407 [
'unique', spectator,
'probability',
'signal'], max_entries=int(1e8))
408 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
410 common = (df[
'probability'] >= cut)
411 if range[0]
is not None:
412 common &= (df[spectator] >= range[0])
413 if range[1]
is not None:
414 common &= (df[spectator] <= range[1])
416 p.add(df, spectator, (df[
'signal'] == 1), label=
"Signal")
417 p.add(df, spectator, (df[
'signal'] == 0), label=
"Background")
419 p.axis.set_title(f
"{spectator} for signal probability >= {cut:.2f}")
420 p.axis.set_xlabel(spectator)
421 p.save(f
'{filename}_{i}.png')
424def MonitorROCPlot(particle, filename):
425 """ Creates a ROC plot using ROOT. """
426 if not particle.final_ntuple.valid:
429 [
'extraInfo__bouniqueSignal__bc',
430 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
431 [
'unique',
'probability',
'signal'], max_entries=int(1e8))
433 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0, label=
'All')
435 p.save(filename +
'.png')
438def MonitorDiagPlot(particle, filename):
439 """ Creates a Diagonal plot using ROOT. """
440 if not particle.final_ntuple.valid:
443 [
'extraInfo__bouniqueSignal__bc',
444 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
445 [
'unique',
'probability',
'signal'], max_entries=int(1e8))
447 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0)
449 p.save(filename +
'.png')
452def MonitoringMCCount(particle):
454 Reads the MC Counts for a given particle from the ROOT file mcParticlesCount.root
455 @param particle the particle for which the MC counts are read
456 @return dictionary with 'sum', 'std', 'avg', 'max', and 'min'
460 root_file = ROOT.TFile.Open(
'mcParticlesCount.root',
'read')
462 key = f
'NumberOfMCParticlesInEvent({abs(pdg.from_name(particle.name))})'
464 key = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(key)
465 hist = root_file.Get(key)
467 mc_counts = {
'sum': 0,
'std': 0,
'avg': 0,
'min': 0,
'max': 0}
469 mc_counts[
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
470 for bin
in range(hist.GetNbinsX()))
471 mc_counts[
'std'] = hist.GetStdDev()
472 mc_counts[
'avg'] = hist.GetMean()
473 mc_counts[
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
474 mc_counts[
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
478class MonitoringBranchingFractions:
479 """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """
485 Create a new MonitoringBranchingFraction object.
486 The extracted branching fractions are cached, hence creating more than one object does not do anything.
488 if MonitoringBranchingFractions._shared
is None:
489 decay_file = get_default_decayfile()
491 self.exclusive_branching_fractions = self.loadExclusiveBranchingFractions(decay_file)
493 self.inclusive_branching_fractions = self.loadInclusiveBranchingFractions(self.exclusive_branching_fractions)
494 MonitoringBranchingFractions._shared = (self.exclusive_branching_fractions, self.inclusive_branching_fractions)
496 self.exclusive_branching_fractions, self.inclusive_branching_fractions = MonitoringBranchingFractions._shared
498 def getExclusive(self, particle):
499 """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
500 return self.getBranchingFraction(particle, self.exclusive_branching_fractions)
502 def getInclusive(self, particle):
503 """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
504 return self.getBranchingFraction(particle, self.inclusive_branching_fractions)
506 def getBranchingFraction(self, particle, branching_fractions):
507 """ Returns the branching fraction of a particle given a branching_fraction table. """
508 result = {c.label: 0.0
for c
in particle.channels}
510 channels = [tuple(sorted(d.split(
':')[0]
for d
in channel.daughters))
for channel
in particle.channels]
511 if name
not in branching_fractions:
513 channels = [tuple(
pdg.conjugate(d)
for d
in channel)
for channel
in channels]
514 if name
not in branching_fractions:
516 for c, key
in zip(particle.channels, channels):
517 if key
in branching_fractions[name]:
518 result[c.label] = branching_fractions[name][key]
521 def loadExclusiveBranchingFractions(self, filename):
523 Load branching fraction from MC decay-file.
526 def isFloat(element):
527 """ Checks if element is a convertible to float"""
534 def isValidParticle(element):
535 """ Checks if element is a valid pdg name for a particle"""
542 branching_fractions = {
'UNKOWN': {}}
545 with open(filename)
as f:
547 fields = line.split(
' ')
548 fields = [x
for x
in fields
if x !=
'']
549 if len(fields) < 2
or fields[0][0] ==
'#':
551 if fields[0] ==
'Decay':
552 mother = fields[1].strip()
553 if not isValidParticle(mother):
556 if fields[0] ==
'Enddecay':
559 if mother ==
'UNKOWN':
562 if len(fields) < 1
or not isFloat(fields[0]):
564 while len(fields) > 1:
565 if isValidParticle(fields[-1]):
568 if len(fields) < 1
or not all(isValidParticle(p)
for p
in fields[1:]):
570 neutrinoTag_list = [
'nu_e',
'nu_mu',
'nu_tau',
'anti-nu_e',
'anti-nu_mu',
'anti-nu_tau']
571 daughters = tuple(sorted(p
for p
in fields[1:]
if p
not in neutrinoTag_list))
572 if mother
not in branching_fractions:
573 branching_fractions[mother] = {}
574 if daughters
not in branching_fractions[mother]:
575 branching_fractions[mother][daughters] = 0.0
576 branching_fractions[mother][daughters] += float(fields[0])
578 del branching_fractions[
'UNKOWN']
579 return branching_fractions
581 def loadInclusiveBranchingFractions(self, exclusive_branching_fractions):
583 Get covered branching fraction of a particle using a recursive algorithm
584 and the given exclusive branching_fractions (given as Hashable List)
585 @param particle identifier of the particle
586 @param branching_fractions
588 particles = set(exclusive_branching_fractions.keys())
590 particles = sorted(particles, key=
lambda x:
pdg.get(x).Mass())
591 inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)
594 if p
in inclusive_branching_fractions:
595 br = sum(inclusive_branching_fractions[p].values())
597 br = sum(inclusive_branching_fractions[
pdg.conjugate(p)].values())
598 for p_br
in inclusive_branching_fractions.values():
600 for i
in range(c.count(p)):
602 return inclusive_branching_fractions
605class MonitoringParticle:
607 Monitoring object containing all the monitoring information
608 about a single particle
611 def __init__(self, particle):
613 Read the monitoring information of the given particle
614 @param particle the particle for which the information is read
617 self.particle = particle
618 particlesInStages = fei.core.get_stages_from_particles([particle])
620 for i
in range(len(particlesInStages)):
621 for iparticle
in particlesInStages[i]:
622 if iparticle.identifier == self.particle.identifier:
626 raise RuntimeError(f
"Could not find particle {self.particle.identifier} in the list of stages.")
629 self.mc_count = MonitoringMCCount(particle)
631 self.module_statistic = MonitoringModuleStatistics(particle)
633 self.time_per_channel = self.module_statistic.channel_time
635 self.time_per_channel_per_module = self.module_statistic.channel_time_per_module
637 self.total_time = self.module_statistic.particle_time + sum(self.time_per_channel.values())
640 self.total_number_of_channels = len(self.particle.channels)
642 self.reconstructed_number_of_channels = 0
645 self.branching_fractions = MonitoringBranchingFractions()
647 self.exc_br_per_channel = self.branching_fractions.getExclusive(particle)
649 self.inc_br_per_channel = self.branching_fractions.getInclusive(particle)
652 self.before_ranking = {}
654 self.after_ranking = {}
656 self.after_vertex = {}
658 self.after_classifier = {}
660 self.training_data = {}
662 self.ignored_channels = {}
664 for channel
in self.particle.channels:
665 hist = MonitoringHist(
'Monitor_PreReconstruction_BeforeRanking.root', f
'{channel.label}')
666 self.before_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
667 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterRanking.root', f
'{channel.label}')
668 self.after_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
669 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterVertex.root', f
'{channel.label}')
670 self.after_vertex[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
671 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterMVA.root', f
'{channel.label}')
672 self.after_classifier[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
673 if hist.valid
and hist.sum(channel.mvaConfig.target) > 0:
674 self.reconstructed_number_of_channels += 1
675 self.ignored_channels[channel.label] =
False
677 self.ignored_channels[channel.label] =
True
678 hist = MonitoringHist(
'Monitor_TrainingData.root', f
'{channel.label}')
679 self.training_data[channel.label] = hist
681 plist = removeJPsiSlash(particle.identifier)
682 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforePostCut.root', f
'{plist}')
684 self.before_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
685 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforeRanking.root', f
'{plist}')
687 self.before_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
688 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterRanking.root', f
'{plist}')
690 self.after_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
692 self.before_tag = self.calculateStatistic(hist, self.particle.mvaConfig.target)
694 self.final_ntuple = MonitoringNTuple(
'Monitor_Final.root', f
'{plist}')
696 self.after_tag = self.calculateUniqueStatistic(self.final_ntuple.tree)
698 def calculateStatistic(self, hist, target):
700 Calculate Statistic object where all signal candidates are considered signal
702 nTrueSig = self.mc_count[
'sum']
704 return Statistic(nTrueSig, 0, 0)
705 signal_bins = (hist.centers[target] > 0.5)
706 bckgrd_bins = ~signal_bins
707 nSig = hist.values[target][signal_bins].sum()
708 nBg = hist.values[target][bckgrd_bins].sum()
709 return Statistic(nTrueSig, nSig, nBg)
711 def calculateUniqueStatistic(self, tree):
713 Calculate Static object where only unique signal candidates are considered signal
715 nTrueSig = self.mc_count[
'sum']
717 return Statistic(nTrueSig, 0, 0)
721 if getattr(entry,
"extraInfo__bouniqueSignal__bc") == 1:
725 return Statistic(nTrueSig, nSig, nBg)
chain2dict(chain, tree_columns, dict_columns=None, max_entries=None)