14 Contains classes to read in the monitoring output
15 and some simple plotting routines.
17 This is used by printReporting.py and latexReporting.py
18 to create summaries for a FEI training or application.
22 from generators
import get_default_decayfile
23 except ModuleNotFoundError:
24 print(
"MonitoringBranchingFractions won't work.")
25 from basf2_mva_evaluation
import plotting
35 def removeJPsiSlash(string):
36 """ Remove slashes in a string, which is not allowed for filenames. """
37 return string.replace(
'/',
'')
41 """ Load the FEI configuration from the Summary.pickle file. """
42 if not os.path.isfile(
'Summary.pickle'):
43 raise RuntimeError(
"""Could not find Summary.pickle!
44 This file is automatically created by the FEI training.
45 But you can also create it yourself using:
46 pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))""")
47 return pickle.load(open(
'Summary.pickle',
'rb'))
52 This class provides the efficiency, purity and other quantities for a
53 given number of true signal candidates, signal candidates and background candidates
56 def __init__(self, nTrueSig, nSig, nBg):
58 Create a new Statistic object
59 @param nTrueSig the number of true signal particles
60 @param nSig the number of reconstructed signal candidates
61 @param nBg the number of reconstructed background candidates
64 self.nTrueSig = nTrueSig
72 """ Returns total number of reconstructed candidates. """
73 return self.nSig + self.nBg
77 """ Returns the purity of the reconstructed candidates. """
82 return self.nSig / float(self.nTotal)
86 """ Returns the efficiency of the reconstructed signal candidates with respect to the number of true signal particles. """
89 if self.nTrueSig == 0:
91 return self.nSig / float(self.nTrueSig)
94 def purityError(self):
95 """ Returns the uncertainty of the purity. """
98 return self.calcStandardDeviation(self.nSig, self.nTotal)
101 def efficiencyError(self):
103 Returns the uncertainty of the efficiency.
104 For an efficiency eps = self.nSig/self.nTrueSig, this function calculates the
105 standard deviation according to http://arxiv.org/abs/physics/0701199 .
107 if self.nTrueSig == 0:
109 return self.calcStandardDeviation(self.nSig, self.nTrueSig)
111 def calcStandardDeviation(self, k, n):
112 """ Helper method to calculate the standard deviation for efficiencies. """
115 variance = (k + 1) * (k + 2) / ((n + 2) * (n + 3)) - (k + 1) ** 2 / ((n + 2) ** 2)
118 return math.sqrt(variance)
121 """ Returns a string representation of a Statistic object. """
122 o = f
"nTrueSig {self.nTrueSig} nSig {self.nSig} nBg {self.nBg}\n"
123 o += f
"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n"
124 o += f
"Purity {self.purity:.3f} ({self.purityError:.3f})\n"
127 def __add__(self, a):
128 """ Adds two Statistics objects and returns a new object. """
129 return Statistic(self.nTrueSig, self.nSig + a.nSig, self.nBg + a.nBg)
131 def __radd__(self, a):
133 Returns a new Statistic object if the current one is added to zero.
134 Necessary to apply sum-function to Statistic objects.
137 return NotImplemented
138 return Statistic(self.nTrueSig, self.nSig, self.nBg)
141 class MonitoringHist:
143 Reads all TH1F and TH2F from a ROOT file
144 and puts them into a more accessible format.
147 def __init__(self, filename, dirname):
149 Reads histograms from the given file
150 @param filename the name of the ROOT file
161 self.valid = os.path.isfile(filename)
166 f = ROOT.TFile.Open(filename,
'read')
167 d = f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(dirname))
169 for key
in d.GetListOfKeys():
170 name = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
172 if not (isinstance(hist, ROOT.TH1D)
or isinstance(hist, ROOT.TH1F)
or
173 isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)):
175 two_dimensional = isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)
177 nbins = (hist.GetNbinsX(), hist.GetNbinsY())
178 self.centers[name] = np.array([[hist.GetXaxis().GetBinCenter(i)
for i
in range(nbins[0] + 2)],
179 [hist.GetYaxis().GetBinCenter(i)
for i
in range(nbins[1] + 2)]])
180 self.values[name] = np.array([[hist.GetBinContent(i, j)
for i
in range(nbins[0] + 2)]
for j
in range(nbins[1] + 2)])
181 self.nbins[name] = nbins
183 nbins = hist.GetNbinsX()
184 self.centers[name] = np.array([hist.GetBinCenter(i)
for i
in range(nbins + 2)])
185 self.values[name] = np.array([hist.GetBinContent(i)
for i
in range(nbins + 2)])
186 self.nbins[name] = nbins
190 Calculates the sum of a given histogram (== sum of all entries)
191 @param name key of the histogram
193 if name
not in self.centers:
195 return np.sum(self.values[name])
197 def mean(self, name):
199 Calculates the mean of a given histogram
200 @param name key of the histogram
202 if name
not in self.centers:
204 return np.average(self.centers[name], weights=self.values[name])
208 Calculates the standard deviation of a given histogram
209 @param name key of the histogram
211 if name
not in self.centers:
213 avg = np.average(self.centers[name], weights=self.values[name])
214 return np.sqrt(np.average((self.centers[name] - avg)**2, weights=self.values[name]))
218 Calculates the minimum of a given histogram
219 @param name key of the histogram
221 if name
not in self.centers:
223 nonzero = np.nonzero(self.values[name])[0]
224 if len(nonzero) == 0:
226 return self.centers[name][nonzero[0]]
230 Calculates the maximum of a given histogram
231 @param name key of the histogram
233 if name
not in self.centers:
235 nonzero = np.nonzero(self.values[name])[0]
236 if len(nonzero) == 0:
238 return self.centers[name][nonzero[-1]]
241 class MonitoringNTuple:
243 Reads the ntuple named variables from a ROOT file
246 def __init__(self, filename, treenameprefix):
248 Reads ntuple from the given file
249 @param filename the name of the ROOT file
254 self.valid = os.path.isfile(filename)
258 self.f = ROOT.TFile.Open(filename,
'read')
260 self.tree = self.f.Get(f
'{treenameprefix} variables')
262 self.filename = filename
265 class MonitoringModuleStatistics:
267 Reads the module statistics for a single particle from the outputted root file
268 and puts them into a more accessible format
271 def __init__(self, particle):
273 Reads the module statistics from the file named Monitor_ModuleStatistics.root
274 @param particle the particle for which the statistics are read
278 root_file = ROOT.TFile.Open(
'Monitor_ModuleStatistics.root',
'read')
279 persistentTree = root_file.Get(
'persistent')
280 persistentTree.GetEntry(0)
282 stats = persistentTree.ProcessStatistics.Clone()
285 numEntries = persistentTree.GetEntriesFast()
286 for i
in range(1, numEntries):
287 persistentTree.GetEntry(i)
288 stats.merge(persistentTree.ProcessStatistics)
291 statistic = {m.getName(): m.getTimeSum(m.c_Event) / 1e9
for m
in stats.getAll()}
294 self.channel_time = {}
296 self.channel_time_per_module = {}
297 for channel
in particle.channels:
298 if channel.label
not in self.channel_time:
299 self.channel_time[channel.label] = 0.0
300 self.channel_time_per_module[channel.label] = {
'ParticleCombiner': 0.0,
301 'BestCandidateSelection': 0.0,
302 'PListCutAndCopy': 0.0,
303 'VariablesToExtraInfo': 0.0,
305 'ParticleSelector': 0.0,
307 'ParticleVertexFitter': 0.0,
308 'TagUniqueSignal': 0.0,
309 'VariablesToHistogram': 0.0,
310 'VariablesToNtuple': 0.0}
311 for key, time
in statistic.items():
312 if(channel.decayString
in key
or channel.name
in key):
313 self.channel_time[channel.label] += time
314 for k
in self.channel_time_per_module[channel.label]:
316 self.channel_time_per_module[channel.label][k] += time
319 self.particle_time = 0
320 for key, time
in statistic.items():
321 if particle.identifier
in key:
322 self.particle_time += time
325 def MonitorCosBDLPlot(particle, filename):
326 """ Creates a CosBDL plot using ROOT. """
327 if not particle.final_ntuple.valid:
329 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
330 [
'extraInfo__bouniqueSignal__bc',
'cosThetaBetweenParticleAndNominalB',
331 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
332 [
'unique',
'cosThetaBDl',
'probability',
'signal'])
333 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
335 common = (np.abs(df[
'cosThetaBDl']) < 10) & (df[
'probability'] >= cut)
337 p.add(df,
'cosThetaBDl', (df[
'signal'] == 1), label=
"Signal")
338 p.add(df,
'cosThetaBDl', (df[
'signal'] == 0), label=
"Background")
340 p.axis.set_title(f
"Cosine of Theta between B and Dl system for signal probability >= {cut:.2f}")
341 p.axis.set_xlabel(
"CosThetaBDl")
342 p.save(f
'{filename}_{i}.png')
345 def MonitorMbcPlot(particle, filename):
346 """ Creates a Mbc plot using ROOT. """
347 if not particle.final_ntuple.valid:
349 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
350 [
'extraInfo__bouniqueSignal__bc',
'Mbc',
351 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
352 [
'unique',
'Mbc',
'probability',
'signal'])
353 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
355 common = (df[
'Mbc'] > 5.23) & (df[
'probability'] >= cut)
357 p.add(df,
'Mbc', (df[
'signal'] == 1), label=
"Signal")
358 p.add(df,
'Mbc', (df[
'signal'] == 0), label=
"Background")
360 p.axis.set_title(f
"Beam constrained mass for signal probability >= {cut:.2f}")
361 p.axis.set_xlabel(
"Mbc")
362 p.save(f
'{filename}_{i}.png')
365 def MonitorROCPlot(particle, filename):
366 """ Creates a ROC plot using ROOT. """
367 if not particle.final_ntuple.valid:
369 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
370 [
'extraInfo__bouniqueSignal__bc',
371 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
372 [
'unique',
'probability',
'signal'])
374 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0, label=
'All')
376 p.save(filename +
'.png')
379 def MonitorDiagPlot(particle, filename):
380 """ Creates a Diagonal plot using ROOT. """
381 if not particle.final_ntuple.valid:
383 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
384 [
'extraInfo__bouniqueSignal__bc',
385 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
386 [
'unique',
'probability',
'signal'])
388 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0)
390 p.save(filename +
'.png')
393 def MonitoringMCCount(particle):
395 Reads the MC Counts for a given particle from the ROOT file mcParticlesCount.root
396 @param particle the particle for which the MC counts are read
397 @return dictionary with 'sum', 'std', 'avg', 'max', and 'min'
401 root_file = ROOT.TFile.Open(
'mcParticlesCount.root',
'read')
403 key = f
'NumberOfMCParticlesInEvent({abs(pdg.from_name(particle.name))})'
405 key = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(key)
406 hist = root_file.Get(key)
408 mc_counts = {
'sum': 0,
'std': 0,
'avg': 0,
'min': 0,
'max': 0}
410 mc_counts[
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
411 for bin
in range(hist.GetNbinsX()))
412 mc_counts[
'std'] = hist.GetStdDev()
413 mc_counts[
'avg'] = hist.GetMean()
414 mc_counts[
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
415 mc_counts[
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
419 class MonitoringBranchingFractions:
420 """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """
426 Create a new MonitoringBranchingFraction object.
427 The extracted branching fractions are cached, hence creating more than one object does not do anything.
429 if MonitoringBranchingFractions._shared
is None:
430 decay_file = get_default_decayfile()
432 self.exclusive_branching_fractions = self.loadExclusiveBranchingFractions(decay_file)
434 self.inclusive_branching_fractions = self.loadInclusiveBranchingFractions(self.exclusive_branching_fractions)
435 MonitoringBranchingFractions._shared = (self.exclusive_branching_fractions, self.inclusive_branching_fractions)
437 self.exclusive_branching_fractions, self.inclusive_branching_fractions = MonitoringBranchingFractions._shared
439 def getExclusive(self, particle):
440 """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
441 return self.getBranchingFraction(particle, self.exclusive_branching_fractions)
443 def getInclusive(self, particle):
444 """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
445 return self.getBranchingFraction(particle, self.inclusive_branching_fractions)
447 def getBranchingFraction(self, particle, branching_fractions):
448 """ Returns the branching fraction of a particle given a branching_fraction table. """
449 result = {c.label: 0.0
for c
in particle.channels}
451 channels = [tuple(sorted(d.split(
':')[0]
for d
in channel.daughters))
for channel
in particle.channels]
452 if name
not in branching_fractions:
454 channels = [tuple(
pdg.conjugate(d)
for d
in channel)
for channel
in channels]
455 if name
not in branching_fractions:
457 for c, key
in zip(particle.channels, channels):
458 if key
in branching_fractions[name]:
459 result[c.label] = branching_fractions[name][key]
462 def loadExclusiveBranchingFractions(self, filename):
464 Load branching fraction from MC decay-file.
467 def isFloat(element):
468 """ Checks if element is a convertible to float"""
475 def isValidParticle(element):
476 """ Checks if element is a valid pdg name for a particle"""
483 branching_fractions = {
'UNKOWN': {}}
486 with open(filename)
as f:
488 fields = line.split(
' ')
489 fields = [x
for x
in fields
if x !=
'']
490 if len(fields) < 2
or fields[0][0] ==
'#':
492 if fields[0] ==
'Decay':
493 mother = fields[1].strip()
494 if not isValidParticle(mother):
497 if fields[0] ==
'Enddecay':
500 if mother ==
'UNKOWN':
503 if len(fields) < 1
or not isFloat(fields[0]):
505 while len(fields) > 1:
506 if isValidParticle(fields[-1]):
509 if len(fields) < 1
or not all(isValidParticle(p)
for p
in fields[1:]):
511 neutrinoTag_list = [
'nu_e',
'nu_mu',
'nu_tau',
'anti-nu_e',
'anti-nu_mu',
'anti-nu_tau']
512 daughters = tuple(sorted(p
for p
in fields[1:]
if p
not in neutrinoTag_list))
513 if mother
not in branching_fractions:
514 branching_fractions[mother] = {}
515 if daughters
not in branching_fractions[mother]:
516 branching_fractions[mother][daughters] = 0.0
517 branching_fractions[mother][daughters] += float(fields[0])
519 del branching_fractions[
'UNKOWN']
520 return branching_fractions
522 def loadInclusiveBranchingFractions(self, exclusive_branching_fractions):
524 Get covered branching fraction of a particle using a recursive algorithm
525 and the given exclusive branching_fractions (given as Hashable List)
526 @param particle identifier of the particle
527 @param branching_fractions
529 particles = set(exclusive_branching_fractions.keys())
531 particles = sorted(particles, key=
lambda x:
pdg.get(x).Mass())
532 inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)
535 if p
in inclusive_branching_fractions:
536 br = sum(inclusive_branching_fractions[p].values())
538 br = sum(inclusive_branching_fractions[
pdg.conjugate(p)].values())
539 for p_br
in inclusive_branching_fractions.values():
541 for i
in range(c.count(p)):
543 return inclusive_branching_fractions
546 class MonitoringParticle:
548 Monitoring object containing all the monitoring information
549 about a single particle
552 def __init__(self, particle):
554 Read the monitoring information of the given particle
555 @param particle the particle for which the information is read
558 self.particle = particle
560 self.mc_count = MonitoringMCCount(particle)
562 self.module_statistic = MonitoringModuleStatistics(particle)
564 self.time_per_channel = self.module_statistic.channel_time
566 self.time_per_channel_per_module = self.module_statistic.channel_time_per_module
568 self.total_time = self.module_statistic.particle_time + sum(self.time_per_channel.values())
571 self.total_number_of_channels = len(self.particle.channels)
573 self.reconstructed_number_of_channels = 0
576 self.branching_fractions = MonitoringBranchingFractions()
578 self.exc_br_per_channel = self.branching_fractions.getExclusive(particle)
580 self.inc_br_per_channel = self.branching_fractions.getInclusive(particle)
583 self.before_ranking = {}
585 self.after_ranking = {}
587 self.after_vertex = {}
589 self.after_classifier = {}
591 self.training_data = {}
593 self.ignored_channels = {}
595 for channel
in self.particle.channels:
596 hist = MonitoringHist(
'Monitor_PreReconstruction_BeforeRanking.root', f
'{channel.label}')
597 self.before_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
598 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterRanking.root', f
'{channel.label}')
599 self.after_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
600 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterVertex.root', f
'{channel.label}')
601 self.after_vertex[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
602 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterMVA.root', f
'{channel.label}')
603 self.after_classifier[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
604 if hist.valid
and hist.sum(channel.mvaConfig.target) > 0:
605 self.reconstructed_number_of_channels += 1
606 self.ignored_channels[channel.label] =
False
608 self.ignored_channels[channel.label] =
True
609 hist = MonitoringHist(
'Monitor_TrainingData.root', f
'{channel.label}')
610 self.training_data[channel.label] = hist
612 plist = removeJPsiSlash(particle.identifier)
613 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforePostCut.root', f
'{plist}')
615 self.before_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
616 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforeRanking.root', f
'{plist}')
618 self.before_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
619 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterRanking.root', f
'{plist}')
621 self.after_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
623 self.before_tag = self.calculateStatistic(hist, self.particle.mvaConfig.target)
625 self.after_tag = self.calculateUniqueStatistic(hist)
627 self.final_ntuple = MonitoringNTuple(
'Monitor_Final.root', f
'{plist}')
629 def calculateStatistic(self, hist, target):
631 Calculate Statistic object where all signal candidates are considered signal
633 nTrueSig = self.mc_count[
'sum']
635 return Statistic(nTrueSig, 0, 0)
636 signal_bins = (hist.centers[target] > 0.5)
637 bckgrd_bins = ~signal_bins
638 nSig = hist.values[target][signal_bins].sum()
639 nBg = hist.values[target][bckgrd_bins].sum()
640 return Statistic(nTrueSig, nSig, nBg)
642 def calculateUniqueStatistic(self, hist):
644 Calculate Static object where only unique signal candidates are considered signal
646 nTrueSig = self.mc_count[
'sum']
648 return Statistic(nTrueSig, 0, 0)
649 signal_bins = hist.centers[
'extraInfo(uniqueSignal)'] > 0.5
650 bckgrd_bins = hist.centers[
'extraInfo(uniqueSignal)'] <= 0.5
651 nSig = hist.values[
'extraInfo(uniqueSignal)'][signal_bins].sum()
652 nBg = hist.values[
'extraInfo(uniqueSignal)'][bckgrd_bins].sum()
653 return Statistic(nTrueSig, nSig, nBg)