14 Contains classes to read in the monitoring output
15 and some simple plotting routines.
17 This is used by printReporting.py and latexReporting.py
18 to create summaries for a FEI training or application.
22 from generators
import get_default_decayfile
23except ModuleNotFoundError:
24 print(
"MonitoringBranchingFractions won't work.")
25from basf2_mva_evaluation
import plotting
35def removeJPsiSlash(string):
36 """ Remove slashes in a string, which is not allowed for filenames. """
37 return string.replace(
'/',
'')
41 """ Load the FEI configuration from the Summary.pickle file. """
42 if not os.path.isfile(
'Summary.pickle'):
43 raise RuntimeError(
"""Could not find Summary.pickle!
44 This file is automatically created by the FEI training.
45 But you can also create it yourself using:
46 pickle.dump((particles, configuration), open(
'Summary.pickle',
'wb'))
""")
47 return pickle.load(open(
'Summary.pickle',
'rb'))
52 This class provides the efficiency, purity and other quantities for a given number of true signal candidates, signal candidates
and background candidates
55 def __init__(self, nTrueSig, nSig, nBg):
57 Create a new Statistic object
58 @param nTrueSig the number of true signal particles
59 @param nSig the number of reconstructed signal candidates
60 @param nBg the number of reconstructed background candidates
63 self.nTrueSig = nTrueSig
71 """ Returns total number of reconstructed candidates. """
72 return self.nSig + self.nBg
76 """ Returns the purity of the reconstructed candidates. """
81 return self.nSig / float(self.nTotal)
85 """ Returns the efficiency of the reconstructed signal candidates with respect to the number of true signal particles. """
88 if self.nTrueSig == 0:
90 return self.nSig / float(self.nTrueSig)
93 def purityError(self):
94 """ Returns the uncertainty of the purity. """
97 return self.calcStandardDeviation(self.nSig, self.nTotal)
100 def efficiencyError(self):
102 Returns the uncertainty of the efficiency.
103 For an efficiency eps = self.nSig/self.nTrueSig, this function calculates the
104 standard deviation according to http://arxiv.org/abs/physics/0701199 .
106 if self.nTrueSig == 0:
108 return self.calcStandardDeviation(self.nSig, self.nTrueSig)
110 def calcStandardDeviation(self, k, n):
111 """ Helper method to calculate the standard deviation for efficiencies. """
114 variance = (k + 1) * (k + 2) / ((n + 2) * (n + 3)) - (k + 1) ** 2 / ((n + 2) ** 2)
117 return math.sqrt(variance)
120 """ Returns a string representation of a Statistic object. """
121 o = f
"nTrueSig {self.nTrueSig} nSig {self.nSig} nBg {self.nBg}\n"
122 o += f
"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n"
123 o += f
"Purity {self.purity:.3f} ({self.purityError:.3f})\n"
126 def __add__(self, a):
127 """ Adds two Statistics objects and returns a new object. """
128 return Statistic(self.nTrueSig, self.nSig + a.nSig, self.nBg + a.nBg)
130 def __radd__(self, a):
132 Returns a new Statistic object if the current one
is added to zero.
133 Necessary to apply sum-function to Statistic objects.
136 return NotImplemented
137 return Statistic(self.nTrueSig, self.nSig, self.nBg)
142 Reads all TH1F and TH2F
from a ROOT file
143 and puts them into a more accessible format.
146 def __init__(self, filename, dirname):
148 Reads histograms from the given file
149 @param filename the name of the ROOT file
160 self.valid = os.path.isfile(filename)
165 f = ROOT.TFile.Open(filename,
'read')
166 d = f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(dirname))
168 for key
in d.GetListOfKeys():
169 name = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
171 if not (isinstance(hist, ROOT.TH1D)
or isinstance(hist, ROOT.TH1F)
or
172 isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)):
174 two_dimensional = isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)
176 nbins = (hist.GetNbinsX(), hist.GetNbinsY())
177 self.centers[name] = np.array([[hist.GetXaxis().GetBinCenter(i)
for i
in range(nbins[0] + 2)],
178 [hist.GetYaxis().GetBinCenter(i)
for i
in range(nbins[1] + 2)]])
179 self.values[name] = np.array([[hist.GetBinContent(i, j)
for i
in range(nbins[0] + 2)]
for j
in range(nbins[1] + 2)])
180 self.nbins[name] = nbins
182 nbins = hist.GetNbinsX()
183 self.centers[name] = np.array([hist.GetBinCenter(i)
for i
in range(nbins + 2)])
184 self.values[name] = np.array([hist.GetBinContent(i)
for i
in range(nbins + 2)])
185 self.nbins[name] = nbins
189 Calculates the sum of a given histogram (== sum of all entries)
190 @param name key of the histogram
192 if name
not in self.centers:
194 return np.sum(self.values[name])
196 def mean(self, name):
198 Calculates the mean of a given histogram
199 @param name key of the histogram
201 if name
not in self.centers:
203 return np.average(self.centers[name], weights=self.values[name])
207 Calculates the standard deviation of a given histogram
208 @param name key of the histogram
210 if name
not in self.centers:
212 avg = np.average(self.centers[name], weights=self.values[name])
213 return np.sqrt(np.average((self.centers[name] - avg)**2, weights=self.values[name]))
217 Calculates the minimum of a given histogram
218 @param name key of the histogram
220 if name
not in self.centers:
222 nonzero = np.nonzero(self.values[name])[0]
223 if len(nonzero) == 0:
225 return self.centers[name][nonzero[0]]
229 Calculates the maximum of a given histogram
230 @param name key of the histogram
232 if name
not in self.centers:
234 nonzero = np.nonzero(self.values[name])[0]
235 if len(nonzero) == 0:
237 return self.centers[name][nonzero[-1]]
240class MonitoringNTuple:
242 Reads the ntuple named variables from a ROOT file
245 def __init__(self, filename, treenameprefix):
247 Reads ntuple from the given file
248 @param filename the name of the ROOT file
253 self.valid = os.path.isfile(filename)
257 self.f = ROOT.TFile.Open(filename,
'read')
259 self.tree = self.f.Get(f
'{treenameprefix} variables')
261 self.filename = filename
264class MonitoringModuleStatistics:
266 Reads the module statistics for a single particle
from the outputted root file
267 and puts them into a more accessible format
270 def __init__(self, particle):
272 Reads the module statistics from the file named Monitor_ModuleStatistics.root
273 @param particle the particle
for which the statistics are read
277 root_file = ROOT.TFile.Open(
'Monitor_ModuleStatistics.root',
'read')
278 persistentTree = root_file.Get(
'persistent')
279 persistentTree.GetEntry(0)
281 stats = persistentTree.ProcessStatistics.Clone()
284 numEntries = persistentTree.GetEntriesFast()
285 for i
in range(1, numEntries):
286 persistentTree.GetEntry(i)
287 stats.merge(persistentTree.ProcessStatistics)
290 statistic = {m.getName(): m.getTimeSum(m.c_Event) / 1e9
for m
in stats.getAll()}
293 self.channel_time = {}
295 self.channel_time_per_module = {}
296 for channel
in particle.channels:
297 if channel.label
not in self.channel_time:
298 self.channel_time[channel.label] = 0.0
299 self.channel_time_per_module[channel.label] = {
'ParticleCombiner': 0.0,
300 'BestCandidateSelection': 0.0,
301 'PListCutAndCopy': 0.0,
302 'VariablesToExtraInfo': 0.0,
304 'ParticleSelector': 0.0,
306 'ParticleVertexFitter': 0.0,
307 'TagUniqueSignal': 0.0,
308 'VariablesToHistogram': 0.0,
309 'VariablesToNtuple': 0.0}
310 for key, time
in statistic.items():
311 if(channel.decayString
in key
or channel.name
in key):
312 self.channel_time[channel.label] += time
313 for k
in self.channel_time_per_module[channel.label]:
315 self.channel_time_per_module[channel.label][k] += time
318 self.particle_time = 0
319 for key, time
in statistic.items():
320 if particle.identifier
in key:
321 self.particle_time += time
324def MonitorCosBDLPlot(particle, filename):
325 """ Creates a CosBDL plot using ROOT. """
326 if not particle.final_ntuple.valid:
328 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
329 [
'extraInfo__bouniqueSignal__bc',
'cosThetaBetweenParticleAndNominalB',
330 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
331 [
'unique',
'cosThetaBDl',
'probability',
'signal'])
332 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
334 common = (np.abs(df[
'cosThetaBDl']) < 10) & (df[
'probability'] >= cut)
336 p.add(df,
'cosThetaBDl', (df[
'signal'] == 1), label=
"Signal")
337 p.add(df,
'cosThetaBDl', (df[
'signal'] == 0), label=
"Background")
339 p.axis.set_title(f
"Cosine of Theta between B and Dl system for signal probability >= {cut:.2f}")
340 p.axis.set_xlabel(
"CosThetaBDl")
341 p.save(f
'{filename}_{i}.png')
344def MonitorMbcPlot(particle, filename):
345 """ Creates a Mbc plot using ROOT. """
346 if not particle.final_ntuple.valid:
348 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
349 [
'extraInfo__bouniqueSignal__bc',
'Mbc',
350 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
351 [
'unique',
'Mbc',
'probability',
'signal'])
352 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
354 common = (df[
'Mbc'] > 5.23) & (df[
'probability'] >= cut)
356 p.add(df,
'Mbc', (df[
'signal'] == 1), label=
"Signal")
357 p.add(df,
'Mbc', (df[
'signal'] == 0), label=
"Background")
359 p.axis.set_title(f
"Beam constrained mass for signal probability >= {cut:.2f}")
360 p.axis.set_xlabel(
"Mbc")
361 p.save(f
'{filename}_{i}.png')
364def MonitorROCPlot(particle, filename):
365 """ Creates a ROC plot using ROOT. """
366 if not particle.final_ntuple.valid:
368 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
369 [
'extraInfo__bouniqueSignal__bc',
370 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
371 [
'unique',
'probability',
'signal'])
373 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0, label=
'All')
375 p.save(filename +
'.png')
378def MonitorDiagPlot(particle, filename):
379 """ Creates a Diagonal plot using ROOT. """
380 if not particle.final_ntuple.valid:
382 df = basf2_mva_util.tree2dict(particle.final_ntuple.tree,
383 [
'extraInfo__bouniqueSignal__bc',
384 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
385 [
'unique',
'probability',
'signal'])
387 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0)
389 p.save(filename +
'.png')
392def MonitoringMCCount(particle):
394 Reads the MC Counts for a given particle
from the ROOT file mcParticlesCount.root
395 @param particle the particle
for which the MC counts are read
396 @return dictionary
with 'sum',
'std',
'avg',
'max',
and 'min'
400 root_file = ROOT.TFile.Open(
'mcParticlesCount.root',
'read')
402 key = f
'NumberOfMCParticlesInEvent({abs(pdg.from_name(particle.name))})'
404 key = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(key)
405 hist = root_file.Get(key)
407 mc_counts = {
'sum': 0,
'std': 0,
'avg': 0,
'min': 0,
'max': 0}
409 mc_counts[
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
410 for bin
in range(hist.GetNbinsX()))
411 mc_counts[
'std'] = hist.GetStdDev()
412 mc_counts[
'avg'] = hist.GetMean()
413 mc_counts[
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
414 mc_counts[
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
418class MonitoringBranchingFractions:
419 """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """
425 Create a new MonitoringBranchingFraction object.
426 The extracted branching fractions are cached, hence creating more than one object does not do anything.
428 if MonitoringBranchingFractions._shared
is None:
429 decay_file = get_default_decayfile()
431 self.exclusive_branching_fractions = self.loadExclusiveBranchingFractions(decay_file)
433 self.inclusive_branching_fractions = self.loadInclusiveBranchingFractions(self.exclusive_branching_fractions)
434 MonitoringBranchingFractions._shared = (self.exclusive_branching_fractions, self.inclusive_branching_fractions)
436 self.exclusive_branching_fractions, self.inclusive_branching_fractions = MonitoringBranchingFractions._shared
438 def getExclusive(self, particle):
439 """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
440 return self.getBranchingFraction(particle, self.exclusive_branching_fractions)
442 def getInclusive(self, particle):
443 """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
444 return self.getBranchingFraction(particle, self.inclusive_branching_fractions)
446 def getBranchingFraction(self, particle, branching_fractions):
447 """ Returns the branching fraction of a particle given a branching_fraction table. """
448 result = {c.label: 0.0
for c
in particle.channels}
450 channels = [tuple(sorted(d.split(
':')[0]
for d
in channel.daughters))
for channel
in particle.channels]
451 if name
not in branching_fractions:
453 channels = [tuple(
pdg.conjugate(d)
for d
in channel)
for channel
in channels]
454 if name
not in branching_fractions:
456 for c, key
in zip(particle.channels, channels):
457 if key
in branching_fractions[name]:
458 result[c.label] = branching_fractions[name][key]
461 def loadExclusiveBranchingFractions(self, filename):
463 Load branching fraction from MC decay-file.
466 def isFloat(element):
467 """ Checks if element is a convertible to float"""
474 def isValidParticle(element):
475 """ Checks if element is a valid pdg name for a particle"""
482 branching_fractions = {
'UNKOWN': {}}
485 with open(filename)
as f:
487 fields = line.split(
' ')
488 fields = [x
for x
in fields
if x !=
'']
489 if len(fields) < 2
or fields[0][0] ==
'#':
491 if fields[0] ==
'Decay':
492 mother = fields[1].strip()
493 if not isValidParticle(mother):
496 if fields[0] ==
'Enddecay':
499 if mother ==
'UNKOWN':
502 if len(fields) < 1
or not isFloat(fields[0]):
504 while len(fields) > 1:
505 if isValidParticle(fields[-1]):
508 if len(fields) < 1
or not all(isValidParticle(p)
for p
in fields[1:]):
510 neutrinoTag_list = [
'nu_e',
'nu_mu',
'nu_tau',
'anti-nu_e',
'anti-nu_mu',
'anti-nu_tau']
511 daughters = tuple(sorted(p
for p
in fields[1:]
if p
not in neutrinoTag_list))
512 if mother
not in branching_fractions:
513 branching_fractions[mother] = {}
514 if daughters
not in branching_fractions[mother]:
515 branching_fractions[mother][daughters] = 0.0
516 branching_fractions[mother][daughters] += float(fields[0])
518 del branching_fractions[
'UNKOWN']
519 return branching_fractions
521 def loadInclusiveBranchingFractions(self, exclusive_branching_fractions):
523 Get covered branching fraction of a particle using a recursive algorithm
524 and the given exclusive branching_fractions (given
as Hashable List)
525 @param particle identifier of the particle
526 @param branching_fractions
528 particles = set(exclusive_branching_fractions.keys())
530 particles = sorted(particles, key=
lambda x:
pdg.get(x).Mass())
531 inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)
534 if p
in inclusive_branching_fractions:
535 br = sum(inclusive_branching_fractions[p].values())
537 br = sum(inclusive_branching_fractions[
pdg.conjugate(p)].values())
538 for p_br
in inclusive_branching_fractions.values():
540 for i
in range(c.count(p)):
542 return inclusive_branching_fractions
545class MonitoringParticle:
547 Monitoring object containing all the monitoring information
548 about a single particle
551 def __init__(self, particle):
553 Read the monitoring information of the given particle
554 @param particle the particle
for which the information
is read
557 self.particle = particle
559 self.mc_count = MonitoringMCCount(particle)
561 self.module_statistic = MonitoringModuleStatistics(particle)
563 self.time_per_channel = self.module_statistic.channel_time
565 self.time_per_channel_per_module = self.module_statistic.channel_time_per_module
567 self.total_time = self.module_statistic.particle_time + sum(self.time_per_channel.values())
570 self.total_number_of_channels = len(self.particle.channels)
572 self.reconstructed_number_of_channels = 0
575 self.branching_fractions = MonitoringBranchingFractions()
577 self.exc_br_per_channel = self.branching_fractions.getExclusive(particle)
579 self.inc_br_per_channel = self.branching_fractions.getInclusive(particle)
582 self.before_ranking = {}
584 self.after_ranking = {}
586 self.after_vertex = {}
588 self.after_classifier = {}
590 self.training_data = {}
592 self.ignored_channels = {}
594 for channel
in self.particle.channels:
595 hist = MonitoringHist(
'Monitor_PreReconstruction_BeforeRanking.root', f
'{channel.label}')
596 self.before_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
597 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterRanking.root', f
'{channel.label}')
598 self.after_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
599 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterVertex.root', f
'{channel.label}')
600 self.after_vertex[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
601 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterMVA.root', f
'{channel.label}')
602 self.after_classifier[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
603 if hist.valid
and hist.sum(channel.mvaConfig.target) > 0:
604 self.reconstructed_number_of_channels += 1
605 self.ignored_channels[channel.label] =
False
607 self.ignored_channels[channel.label] =
True
608 hist = MonitoringHist(
'Monitor_TrainingData.root', f
'{channel.label}')
609 self.training_data[channel.label] = hist
611 plist = removeJPsiSlash(particle.identifier)
612 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforePostCut.root', f
'{plist}')
614 self.before_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
615 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforeRanking.root', f
'{plist}')
617 self.before_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
618 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterRanking.root', f
'{plist}')
620 self.after_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
622 self.before_tag = self.calculateStatistic(hist, self.particle.mvaConfig.target)
624 self.after_tag = self.calculateUniqueStatistic(hist)
626 self.final_ntuple = MonitoringNTuple(
'Monitor_Final.root', f
'{plist}')
628 def calculateStatistic(self, hist, target):
630 Calculate Statistic object where all signal candidates are considered signal
632 nTrueSig = self.mc_count['sum']
634 return Statistic(nTrueSig, 0, 0)
635 signal_bins = (hist.centers[target] > 0.5)
636 bckgrd_bins = ~signal_bins
637 nSig = hist.values[target][signal_bins].sum()
638 nBg = hist.values[target][bckgrd_bins].sum()
639 return Statistic(nTrueSig, nSig, nBg)
641 def calculateUniqueStatistic(self, hist):
643 Calculate Static object where only unique signal candidates are considered signal
645 nTrueSig = self.mc_count['sum']
647 return Statistic(nTrueSig, 0, 0)
648 signal_bins = hist.centers[
'extraInfo(uniqueSignal)'] > 0.5
649 bckgrd_bins = hist.centers[
'extraInfo(uniqueSignal)'] <= 0.5
650 nSig = hist.values[
'extraInfo(uniqueSignal)'][signal_bins].sum()
651 nBg = hist.values[
'extraInfo(uniqueSignal)'][bckgrd_bins].sum()
652 return Statistic(nTrueSig, nSig, nBg)