14 Contains classes to read in the monitoring output
15 and some simple plotting routines.
17 This is used by printReporting.py and latexReporting.py
18 to create summaries for a FEI training or application.
22 from generators
import get_default_decayfile
23 except ModuleNotFoundError:
24 print(
"MonitoringBranchingFractions won't work.")
25 from basf2_mva_evaluation
import plotting
35 def removeJPsiSlash(string):
36 """ Remove slashes in a string, which is not allowed for filenames. """
37 return string.replace(
'/',
'')
41 """ Load the FEI configuration from the Summary.pickle file. """
42 if not os.path.isfile(
'Summary.pickle'):
43 raise RuntimeError(
"""Could not find Summary.pickle!
44 This file is automatically created by the FEI training.
45 But you can also create it yourself using:
46 pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))""")
47 return pickle.load(open(
'Summary.pickle',
'rb'))
52 This class provides the efficiency, purity and other quantities for a
53 given number of true signal candidates, signal candidates and background candidates
56 def __init__(self, nTrueSig, nSig, nBg):
58 Create a new Statistic object
59 @param nTrueSig the number of true signal particles
60 @param nSig the number of reconstructed signal candidates
61 @param nBg the number of reconstructed background candidates
64 self.nTrueSig = nTrueSig
72 """ Returns total number of reconstructed candidates. """
73 return self.nSig + self.nBg
77 """ Returns the purity of the reconstructed candidates. """
82 return self.nSig / float(self.nTotal)
86 """ Returns the efficiency of the reconstructed signal candidates with respect to the number of true signal particles. """
89 if self.nTrueSig == 0:
91 return self.nSig / float(self.nTrueSig)
94 def purityError(self):
95 """ Returns the uncertainty of the purity. """
98 return self.calcStandardDeviation(self.nSig, self.nTotal)
101 def efficiencyError(self):
103 Returns the uncertainty of the efficiency.
104 For an efficiency eps = self.nSig/self.nTrueSig, this function calculates the
105 standard deviation according to http://arxiv.org/abs/physics/0701199 .
107 if self.nTrueSig == 0:
109 return self.calcStandardDeviation(self.nSig, self.nTrueSig)
111 def calcStandardDeviation(self, k, n):
112 """ Helper method to calculate the standard deviation for efficiencies. """
115 variance = (k + 1) * (k + 2) / ((n + 2) * (n + 3)) - (k + 1) ** 2 / ((n + 2) ** 2)
118 return math.sqrt(variance)
121 """ Returns a string representation of a Statistic object. """
122 o = f
"nTrueSig {self.nTrueSig} nSig {self.nSig} nBg {self.nBg}\n"
123 o += f
"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n"
124 o += f
"Purity {self.purity:.3f} ({self.purityError:.3f})\n"
127 def __add__(self, a):
128 """ Adds two Statistics objects and returns a new object. """
129 return Statistic(self.nTrueSig, self.nSig + a.nSig, self.nBg + a.nBg)
131 def __radd__(self, a):
133 Returns a new Statistic object if the current one is added to zero.
134 Necessary to apply sum-function to Statistic objects.
137 return NotImplemented
138 return Statistic(self.nTrueSig, self.nSig, self.nBg)
141 class MonitoringHist:
143 Reads all TH1F and TH2F from a ROOT file
144 and puts them into a more accessible format.
147 def __init__(self, filename, dirname):
149 Reads histograms from the given file
150 @param filename the name of the ROOT file
160 self.valid = os.path.isfile(filename)
165 f = ROOT.TFile.Open(filename,
'read')
166 d = f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(dirname))
168 for key
in d.GetListOfKeys():
169 name = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
171 if not (isinstance(hist, ROOT.TH1D)
or isinstance(hist, ROOT.TH1F)
or
172 isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)):
174 two_dimensional = isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)
176 nbins = (hist.GetNbinsX(), hist.GetNbinsY())
177 self.centers[name] = np.array([[hist.GetXaxis().GetBinCenter(i)
for i
in range(nbins[0] + 2)],
178 [hist.GetYaxis().GetBinCenter(i)
for i
in range(nbins[1] + 2)]])
179 self.values[name] = np.array([[hist.GetBinContent(i, j)
for i
in range(nbins[0] + 2)]
for j
in range(nbins[1] + 2)])
180 self.nbins[name] = nbins
182 nbins = hist.GetNbinsX()
183 self.centers[name] = np.array([hist.GetBinCenter(i)
for i
in range(nbins + 2)])
184 self.values[name] = np.array([hist.GetBinContent(i)
for i
in range(nbins + 2)])
185 self.nbins[name] = nbins
189 Calculates the sum of a given histogram (== sum of all entries)
190 @param name key of the histogram
192 if name
not in self.centers:
194 return np.sum(self.values[name])
196 def mean(self, name):
198 Calculates the mean of a given histogram
199 @param name key of the histogram
201 if name
not in self.centers:
203 return np.average(self.centers[name], weights=self.values[name])
207 Calculates the standard deviation of a given histogram
208 @param name key of the histogram
210 if name
not in self.centers:
212 avg = np.average(self.centers[name], weights=self.values[name])
213 return np.sqrt(np.average((self.centers[name] - avg)**2, weights=self.values[name]))
217 Calculates the minimum of a given histogram
218 @param name key of the histogram
220 if name
not in self.centers:
222 nonzero = np.nonzero(self.values[name])[0]
223 if len(nonzero) == 0:
225 return self.centers[name][nonzero[0]]
229 Calculates the maximum of a given histogram
230 @param name key of the histogram
232 if name
not in self.centers:
234 nonzero = np.nonzero(self.values[name])[0]
235 if len(nonzero) == 0:
237 return self.centers[name][nonzero[-1]]
240 class MonitoringNTuple:
242 Reads the ntuple named variables from a ROOT file
245 def __init__(self, filename, treenameprefix):
247 Reads ntuple from the given file
248 @param filename the name of the ROOT file
252 self.valid = os.path.isfile(filename)
256 self.f = ROOT.TFile.Open(filename,
'read')
258 self.tree = self.f.Get(f
'{treenameprefix} variables')
260 self.filename = filename
263 class MonitoringModuleStatistics:
265 Reads the module statistics for a single particle from the outputted root file
266 and puts them into a more accessible format
269 def __init__(self, particle):
271 Reads the module statistics from the file named Monitor_ModuleStatistics.root
272 @param particle the particle for which the statistics are read
275 root_file = ROOT.TFile.Open(
'Monitor_ModuleStatistics.root',
'read')
276 persistentTree = root_file.Get(
'persistent')
277 persistentTree.GetEntry(0)
279 stats = persistentTree.ProcessStatistics.Clone()
282 numEntries = persistentTree.GetEntriesFast()
283 for i
in range(1, numEntries):
284 persistentTree.GetEntry(i)
285 stats.merge(persistentTree.ProcessStatistics)
288 statistic = {m.getName(): m.getTimeSum(m.c_Event) / 1e9
for m
in stats.getAll()}
291 self.channel_time = {}
293 self.channel_time_per_module = {}
294 for channel
in particle.channels:
295 if channel.label
not in self.channel_time:
296 self.channel_time[channel.label] = 0.0
297 self.channel_time_per_module[channel.label] = {
'ParticleCombiner': 0.0,
298 'BestCandidateSelection': 0.0,
299 'PListCutAndCopy': 0.0,
300 'VariablesToExtraInfo': 0.0,
302 'ParticleSelector': 0.0,
304 'ParticleVertexFitter': 0.0,
305 'TagUniqueSignal': 0.0,
306 'VariablesToHistogram': 0.0,
307 'VariablesToNtuple': 0.0}
308 for key, time
in statistic.items():
309 if(channel.decayString
in key
or channel.name
in key):
310 self.channel_time[channel.label] += time
311 for k
in self.channel_time_per_module[channel.label]:
313 self.channel_time_per_module[channel.label][k] += time
316 self.particle_time = 0
317 for key, time
in statistic.items():
318 if particle.identifier
in key:
319 self.particle_time += time
322 def MonitorCosBDLPlot(particle, filename):
323 """ Creates a CosBDL plot using ROOT. """
324 if not particle.final_ntuple.valid:
327 [
'extraInfo__bouniqueSignal__bc',
'cosThetaBetweenParticleAndNominalB',
328 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
329 [
'unique',
'cosThetaBDl',
'probability',
'signal'])
330 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
332 common = (np.abs(df[
'cosThetaBDl']) < 10) & (df[
'probability'] >= cut)
334 p.add(df,
'cosThetaBDl', (df[
'signal'] == 1), label=
"Signal")
335 p.add(df,
'cosThetaBDl', (df[
'signal'] == 0), label=
"Background")
337 p.axis.set_title(f
"Cosine of Theta between B and Dl system for signal probability >= {cut:.2f}")
338 p.axis.set_xlabel(
"CosThetaBDl")
339 p.save(f
'{filename}_{i}.png')
342 def MonitorMbcPlot(particle, filename):
343 """ Creates a Mbc plot using ROOT. """
344 if not particle.final_ntuple.valid:
347 [
'extraInfo__bouniqueSignal__bc',
'Mbc',
348 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
349 [
'unique',
'Mbc',
'probability',
'signal'])
350 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
352 common = (df[
'Mbc'] > 5.23) & (df[
'probability'] >= cut)
354 p.add(df,
'Mbc', (df[
'signal'] == 1), label=
"Signal")
355 p.add(df,
'Mbc', (df[
'signal'] == 0), label=
"Background")
357 p.axis.set_title(f
"Beam constrained mass for signal probability >= {cut:.2f}")
358 p.axis.set_xlabel(
"Mbc")
359 p.save(f
'{filename}_{i}.png')
362 def MonitorROCPlot(particle, filename):
363 """ Creates a ROC plot using ROOT. """
364 if not particle.final_ntuple.valid:
367 [
'extraInfo__bouniqueSignal__bc',
368 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
369 [
'unique',
'probability',
'signal'])
371 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0, label=
'All')
373 p.save(filename +
'.png')
376 def MonitorDiagPlot(particle, filename):
377 """ Creates a Diagonal plot using ROOT. """
378 if not particle.final_ntuple.valid:
381 [
'extraInfo__bouniqueSignal__bc',
382 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
383 [
'unique',
'probability',
'signal'])
385 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0)
387 p.save(filename +
'.png')
390 def MonitoringMCCount(particle):
392 Reads the MC Counts for a given particle from the ROOT file mcParticlesCount.root
393 @param particle the particle for which the MC counts are read
394 @return dictionary with 'sum', 'std', 'avg', 'max', and 'min'
397 root_file = ROOT.TFile.Open(
'mcParticlesCount.root',
'read')
399 key = f
'NumberOfMCParticlesInEvent({abs(pdg.from_name(particle.name))})'
401 key = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(key)
402 hist = root_file.Get(key)
404 mc_counts = {
'sum': 0,
'std': 0,
'avg': 0,
'min': 0,
'max': 0}
406 mc_counts[
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
407 for bin
in range(hist.GetNbinsX()))
408 mc_counts[
'std'] = hist.GetStdDev()
409 mc_counts[
'avg'] = hist.GetMean()
410 mc_counts[
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
411 mc_counts[
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
415 class MonitoringBranchingFractions:
416 """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """
422 Create a new MonitoringBranchingFraction object.
423 The extracted branching fractions are cached, hence creating more than one object does not do anything.
425 if MonitoringBranchingFractions._shared
is None:
426 decay_file = get_default_decayfile()
428 self.exclusive_branching_fractions = self.loadExclusiveBranchingFractions(decay_file)
430 self.inclusive_branching_fractions = self.loadInclusiveBranchingFractions(self.exclusive_branching_fractions)
431 MonitoringBranchingFractions._shared = (self.exclusive_branching_fractions, self.inclusive_branching_fractions)
433 self.exclusive_branching_fractions, self.inclusive_branching_fractions = MonitoringBranchingFractions._shared
435 def getExclusive(self, particle):
436 """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
437 return self.getBranchingFraction(particle, self.exclusive_branching_fractions)
439 def getInclusive(self, particle):
440 """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
441 return self.getBranchingFraction(particle, self.inclusive_branching_fractions)
443 def getBranchingFraction(self, particle, branching_fractions):
444 """ Returns the branching fraction of a particle given a branching_fraction table. """
445 result = {c.label: 0.0
for c
in particle.channels}
447 channels = [tuple(sorted(d.split(
':')[0]
for d
in channel.daughters))
for channel
in particle.channels]
448 if name
not in branching_fractions:
450 channels = [tuple(
pdg.conjugate(d)
for d
in channel)
for channel
in channels]
451 if name
not in branching_fractions:
453 for c, key
in zip(particle.channels, channels):
454 if key
in branching_fractions[name]:
455 result[c.label] = branching_fractions[name][key]
458 def loadExclusiveBranchingFractions(self, filename):
460 Load branching fraction from MC decay-file.
463 def isFloat(element):
464 """ Checks if element is a convertible to float"""
471 def isValidParticle(element):
472 """ Checks if element is a valid pdg name for a particle"""
479 branching_fractions = {
'UNKOWN': {}}
482 with open(filename)
as f:
484 fields = line.split(
' ')
485 fields = [x
for x
in fields
if x !=
'']
486 if len(fields) < 2
or fields[0][0] ==
'#':
488 if fields[0] ==
'Decay':
489 mother = fields[1].strip()
490 if not isValidParticle(mother):
493 if fields[0] ==
'Enddecay':
496 if mother ==
'UNKOWN':
499 if len(fields) < 1
or not isFloat(fields[0]):
501 while len(fields) > 1:
502 if isValidParticle(fields[-1]):
505 if len(fields) < 1
or not all(isValidParticle(p)
for p
in fields[1:]):
507 neutrinoTag_list = [
'nu_e',
'nu_mu',
'nu_tau',
'anti-nu_e',
'anti-nu_mu',
'anti-nu_tau']
508 daughters = tuple(sorted(p
for p
in fields[1:]
if p
not in neutrinoTag_list))
509 if mother
not in branching_fractions:
510 branching_fractions[mother] = {}
511 if daughters
not in branching_fractions[mother]:
512 branching_fractions[mother][daughters] = 0.0
513 branching_fractions[mother][daughters] += float(fields[0])
515 del branching_fractions[
'UNKOWN']
516 return branching_fractions
518 def loadInclusiveBranchingFractions(self, exclusive_branching_fractions):
520 Get covered branching fraction of a particle using a recursive algorithm
521 and the given exclusive branching_fractions (given as Hashable List)
522 @param particle identifier of the particle
523 @param branching_fractions
525 particles = set(exclusive_branching_fractions.keys())
527 particles = sorted(particles, key=
lambda x:
pdg.get(x).Mass())
528 inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)
531 if p
in inclusive_branching_fractions:
532 br = sum(inclusive_branching_fractions[p].values())
534 br = sum(inclusive_branching_fractions[
pdg.conjugate(p)].values())
535 for p_br
in inclusive_branching_fractions.values():
537 for i
in range(c.count(p)):
539 return inclusive_branching_fractions
542 class MonitoringParticle:
544 Monitoring object containing all the monitoring information
545 about a single particle
548 def __init__(self, particle):
550 Read the monitoring information of the given particle
551 @param particle the particle for which the information is read
554 self.particle = particle
556 self.mc_count = MonitoringMCCount(particle)
558 self.module_statistic = MonitoringModuleStatistics(particle)
560 self.time_per_channel = self.module_statistic.channel_time
562 self.time_per_channel_per_module = self.module_statistic.channel_time_per_module
564 self.total_time = self.module_statistic.particle_time + sum(self.time_per_channel.values())
567 self.total_number_of_channels = len(self.particle.channels)
569 self.reconstructed_number_of_channels = 0
572 self.branching_fractions = MonitoringBranchingFractions()
574 self.exc_br_per_channel = self.branching_fractions.getExclusive(particle)
576 self.inc_br_per_channel = self.branching_fractions.getInclusive(particle)
579 self.before_ranking = {}
581 self.after_ranking = {}
583 self.after_vertex = {}
585 self.after_classifier = {}
587 self.training_data = {}
589 self.ignored_channels = {}
591 for channel
in self.particle.channels:
592 hist = MonitoringHist(
'Monitor_PreReconstruction_BeforeRanking.root', f
'{channel.label}')
593 self.before_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
594 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterRanking.root', f
'{channel.label}')
595 self.after_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
596 hist = MonitoringHist(
'Monitor_PreReconstruction_AfterVertex.root', f
'{channel.label}')
597 self.after_vertex[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
598 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterMVA.root', f
'{channel.label}')
599 self.after_classifier[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
600 if hist.valid
and hist.sum(channel.mvaConfig.target) > 0:
601 self.reconstructed_number_of_channels += 1
602 self.ignored_channels[channel.label] =
False
604 self.ignored_channels[channel.label] =
True
605 hist = MonitoringHist(
'Monitor_TrainingData.root', f
'{channel.label}')
606 self.training_data[channel.label] = hist
608 plist = removeJPsiSlash(particle.identifier)
609 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforePostCut.root', f
'{plist}')
611 self.before_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
612 hist = MonitoringHist(
'Monitor_PostReconstruction_BeforeRanking.root', f
'{plist}')
614 self.before_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
615 hist = MonitoringHist(
'Monitor_PostReconstruction_AfterRanking.root', f
'{plist}')
617 self.after_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
619 self.before_tag = self.calculateStatistic(hist, self.particle.mvaConfig.target)
621 self.after_tag = self.calculateUniqueStatistic(hist)
623 self.final_ntuple = MonitoringNTuple(
'Monitor_Final.root', f
'{plist}')
625 def calculateStatistic(self, hist, target):
627 Calculate Statistic object where all signal candidates are considered signal
629 nTrueSig = self.mc_count[
'sum']
631 return Statistic(nTrueSig, 0, 0)
632 signal_bins = (hist.centers[target] > 0.5)
633 bckgrd_bins = ~signal_bins
634 nSig = hist.values[target][signal_bins].sum()
635 nBg = hist.values[target][bckgrd_bins].sum()
636 return Statistic(nTrueSig, nSig, nBg)
638 def calculateUniqueStatistic(self, hist):
640 Calculate Static object where only unique signal candidates are considered signal
642 nTrueSig = self.mc_count[
'sum']
644 return Statistic(nTrueSig, 0, 0)
645 signal_bins = hist.centers[
'extraInfo(uniqueSignal)'] > 0.5
646 bckgrd_bins = hist.centers[
'extraInfo(uniqueSignal)'] <= 0.5
647 nSig = hist.values[
'extraInfo(uniqueSignal)'][signal_bins].sum()
648 nBg = hist.values[
'extraInfo(uniqueSignal)'][bckgrd_bins].sum()
649 return Statistic(nTrueSig, nSig, nBg)
def tree2dict(tree, tree_columns, dict_columns=None)