9 Contains classes to read in the monitoring output
10 and some simple plotting routines.
12 This is used by printReporting.py and latexReporting.py
13 to create summaries for a FEI training or application.
17 from generators
import get_default_decayfile
18 except ModuleNotFoundError:
19 print(
"MonitoringBranchingFractions won't work.")
20 from basf2_mva_evaluation
import plotting
28 from ROOT
import Belle2
30 from ROOT
import gSystem
31 gSystem.Load(
'libanalysis.so')
35 def removeJPsiSlash(string):
36 """ Remove slashes in a string, which is not allowed for filenames. """
37 return string.replace(
'/',
'')
41 """ Load the FEI configuration from the Summary.pickle file. """
42 if not os.path.isfile(
'Summary.pickle'):
43 raise RuntimeError(
"""Could not find Summary.pickle!
44 This file is automatically created by the FEI training.
45 But you can also create it yourself using:
46 pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))""")
47 return pickle.load(open(
'Summary.pickle',
'rb'))
50 class Statistic(object):
52 This class provides the efficiency, purity and other quantities for a
53 given number of true signal candidates, signal candidates and background candidates
56 def __init__(self, nTrueSig, nSig, nBg):
58 Create a new Statistic object
59 @param nTrueSig the number of true signal particles
60 @param nSig the number of reconstructed signal candidates
61 @param nBg the number of reconstructed background candidates
64 self.nTrueSig = nTrueSig
72 """ Returns total number of reconstructed candidates. """
73 return self.nSig + self.nBg
77 """ Returns the purity of the reconstructed candidates. """
82 return self.nSig / float(self.nTotal)
86 """ Returns the efficiency of the reconstructed signal candidates with respect to the number of true signal particles. """
89 if self.nTrueSig == 0:
91 return self.nSig / float(self.nTrueSig)
94 def purityError(self):
95 """ Returns the uncertainty of the purity. """
98 return self.calcStandardDeviation(self.nSig, self.nTotal)
101 def efficiencyError(self):
103 Returns the uncertainty of the efficiency.
104 For an efficiency eps = self.nSig/self.nTrueSig, this function calculates the
105 standard deviation according to http://arxiv.org/abs/physics/0701199 .
107 if self.nTrueSig == 0:
109 return self.calcStandardDeviation(self.nSig, self.nTrueSig)
111 def calcStandardDeviation(self, k, n):
112 """ Helper method to calculate the standard deviation for efficiencies. """
115 variance = (k + 1) * (k + 2) / ((n + 2) * (n + 3)) - (k + 1) ** 2 / ((n + 2) ** 2)
118 return math.sqrt(variance)
121 """ Returns a string representation of a Statistic object. """
122 o = f
"nTrueSig {self.nTrueSig} nSig {self.nSig} nBg {self.nBg}\n"
123 o += f
"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n"
124 o += f
"Purity {self.purity:.3f} ({self.purityError:.3f})\n"
127 def __add__(self, a):
128 """ Adds two Statistics objects and returns a new object. """
129 return Statistic(self.nTrueSig, self.nSig + a.nSig, self.nBg + a.nBg)
131 def __radd__(self, a):
133 Returns a new Statistic object if the current one is added to zero.
134 Necessary to apply sum-function to Statistic objects.
137 return NotImplemented
138 return Statistic(self.nTrueSig, self.nSig, self.nBg)
141 class MonitoringHist(object):
143 Reads all TH1F and TH2F from a ROOT file
144 and puts them into a more accessible format.
149 Reads histograms from the given file
150 @param filename the name of the ROOT file
159 self.valid = os.path.isfile(filename)
164 f = ROOT.TFile(filename)
166 for key
in f.GetListOfKeys():
169 if not (isinstance(hist, ROOT.TH1D)
or isinstance(hist, ROOT.TH1F)
or
170 isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)):
172 two_dimensional = isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)
174 nbins = (hist.GetNbinsX(), hist.GetNbinsY())
175 self.centers[name] = np.array([[hist.GetXaxis().GetBinCenter(i)
for i
in range(nbins[0] + 2)],
176 [hist.GetYaxis().GetBinCenter(i)
for i
in range(nbins[1] + 2)]])
177 self.values[name] = np.array([[hist.GetBinContent(i, j)
for i
in range(nbins[0] + 2)]
for j
in range(nbins[1] + 2)])
178 self.nbins[name] = nbins
180 nbins = hist.GetNbinsX()
181 self.centers[name] = np.array([hist.GetBinCenter(i)
for i
in range(nbins + 2)])
182 self.values[name] = np.array([hist.GetBinContent(i)
for i
in range(nbins + 2)])
183 self.nbins[name] = nbins
187 Calculates the sum of a given histogram (== sum of all entries)
188 @param name key of the histogram
190 if name
not in self.centers:
192 return np.sum(self.values[name])
194 def mean(self, name):
196 Calculates the mean of a given histogram
197 @param name key of the histogram
199 if name
not in self.centers:
201 return np.average(self.centers[name], weights=self.values[name])
205 Calculates the standard deviation of a given histogram
206 @param name key of the histogram
208 if name
not in self.centers:
210 avg = np.average(self.centers[name], weights=self.values[name])
211 return np.sqrt(np.average((self.centers[name] - avg)**2, weights=self.values[name]))
215 Calculates the minimum of a given histogram
216 @param name key of the histogram
218 if name
not in self.centers:
220 nonzero = np.nonzero(self.values[name])[0]
221 if len(nonzero) == 0:
223 return self.centers[name][nonzero[0]]
227 Calculates the maximum of a given histogram
228 @param name key of the histogram
230 if name
not in self.centers:
232 nonzero = np.nonzero(self.values[name])[0]
233 if len(nonzero) == 0:
235 return self.centers[name][nonzero[-1]]
238 class MonitoringNTuple(object):
240 Reads the ntuple named variables from a ROOT file
244 Reads ntuple from the given file
245 @param filename the name of the ROOT file
248 self.valid = os.path.isfile(filename)
252 self.f = ROOT.TFile(filename)
254 self.tree = self.f.variables
256 self.filename = filename
259 class MonitoringModuleStatistics(object):
261 Reads the module statistics for a single particle from the outputted root file
262 and puts them into a more accessible format
266 Reads the module statistics from the file named Monitor_ModuleStatistics.root
267 @param particle the particle for which the statistics are read
269 root_file = ROOT.TFile(
'Monitor_ModuleStatistics.root')
270 persistentTree = root_file.Get(
'persistent')
271 persistentTree.GetEntry(0)
273 stats = persistentTree.ProcessStatistics.Clone()
276 numEntries = persistentTree.GetEntriesFast()
277 for i
in range(1, numEntries):
278 persistentTree.GetEntry(i)
279 stats.merge(persistentTree.ProcessStatistics)
282 statistic = {m.getName(): m.getTimeSum(m.c_Event) / 1e9
for m
in stats.getAll()}
285 self.channel_time = {}
287 self.channel_time_per_module = {}
288 for channel
in particle.channels:
289 if channel.label
not in self.channel_time:
290 self.channel_time[channel.label] = 0.0
291 self.channel_time_per_module[channel.label] = {
'ParticleCombiner': 0.0,
292 'BestCandidateSelection': 0.0,
293 'PListCutAndCopy': 0.0,
294 'VariablesToExtraInfo': 0.0,
296 'ParticleSelector': 0.0,
298 'ParticleVertexFitter': 0.0,
299 'TagUniqueSignal': 0.0,
300 'VariablesToHistogram': 0.0,
301 'VariablesToNtuple': 0.0}
302 for key, time
in statistic.items():
303 if(channel.decayString
in key
or channel.name
in key):
304 self.channel_time[channel.label] += time
305 for k
in self.channel_time_per_module[channel.label]:
307 self.channel_time_per_module[channel.label][k] += time
310 self.particle_time = 0
311 for key, time
in statistic.items():
312 if particle.identifier
in key:
313 self.particle_time += time
316 def MonitorCosBDLPlot(particle, filename):
317 """ Creates a CosBDL plot using ROOT. """
318 if not particle.final_ntuple.valid:
321 [
'extraInfo__bouniqueSignal__bc',
'cosThetaBetweenParticleAndNominalB',
322 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
323 [
'unique',
'cosThetaBDl',
'probability',
'signal'])
324 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
326 common = (df[
'unique'] == 1) & (np.abs(df[
'cosThetaBDl']) < 10) & (df[
'probability'] >= cut)
327 p.add(df,
'cosThetaBDl', common & (df[
'signal'] == 1), label=
"Signal")
328 p.add(df,
'cosThetaBDl', common & (df[
'signal'] == 0), label=
"Background")
330 p.axis.set_title(f
"Cosine of Theta between B and Dl system for signal probability >= {cut:.2f}")
331 p.axis.set_xlabel(
"CosThetaBDl")
332 p.save(f
'{filename}_{i}.png')
335 def MonitorMbcPlot(particle, filename):
336 """ Creates a Mbc plot using ROOT. """
337 if not particle.final_ntuple.valid:
340 [
'extraInfo__bouniqueSignal__bc',
'Mbc',
341 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
342 [
'unique',
'Mbc',
'probability',
'signal'])
343 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
345 common = (df[
'unique'] == 1) & (df[
'Mbc'] > 5.23) & (df[
'probability'] >= cut)
346 p.add(df,
'Mbc', common & (df[
'signal'] == 1), label=
"Signal")
347 p.add(df,
'Mbc', common & (df[
'signal'] == 0), label=
"Background")
349 p.axis.set_title(f
"Beam constrained mass for signal probability >= {cut:.2f}")
350 p.axis.set_xlabel(
"Mbc")
351 p.save(f
'{filename}_{i}.png')
354 def MonitorROCPlot(particle, filename):
355 """ Creates a ROC plot using ROOT. """
356 if not particle.final_ntuple.valid:
359 [
'extraInfo__bouniqueSignal__bc',
360 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
361 [
'unique',
'probability',
'signal'])
363 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0, label=
'All')
364 p.add(df,
'probability', (df[
'signal'] == 1) & (df[
'unique'] == 1), (df[
'signal'] == 0) & (df[
'unique'] == 1), label=
'Unique')
366 p.save(filename +
'.png')
369 def MonitorDiagPlot(particle, filename):
370 """ Creates a Diagonal plot using ROOT. """
371 if not particle.final_ntuple.valid:
374 [
'extraInfo__bouniqueSignal__bc',
375 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
376 [
'unique',
'probability',
'signal'])
378 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0)
381 p.save(filename +
'.png')
384 def MonitoringMCCount(particle):
386 Reads the MC Counts for a given particle from the ROOT file mcParticlesCount.root
387 @param particle the particle for which the MC counts are read
388 @return dictionary with 'sum', 'std', 'avg', 'max', and 'min'
390 root_file = ROOT.TFile(
'mcParticlesCount.root')
392 key = f
'NumberOfMCParticlesInEvent({abs(pdg.from_name(particle.name))})'
395 hist = root_file.Get(key)
397 mc_counts = {
'sum': 0,
'std': 0,
'avg': 0,
'min': 0,
'max': 0}
399 mc_counts[
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
400 for bin
in range(hist.GetNbinsX()))
401 mc_counts[
'std'] = hist.GetStdDev()
402 mc_counts[
'avg'] = hist.GetMean()
403 mc_counts[
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
404 mc_counts[
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
408 class MonitoringBranchingFractions(object):
409 """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """
415 Create a new MonitoringBranchingFraction object.
416 The extracted branching fractions are cached, hence creating more than one object does not do anything.
418 if MonitoringBranchingFractions._shared
is None:
419 decay_file = get_default_decayfile()
421 self.exclusive_branching_fractions = self.loadExclusiveBranchingFractions(decay_file)
423 self.inclusive_branching_fractions = self.loadInclusiveBranchingFractions(self.exclusive_branching_fractions)
424 MonitoringBranchingFractions._shared = (self.exclusive_branching_fractions, self.inclusive_branching_fractions)
426 self.exclusive_branching_fractions, self.inclusive_branching_fractions = MonitoringBranchingFractions._shared
428 def getExclusive(self, particle):
429 """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
430 return self.getBranchingFraction(particle, self.exclusive_branching_fractions)
432 def getInclusive(self, particle):
433 """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
434 return self.getBranchingFraction(particle, self.inclusive_branching_fractions)
436 def getBranchingFraction(self, particle, branching_fractions):
437 """ Returns the branching fraction of a particle given a branching_fraction table. """
438 result = {c.label: 0.0
for c
in particle.channels}
440 channels = [tuple(sorted(d.split(
':')[0]
for d
in channel.daughters))
for channel
in particle.channels]
441 if name
not in branching_fractions:
443 channels = [tuple(
pdg.conjugate(d)
for d
in channel)
for channel
in channels]
444 if name
not in branching_fractions:
446 for c, key
in zip(particle.channels, channels):
447 if key
in branching_fractions[name]:
448 result[c.label] = branching_fractions[name][key]
451 def loadExclusiveBranchingFractions(self, filename):
453 Load branching fraction from MC decay-file.
456 def isFloat(element):
457 """ Checks if element is a convertible to float"""
464 def isValidParticle(element):
465 """ Checks if element is a valid pdg name for a particle"""
472 branching_fractions = {
'UNKOWN': {}}
475 with open(filename,
'r')
as f:
477 fields = line.split(
' ')
478 fields = [x
for x
in fields
if x !=
'']
479 if len(fields) < 2
or fields[0][0] ==
'#':
481 if fields[0] ==
'Decay':
482 mother = fields[1].strip()
483 if not isValidParticle(mother):
486 if fields[0] ==
'Enddecay':
489 if mother ==
'UNKOWN':
492 if len(fields) < 1
or not isFloat(fields[0]):
494 while len(fields) > 1:
495 if isValidParticle(fields[-1]):
498 if len(fields) < 1
or not all(isValidParticle(p)
for p
in fields[1:]):
500 neutrinoTag_list = [
'nu_e',
'nu_mu',
'nu_tau',
'anti-nu_e',
'anti-nu_mu',
'anti-nu_tau']
501 daughters = tuple(sorted(p
for p
in fields[1:]
if p
not in neutrinoTag_list))
502 if mother
not in branching_fractions:
503 branching_fractions[mother] = {}
504 if daughters
not in branching_fractions[mother]:
505 branching_fractions[mother][daughters] = 0.0
506 branching_fractions[mother][daughters] += float(fields[0])
508 del branching_fractions[
'UNKOWN']
509 return branching_fractions
511 def loadInclusiveBranchingFractions(self, exclusive_branching_fractions):
513 Get covered branching fraction of a particle using a recursive algorithm
514 and the given exclusive branching_fractions (given as Hashable List)
515 @param particle identifier of the particle
516 @param branching_fractions
518 particles = set(exclusive_branching_fractions.keys())
520 particles = sorted(particles, key=
lambda x:
pdg.get(x).Mass())
521 inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)
524 if p
in inclusive_branching_fractions:
525 br = sum(inclusive_branching_fractions[p].values())
527 br = sum(inclusive_branching_fractions[
pdg.conjugate(p)].values())
528 for p_br
in inclusive_branching_fractions.values():
530 for i
in range(c.count(p)):
532 return inclusive_branching_fractions
535 class MonitoringParticle(object):
537 Monitoring object containing all the monitoring information
538 about a single particle
542 Read the monitoring information of the given particle
543 @param particle the particle for which the information is read
546 self.particle = particle
548 self.mc_count = MonitoringMCCount(particle)
550 self.module_statistic = MonitoringModuleStatistics(particle)
552 self.time_per_channel = self.module_statistic.channel_time
554 self.time_per_channel_per_module = self.module_statistic.channel_time_per_module
556 self.total_time = self.module_statistic.particle_time + sum(self.time_per_channel.values())
559 self.total_number_of_channels = len(self.particle.channels)
561 self.reconstructed_number_of_channels = 0
564 self.branching_fractions = MonitoringBranchingFractions()
566 self.exc_br_per_channel = self.branching_fractions.getExclusive(particle)
568 self.inc_br_per_channel = self.branching_fractions.getInclusive(particle)
571 self.before_ranking = {}
573 self.after_ranking = {}
575 self.after_vertex = {}
577 self.after_classifier = {}
579 self.training_data = {}
581 self.ignored_channels = {}
583 for channel
in self.particle.channels:
584 hist = MonitoringHist(f
'Monitor_PreReconstruction_BeforeRanking_{channel.label}.root')
585 self.before_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
586 hist = MonitoringHist(f
'Monitor_PreReconstruction_AfterRanking_{channel.label}.root')
587 self.after_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
588 hist = MonitoringHist(f
'Monitor_MatchParticleList_AfterVertex_{channel.label}.root')
589 self.after_vertex[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
590 hist = MonitoringHist(f
'Monitor_PostReconstruction_AfterMVA_{channel.label}.root')
591 self.after_classifier[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
592 if hist.valid
and hist.sum(channel.mvaConfig.target) > 0:
593 self.reconstructed_number_of_channels += 1
594 self.ignored_channels[channel.label] =
False
596 self.ignored_channels[channel.label] =
True
597 hist = MonitoringHist(f
'Monitor_TrainingData_{channel.label}.root')
598 self.training_data[channel.label] = hist
600 plist = removeJPsiSlash(particle.identifier)
601 hist = MonitoringHist(f
'Monitor_PostReconstruction_BeforePostCut_{plist}.root')
603 self.before_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
604 hist = MonitoringHist(f
'Monitor_PostReconstruction_BeforeRanking_{plist}.root')
606 self.before_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
607 hist = MonitoringHist(f
'Monitor_PostReconstruction_AfterRanking_{plist}.root')
609 self.after_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
611 self.before_tag = self.calculateStatistic(hist, self.particle.mvaConfig.target)
613 self.after_tag = self.calculateUniqueStatistic(hist)
615 self.final_ntuple = MonitoringNTuple(f
'Monitor_Final_{plist}.root')
617 def calculateStatistic(self, hist, target):
619 Calculate Statistic object where all signal candidates are considered signal
621 nTrueSig = self.mc_count[
'sum']
623 return Statistic(nTrueSig, 0, 0)
624 signal_bins = (hist.centers[target] > 0.5)
625 bckgrd_bins = ~signal_bins
626 nSig = hist.values[target][signal_bins].sum()
627 nBg = hist.values[target][bckgrd_bins].sum()
628 return Statistic(nTrueSig, nSig, nBg)
630 def calculateUniqueStatistic(self, hist):
632 Calculate Static object where only unique signal candidates are considered signal
634 nTrueSig = self.mc_count[
'sum']
636 return Statistic(nTrueSig, 0, 0)
637 signal_bins = hist.centers[
'extraInfo(uniqueSignal)'] > 0.5
638 bckgrd_bins = hist.centers[
'extraInfo(uniqueSignal)'] <= 0.5
639 nSig = hist.values[
'extraInfo(uniqueSignal)'][signal_bins].sum()
640 nBg = hist.values[
'extraInfo(uniqueSignal)'][bckgrd_bins].sum()
641 return Statistic(nTrueSig, nSig, nBg)