14 Contains classes to read in the monitoring output
15 and some simple plotting routines.
17 This is used by printReporting.py and latexReporting.py
18 to create summaries for a FEI training or application.
22 from generators
import get_default_decayfile
23 except ModuleNotFoundError:
24 print(
"MonitoringBranchingFractions won't work.")
25 from basf2_mva_evaluation
import plotting
33 from ROOT
import Belle2
35 from ROOT
import gSystem
36 gSystem.Load(
'libanalysis.so')
40 def removeJPsiSlash(string):
41 """ Remove slashes in a string, which is not allowed for filenames. """
42 return string.replace(
'/',
'')
46 """ Load the FEI configuration from the Summary.pickle file. """
47 if not os.path.isfile(
'Summary.pickle'):
48 raise RuntimeError(
"""Could not find Summary.pickle!
49 This file is automatically created by the FEI training.
50 But you can also create it yourself using:
51 pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))""")
52 return pickle.load(open(
'Summary.pickle',
'rb'))
57 This class provides the efficiency, purity and other quantities for a
58 given number of true signal candidates, signal candidates and background candidates
61 def __init__(self, nTrueSig, nSig, nBg):
63 Create a new Statistic object
64 @param nTrueSig the number of true signal particles
65 @param nSig the number of reconstructed signal candidates
66 @param nBg the number of reconstructed background candidates
69 self.nTrueSig = nTrueSig
77 """ Returns total number of reconstructed candidates. """
78 return self.nSig + self.nBg
82 """ Returns the purity of the reconstructed candidates. """
87 return self.nSig / float(self.nTotal)
91 """ Returns the efficiency of the reconstructed signal candidates with respect to the number of true signal particles. """
94 if self.nTrueSig == 0:
96 return self.nSig / float(self.nTrueSig)
99 def purityError(self):
100 """ Returns the uncertainty of the purity. """
103 return self.calcStandardDeviation(self.nSig, self.nTotal)
106 def efficiencyError(self):
108 Returns the uncertainty of the efficiency.
109 For an efficiency eps = self.nSig/self.nTrueSig, this function calculates the
110 standard deviation according to http://arxiv.org/abs/physics/0701199 .
112 if self.nTrueSig == 0:
114 return self.calcStandardDeviation(self.nSig, self.nTrueSig)
116 def calcStandardDeviation(self, k, n):
117 """ Helper method to calculate the standard deviation for efficiencies. """
120 variance = (k + 1) * (k + 2) / ((n + 2) * (n + 3)) - (k + 1) ** 2 / ((n + 2) ** 2)
123 return math.sqrt(variance)
126 """ Returns a string representation of a Statistic object. """
127 o = f
"nTrueSig {self.nTrueSig} nSig {self.nSig} nBg {self.nBg}\n"
128 o += f
"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n"
129 o += f
"Purity {self.purity:.3f} ({self.purityError:.3f})\n"
132 def __add__(self, a):
133 """ Adds two Statistics objects and returns a new object. """
134 return Statistic(self.nTrueSig, self.nSig + a.nSig, self.nBg + a.nBg)
136 def __radd__(self, a):
138 Returns a new Statistic object if the current one is added to zero.
139 Necessary to apply sum-function to Statistic objects.
142 return NotImplemented
143 return Statistic(self.nTrueSig, self.nSig, self.nBg)
146 class MonitoringHist:
148 Reads all TH1F and TH2F from a ROOT file
149 and puts them into a more accessible format.
152 def __init__(self, filename, dirname):
154 Reads histograms from the given file
155 @param filename the name of the ROOT file
164 self.valid = os.path.isfile(filename)
169 f = ROOT.TFile.Open(filename,
'read')
172 for key
in d.GetListOfKeys():
175 if not (isinstance(hist, ROOT.TH1D)
or isinstance(hist, ROOT.TH1F)
or
176 isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)):
178 two_dimensional = isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)
180 nbins = (hist.GetNbinsX(), hist.GetNbinsY())
181 self.centers[name] = np.array([[hist.GetXaxis().GetBinCenter(i)
for i
in range(nbins[0] + 2)],
182 [hist.GetYaxis().GetBinCenter(i)
for i
in range(nbins[1] + 2)]])
183 self.values[name] = np.array([[hist.GetBinContent(i, j)
for i
in range(nbins[0] + 2)]
for j
in range(nbins[1] + 2)])
184 self.nbins[name] = nbins
186 nbins = hist.GetNbinsX()
187 self.centers[name] = np.array([hist.GetBinCenter(i)
for i
in range(nbins + 2)])
188 self.values[name] = np.array([hist.GetBinContent(i)
for i
in range(nbins + 2)])
189 self.nbins[name] = nbins
193 Calculates the sum of a given histogram (== sum of all entries)
194 @param name key of the histogram
196 if name
not in self.centers:
198 return np.sum(self.values[name])
200 def mean(self, name):
202 Calculates the mean of a given histogram
203 @param name key of the histogram
205 if name
not in self.centers:
207 return np.average(self.centers[name], weights=self.values[name])
211 Calculates the standard deviation of a given histogram
212 @param name key of the histogram
214 if name
not in self.centers:
216 avg = np.average(self.centers[name], weights=self.values[name])
217 return np.sqrt(np.average((self.centers[name] - avg)**2, weights=self.values[name]))
221 Calculates the minimum of a given histogram
222 @param name key of the histogram
224 if name
not in self.centers:
226 nonzero = np.nonzero(self.values[name])[0]
227 if len(nonzero) == 0:
229 return self.centers[name][nonzero[0]]
233 Calculates the maximum of a given histogram
234 @param name key of the histogram
236 if name
not in self.centers:
238 nonzero = np.nonzero(self.values[name])[0]
239 if len(nonzero) == 0:
241 return self.centers[name][nonzero[-1]]
244 class MonitoringNTuple:
246 Reads the ntuple named variables from a ROOT file
249 def __init__(self, filename, treenameprefix):
251 Reads ntuple from the given file
252 @param filename the name of the ROOT file
255 self.valid = os.path.isfile(filename)
259 self.f = ROOT.TFile.Open(filename,
'read')
261 self.tree = self.f.Get(f
'{treenameprefix} variables')
263 self.filename = filename
266 class MonitoringModuleStatistics:
268 Reads the module statistics for a single particle from the outputted root file
269 and puts them into a more accessible format
272 def __init__(self, particle):
274 Reads the module statistics from the file named Monitor_ModuleStatistics.root
275 @param particle the particle for which the statistics are read
277 root_file = ROOT.TFile.Open(
'Monitor_ModuleStatistics.root',
'read')
278 persistentTree = root_file.Get(
'persistent')
279 persistentTree.GetEntry(0)
281 stats = persistentTree.ProcessStatistics.Clone()
284 numEntries = persistentTree.GetEntriesFast()
285 for i
in range(1, numEntries):
286 persistentTree.GetEntry(i)
287 stats.merge(persistentTree.ProcessStatistics)
290 statistic = {m.getName(): m.getTimeSum(m.c_Event) / 1e9
for m
in stats.getAll()}
293 self.channel_time = {}
295 self.channel_time_per_module = {}
296 for channel
in particle.channels:
297 if channel.label
not in self.channel_time:
298 self.channel_time[channel.label] = 0.0
299 self.channel_time_per_module[channel.label] = {
'ParticleCombiner': 0.0,
300 'BestCandidateSelection': 0.0,
301 'PListCutAndCopy': 0.0,
302 'VariablesToExtraInfo': 0.0,
304 'ParticleSelector': 0.0,
306 'ParticleVertexFitter': 0.0,
307 'TagUniqueSignal': 0.0,
308 'VariablesToHistogram': 0.0,
309 'VariablesToNtuple': 0.0}
310 for key, time
in statistic.items():
311 if(channel.decayString
in key
or channel.name
in key):
312 self.channel_time[channel.label] += time
313 for k
in self.channel_time_per_module[channel.label]:
315 self.channel_time_per_module[channel.label][k] += time
318 self.particle_time = 0
319 for key, time
in statistic.items():
320 if particle.identifier
in key:
321 self.particle_time += time
324 def MonitorCosBDLPlot(particle, filename):
325 """ Creates a CosBDL plot using ROOT. """
326 if not particle.final_ntuple.valid:
329 [
'extraInfo__bouniqueSignal__bc',
'cosThetaBetweenParticleAndNominalB',
330 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
331 [
'unique',
'cosThetaBDl',
'probability',
'signal'])
332 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
334 common = (np.abs(df[
'cosThetaBDl']) < 10) & (df[
'probability'] >= cut)
335 p.add(df,
'cosThetaBDl', common & (df[
'signal'] == 1), label=
"Signal")
336 p.add(df,
'cosThetaBDl', common & (df[
'signal'] == 0), label=
"Background")
338 p.axis.set_title(f
"Cosine of Theta between B and Dl system for signal probability >= {cut:.2f}")
339 p.axis.set_xlabel(
"CosThetaBDl")
340 p.save(f
'{filename}_{i}.png')
343 def MonitorMbcPlot(particle, filename):
344 """ Creates a Mbc plot using ROOT. """
345 if not particle.final_ntuple.valid:
348 [
'extraInfo__bouniqueSignal__bc',
'Mbc',
349 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
350 [
'unique',
'Mbc',
'probability',
'signal'])
351 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
353 common = (df[
'Mbc'] > 5.23) & (df[
'probability'] >= cut)
354 p.add(df,
'Mbc', common & (df[
'signal'] == 1), label=
"Signal")
355 p.add(df,
'Mbc', common & (df[
'signal'] == 0), label=
"Background")
357 p.axis.set_title(f
"Beam constrained mass for signal probability >= {cut:.2f}")
358 p.axis.set_xlabel(
"Mbc")
359 p.save(f
'{filename}_{i}.png')
362 def MonitorROCPlot(particle, filename):
363 """ Creates a ROC plot using ROOT. """
364 if not particle.final_ntuple.valid:
367 [
'extraInfo__bouniqueSignal__bc',
368 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
369 [
'unique',
'probability',
'signal'])
371 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0, label=
'All')
373 p.save(filename +
'.png')
376 def MonitorDiagPlot(particle, filename):
377 """ Creates a Diagonal plot using ROOT. """
378 if not particle.final_ntuple.valid:
381 [
'extraInfo__bouniqueSignal__bc',
382 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
383 [
'unique',
'probability',
'signal'])
385 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0)
387 p.save(filename +
'.png')
390 def MonitoringMCCount(particle):
392 Reads the MC Counts for a given particle from the ROOT file mcParticlesCount.root
393 @param particle the particle for which the MC counts are read
394 @return dictionary with 'sum', 'std', 'avg', 'max', and 'min'
396 root_file = ROOT.TFile.Open(
'mcParticlesCount.root',
'read')
398 key = f
'NumberOfMCParticlesInEvent({abs(pdg.from_name(particle.name))})'
401 hist = root_file.Get(key)
403 mc_counts = {
'sum': 0,
'std': 0,
'avg': 0,
'min': 0,
'max': 0}
405 mc_counts[
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
406 for bin
in range(hist.GetNbinsX()))
407 mc_counts[
'std'] = hist.GetStdDev()
408 mc_counts[
'avg'] = hist.GetMean()
409 mc_counts[
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
410 mc_counts[
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
414 class MonitoringBranchingFractions:
415 """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """
421 Create a new MonitoringBranchingFraction object.
422 The extracted branching fractions are cached, hence creating more than one object does not do anything.
424 if MonitoringBranchingFractions._shared
is None:
425 decay_file = get_default_decayfile()
427 self.exclusive_branching_fractions = self.loadExclusiveBranchingFractions(decay_file)
429 self.inclusive_branching_fractions = self.loadInclusiveBranchingFractions(self.exclusive_branching_fractions)
430 MonitoringBranchingFractions._shared = (self.exclusive_branching_fractions, self.inclusive_branching_fractions)
432 self.exclusive_branching_fractions, self.inclusive_branching_fractions = MonitoringBranchingFractions._shared
434 def getExclusive(self, particle):
435 """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
436 return self.getBranchingFraction(particle, self.exclusive_branching_fractions)
438 def getInclusive(self, particle):
439 """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
440 return self.getBranchingFraction(particle, self.inclusive_branching_fractions)
442 def getBranchingFraction(self, particle, branching_fractions):
443 """ Returns the branching fraction of a particle given a branching_fraction table. """
444 result = {c.label: 0.0
for c
in particle.channels}
446 channels = [tuple(sorted(d.split(
':')[0]
for d
in channel.daughters))
for channel
in particle.channels]
447 if name
not in branching_fractions:
449 channels = [tuple(
pdg.conjugate(d)
for d
in channel)
for channel
in channels]
450 if name
not in branching_fractions:
452 for c, key
in zip(particle.channels, channels):
453 if key
in branching_fractions[name]:
454 result[c.label] = branching_fractions[name][key]
457 def loadExclusiveBranchingFractions(self, filename):
459 Load branching fraction from MC decay-file.
462 def isFloat(element):
463 """ Checks if element is a convertible to float"""
470 def isValidParticle(element):
471 """ Checks if element is a valid pdg name for a particle"""
478 branching_fractions = {
'UNKOWN': {}}
481 with open(filename)
as f:
483 fields = line.split(
' ')
484 fields = [x
for x
in fields
if x !=
'']
485 if len(fields) < 2
or fields[0][0] ==
'#':
487 if fields[0] ==
'Decay':
488 mother = fields[1].strip()
489 if not isValidParticle(mother):
492 if fields[0] ==
'Enddecay':
495 if mother ==
'UNKOWN':
498 if len(fields) < 1
or not isFloat(fields[0]):
500 while len(fields) > 1:
501 if isValidParticle(fields[-1]):
504 if len(fields) < 1
or not all(isValidParticle(p)
for p
in fields[1:]):
506 neutrinoTag_list = [
'nu_e',
'nu_mu',
'nu_tau',
'anti-nu_e',
'anti-nu_mu',
'anti-nu_tau']
507 daughters = tuple(sorted(p
for p
in fields[1:]
if p
not in neutrinoTag_list))
508 if mother
not in branching_fractions:
509 branching_fractions[mother] = {}
510 if daughters
not in branching_fractions[mother]:
511 branching_fractions[mother][daughters] = 0.0
512 branching_fractions[mother][daughters] += float(fields[0])
514 del branching_fractions[
'UNKOWN']
515 return branching_fractions
517 def loadInclusiveBranchingFractions(self, exclusive_branching_fractions):
519 Get covered branching fraction of a particle using a recursive algorithm
520 and the given exclusive branching_fractions (given as Hashable List)
521 @param particle identifier of the particle
522 @param branching_fractions
524 particles = set(exclusive_branching_fractions.keys())
526 particles = sorted(particles, key=
lambda x:
pdg.get(x).Mass())
527 inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)
530 if p
in inclusive_branching_fractions:
531 br = sum(inclusive_branching_fractions[p].values())
533 br = sum(inclusive_branching_fractions[
pdg.conjugate(p)].values())
534 for p_br
in inclusive_branching_fractions.values():
536 for i
in range(c.count(p)):
538 return inclusive_branching_fractions
541 class MonitoringParticle:
543 Monitoring object containing all the monitoring information
544 about a single particle
547 def __init__(self, particle):
549 Read the monitoring information of the given particle
550 @param particle the particle for which the information is read
553 self.particle = particle
555 self.mc_count = MonitoringMCCount(particle)
557 self.module_statistic = MonitoringModuleStatistics(particle)
559 self.time_per_channel = self.module_statistic.channel_time
561 self.time_per_channel_per_module = self.module_statistic.channel_time_per_module
563 self.total_time = self.module_statistic.particle_time + sum(self.time_per_channel.values())
566 self.total_number_of_channels = len(self.particle.channels)
568 self.reconstructed_number_of_channels = 0
571 self.branching_fractions = MonitoringBranchingFractions()
573 self.exc_br_per_channel = self.branching_fractions.getExclusive(particle)
575 self.inc_br_per_channel = self.branching_fractions.getInclusive(particle)
578 self.before_ranking = {}
580 self.after_ranking = {}
582 self.after_vertex = {}
584 self.after_classifier = {}
586 self.training_data = {}
588 self.ignored_channels = {}
590 for channel
in self.particle.channels:
591 hist = MonitoringHist(f
'Monitor_PreReconstruction_BeforeRanking.root', f
'{channel.label}')
592 self.before_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
593 hist = MonitoringHist(f
'Monitor_PreReconstruction_AfterRanking.root', f
'{channel.label}')
594 self.after_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
595 hist = MonitoringHist(f
'Monitor_PreReconstruction_AfterVertex.root', f
'{channel.label}')
596 self.after_vertex[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
597 hist = MonitoringHist(f
'Monitor_PostReconstruction_AfterMVA.root', f
'{channel.label}')
598 self.after_classifier[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
599 if hist.valid
and hist.sum(channel.mvaConfig.target) > 0:
600 self.reconstructed_number_of_channels += 1
601 self.ignored_channels[channel.label] =
False
603 self.ignored_channels[channel.label] =
True
604 hist = MonitoringHist(f
'Monitor_TrainingData.root', f
'{channel.label}')
605 self.training_data[channel.label] = hist
607 plist = removeJPsiSlash(particle.identifier)
608 hist = MonitoringHist(f
'Monitor_PostReconstruction_BeforePostCut.root', f
'{plist}')
610 self.before_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
611 hist = MonitoringHist(f
'Monitor_PostReconstruction_BeforeRanking.root', f
'{plist}')
613 self.before_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
614 hist = MonitoringHist(f
'Monitor_PostReconstruction_AfterRanking.root', f
'{plist}')
616 self.after_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
618 self.before_tag = self.calculateStatistic(hist, self.particle.mvaConfig.target)
620 self.after_tag = self.calculateUniqueStatistic(hist)
622 self.final_ntuple = MonitoringNTuple(f
'Monitor_Final.root', f
'{plist}')
624 def calculateStatistic(self, hist, target):
626 Calculate Statistic object where all signal candidates are considered signal
628 nTrueSig = self.mc_count[
'sum']
630 return Statistic(nTrueSig, 0, 0)
631 signal_bins = (hist.centers[target] > 0.5)
632 bckgrd_bins = ~signal_bins
633 nSig = hist.values[target][signal_bins].sum()
634 nBg = hist.values[target][bckgrd_bins].sum()
635 return Statistic(nTrueSig, nSig, nBg)
637 def calculateUniqueStatistic(self, hist):
639 Calculate Static object where only unique signal candidates are considered signal
641 nTrueSig = self.mc_count[
'sum']
643 return Statistic(nTrueSig, 0, 0)
644 signal_bins = hist.centers[
'extraInfo(uniqueSignal)'] > 0.5
645 bckgrd_bins = hist.centers[
'extraInfo(uniqueSignal)'] <= 0.5
646 nSig = hist.values[
'extraInfo(uniqueSignal)'][signal_bins].sum()
647 nBg = hist.values[
'extraInfo(uniqueSignal)'][bckgrd_bins].sum()
648 return Statistic(nTrueSig, nSig, nBg)
def tree2dict(tree, tree_columns, dict_columns=None)
Global list of available variables.
static Manager & Instance()
get singleton instance.
std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.
std::string invertMakeROOTCompatible(std::string str)
Invert makeROOTCompatible operation.