release-08-01-10/doxygen/monitoring_8py_source.html

 #!/usr/bin/env python


 # @cond SUPPRESS_DOXYGEN


 """

  Contains classes to read in the monitoring output

  and some simple plotting routines.


  This is used by printReporting.py and latexReporting.py

  to create summaries for a FEI training or application.

 """


 try:

     from generators import get_default_decayfile

 except ModuleNotFoundError:

     print("MonitoringBranchingFractions won't work.")

 from basf2_mva_evaluation import plotting

 import basf2_mva_util

 import pickle

 import copy

 import math

 import os

 import numpy as np

 import pdg


 def removeJPsiSlash(string):
     """
     Strip every forward slash from a string, since slashes are not allowed in filenames.
     @param string the text to clean up
     @return the text with all '/' characters removed
     """
     # Splitting on '/' and gluing the pieces back together drops the slashes.
     return ''.join(string.split('/'))


 def load_config():
     """
     Load the FEI configuration from the Summary.pickle file.

     The file is looked up relative to the current working directory.
     @return the object stored in Summary.pickle (the error message below
             describes it as a (particles, configuration) tuple)
     @raise RuntimeError if Summary.pickle does not exist
     """
     if not os.path.isfile('Summary.pickle'):
         raise RuntimeError("""Could not find Summary.pickle!

                               This file is automatically created by the FEI training.

                               But you can also create it yourself using:

                               pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))""")
     # NOTE: unpickling can execute arbitrary code. Only load Summary.pickle
     # files which were produced by a trusted FEI training.
     # The context manager guarantees that the file handle is closed again,
     # also if unpickling fails.
     with open('Summary.pickle', 'rb') as summary_file:
         return pickle.load(summary_file)


 class Statistic:
     """
     Bundles three candidate counts (true signal, reconstructed signal and
     reconstructed background) and derives efficiency, purity and their
     uncertainties from them.
     """

     def __init__(self, nTrueSig, nSig, nBg):
         """
         Store the three counts.
         @param nTrueSig the number of true signal particles
         @param nSig the number of reconstructed signal candidates
         @param nBg the number of reconstructed background candidates
         """
         # the number of true signal particles
         self.nTrueSig = nTrueSig
         # the number of reconstructed signal candidates
         self.nSig = nSig
         # the number of reconstructed background candidates
         self.nBg = nBg

     @property
     def nTotal(self):
         """ Number of reconstructed candidates, i.e. signal plus background. """
         return self.nSig + self.nBg

     @property
     def purity(self):
         """ Fraction of signal among all reconstructed candidates; 0.0 if there is no signal or no candidate at all. """
         total = self.nTotal
         if self.nSig == 0 or total == 0:
             return 0.0
         return self.nSig / float(total)

     @property
     def efficiency(self):
         """
         Ratio of reconstructed signal candidates to true signal particles.
         Yields 0.0 without reconstructed signal and infinity if signal was
         reconstructed although no true signal particle is known.
         """
         if self.nSig == 0:
             return 0.0
         return float('inf') if self.nTrueSig == 0 else self.nSig / float(self.nTrueSig)

     @property
     def purityError(self):
         """ Uncertainty of the purity; 0.0 if there are no reconstructed candidates. """
         total = self.nTotal
         return 0.0 if total == 0 else self.calcStandardDeviation(self.nSig, total)

     @property
     def efficiencyError(self):
         """
         Uncertainty of the efficiency eps = self.nSig/self.nTrueSig.
         The standard deviation is calculated according to
         http://arxiv.org/abs/physics/0701199 .
         Infinity is returned if there are no true signal particles.
         """
         n_true = self.nTrueSig
         return float('inf') if n_true == 0 else self.calcStandardDeviation(self.nSig, n_true)

     def calcStandardDeviation(self, k, n):
         """
         Helper method to calculate the standard deviation for efficiencies.
         @param k the number of selected candidates (numerator of the ratio)
         @param n the number of all candidates (denominator of the ratio)
         @return the square root of the variance, or 0.0 if the variance is not positive
         """
         passed = float(k)
         total = float(n)
         second_moment = (passed + 1) * (passed + 2) / ((total + 2) * (total + 3))
         squared_mean = (passed + 1) ** 2 / ((total + 2) ** 2)
         variance = second_moment - squared_mean
         return 0.0 if variance <= 0 else math.sqrt(variance)

     def __str__(self):
         """ Three-line summary: the raw counts, the efficiency and the purity (each with its uncertainty). """
         lines = (
             f"nTrueSig {self.nTrueSig}    nSig {self.nSig}    nBg {self.nBg}\n",
             f"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n",
             f"Purity {self.purity:.3f} ({self.purityError:.3f})\n",
         )
         return ''.join(lines)

     def __add__(self, a):
         """
         Adds two Statistic objects and returns a new object.
         Signal and background counts are summed; the number of true signal
         particles is taken from the left operand only.
         """
         summed_signal = self.nSig + a.nSig
         summed_background = self.nBg + a.nBg
         return Statistic(self.nTrueSig, summed_signal, summed_background)

     def __radd__(self, a):
         """
         Supports 0 + Statistic by returning a copy of this object, which is
         what the built-in sum function needs for its start value.
         Any other left operand is rejected with NotImplemented.
         """
         return NotImplemented if a != 0 else Statistic(self.nTrueSig, self.nSig, self.nBg)


 class MonitoringHist:
     """
     Reads all one- and two-dimensional histograms (TH1D, TH1F, TH2D, TH2F)
     from one directory of a ROOT file
     and puts them into a more accessible format (numpy arrays).
     """

     def __init__(self, filename, dirname):
         """
         Reads histograms from the given file
         @param filename the name of the ROOT file
         @param dirname the name of the directory inside the ROOT file which contains the histograms
         """
         # Always avoid the top-level 'import ROOT'.
         import ROOT  # noqa

         # bin contents per histogram name; bins 0 .. nbins+1 are read, so the
         # under- and overflow bins of ROOT are part of the arrays
         self.values = {}
         # bin centers per histogram name, same bin range as the values
         self.centers = {}
         # number of regular bins per histogram name (a (nx, ny) tuple for 2D histograms)
         self.nbins = {}
         # False if the file or the requested directory could not be read
         self.valid = os.path.isfile(filename)

         if not self.valid:
             return

         f = ROOT.TFile.Open(filename, 'read')
         if not f or f.IsZombie():
             # The path exists but ROOT cannot read it: treat it like a missing file.
             self.valid = False
             return

         try:
             d = f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(dirname))
             if not d:
                 # No such directory in the file, hence there is nothing to read.
                 self.valid = False
                 return

             for key in d.GetListOfKeys():
                 name = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
                 hist = key.ReadObj()
                 two_dimensional = isinstance(hist, (ROOT.TH2D, ROOT.TH2F))
                 if not (two_dimensional or isinstance(hist, (ROOT.TH1D, ROOT.TH1F))):
                     continue
                 if two_dimensional:
                     nbins = (hist.GetNbinsX(), hist.GetNbinsY())
                     # NOTE(review): the x- and y-centers are stacked into one array, which
                     # presumably requires the same number of bins on both axes -- confirm.
                     self.centers[name] = np.array([[hist.GetXaxis().GetBinCenter(i) for i in range(nbins[0] + 2)],
                                                    [hist.GetYaxis().GetBinCenter(i) for i in range(nbins[1] + 2)]])
                     self.values[name] = np.array([[hist.GetBinContent(i, j) for i in range(nbins[0] + 2)]
                                                   for j in range(nbins[1] + 2)])
                     self.nbins[name] = nbins
                 else:
                     nbins = hist.GetNbinsX()
                     self.centers[name] = np.array([hist.GetBinCenter(i) for i in range(nbins + 2)])
                     self.values[name] = np.array([hist.GetBinContent(i) for i in range(nbins + 2)])
                     self.nbins[name] = nbins
         finally:
             # Everything needed was copied into numpy arrays, so the file can be
             # closed right away instead of staying open.
             f.Close()

     def sum(self, name):
         """
         Calculates the sum of a given histogram (== sum of all entries)
         @param name key of the histogram
         @return the sum of all bin contents, or NaN if the histogram is unknown
         """
         if name not in self.centers:
             return np.nan
         return np.sum(self.values[name])

     def mean(self, name):
         """
         Calculates the mean of a given histogram
         @param name key of the histogram
         @return the mean of the bin centers weighted with the bin contents,
                 or NaN if the histogram is unknown or its contents sum to zero
         """
         if name not in self.centers:
             return np.nan
         weights = self.values[name]
         # np.average raises ZeroDivisionError if the weights sum to zero;
         # return NaN instead, as min() and max() do for empty histograms.
         if np.sum(weights) == 0:
             return np.nan
         return np.average(self.centers[name], weights=weights)

     def std(self, name):
         """
         Calculates the standard deviation of a given histogram
         @param name key of the histogram
         @return the weighted standard deviation of the bin centers,
                 or NaN if the histogram is unknown or its contents sum to zero
         """
         if name not in self.centers:
             return np.nan
         weights = self.values[name]
         # Same guard as in mean(): a weighted average is undefined without weight.
         if np.sum(weights) == 0:
             return np.nan
         avg = np.average(self.centers[name], weights=weights)
         return np.sqrt(np.average((self.centers[name] - avg)**2, weights=weights))

     def min(self, name):
         """
         Calculates the minimum of a given histogram
         @param name key of the histogram
         @return the center of the first bin with non-zero content,
                 or NaN if the histogram is unknown or has no such bin
         """
         if name not in self.centers:
             return np.nan
         nonzero = np.nonzero(self.values[name])[0]
         if len(nonzero) == 0:
             return np.nan
         return self.centers[name][nonzero[0]]

     def max(self, name):
         """
         Calculates the maximum of a given histogram
         @param name key of the histogram
         @return the center of the last bin with non-zero content,
                 or NaN if the histogram is unknown or has no such bin
         """
         if name not in self.centers:
             return np.nan
         nonzero = np.nonzero(self.values[name])[0]
         if len(nonzero) == 0:
             return np.nan
         return self.centers[name][nonzero[-1]]


 class MonitoringNTuple:
     """
     Gives access to the ntuple called '<prefix> variables' of a ROOT file
     """

     def __init__(self, filename, treenameprefix):
         """
         Opens the given file and fetches the ntuple from it
         @param filename the name of the ROOT file
         @param treenameprefix the ntuple is looked up under the name '<treenameprefix> variables'
         """
         # Always avoid the top-level 'import ROOT'.
         import ROOT  # noqa

         file_exists = os.path.isfile(filename)
         # True if the file exists; the attributes below are only set in this case
         self.valid = file_exists
         if file_exists:
             # the opened ROOT file; kept as attribute next to the tree read from it
             self.f = ROOT.TFile.Open(filename, 'read')
             # the object stored under '<treenameprefix> variables'
             self.tree = self.f.Get(f'{treenameprefix} variables')
             # the name of the ROOT file
             self.filename = filename


 class MonitoringModuleStatistics:
     """
     Collects the time spent in the modules belonging to a single particle,
     based on the module statistics stored in Monitor_ModuleStatistics.root
     """

     def __init__(self, particle):
         """
         Reads the module statistics from the file named Monitor_ModuleStatistics.root
         @param particle the particle for which the statistics are read
         """
         # Always avoid the top-level 'import ROOT'.
         import ROOT  # noqa
         root_file = ROOT.TFile.Open('Monitor_ModuleStatistics.root', 'read')
         persistentTree = root_file.Get('persistent')
         persistentTree.GetEntry(0)
         # Clone() needed so we actually own the object (original dies when tfile is deleted)
         stats = persistentTree.ProcessStatistics.Clone()

         # merge statistics from all remaining entries of the persistent tree into 'stats'
         for entry in range(1, persistentTree.GetEntriesFast()):
             persistentTree.GetEntry(entry)
             stats.merge(persistentTree.ProcessStatistics)

         # TODO .getTimeSum returns always 0 at the moment ?!
         # module name -> event time sum divided by 1e9
         # (presumably a conversion from nanoseconds to seconds -- confirm)
         statistic = {}
         for module in stats.getAll():
             module_name = module.getName()
             statistic[module_name] = module.getTimeSum(module.c_Event) / 1e9

         # module types for which the time is additionally tracked per channel
         tracked_modules = ('ParticleCombiner',
                            'BestCandidateSelection',
                            'PListCutAndCopy',
                            'VariablesToExtraInfo',
                            'MCMatch',
                            'ParticleSelector',
                            'MVAExpert',
                            'ParticleVertexFitter',
                            'TagUniqueSignal',
                            'VariablesToHistogram',
                            'VariablesToNtuple')

         # channel label -> summed time of all modules whose name contains the decay string or channel name
         self.channel_time = {}
         # channel label -> {tracked module type -> summed time}
         self.channel_time_per_module = {}
         for channel in particle.channels:
             label = channel.label
             if label not in self.channel_time:
                 self.channel_time[label] = 0.0
                 self.channel_time_per_module[label] = dict.fromkeys(tracked_modules, 0.0)
             per_module = self.channel_time_per_module[label]
             for key, time in statistic.items():
                 if not (channel.decayString in key or channel.name in key):
                     continue
                 self.channel_time[label] += time
                 # a module is attributed to a type by substring match on its name
                 for k in per_module:
                     if k in key:
                         per_module[k] += time

         # summed time of all modules whose name contains the particle identifier
         self.particle_time = sum(time for key, time in statistic.items() if particle.identifier in key)


 def MonitorCosBDLPlot(particle, filename):
     """
     Creates the CosBDL plots, one for each signal probability cut.

     Nothing happens if the final ntuple of the particle is not valid.
     @param particle object providing final_ntuple and particle.mvaConfig.target
     @param filename prefix of the output files; plot number i is saved as '<filename>_<i>.png'
     """
     ntuple = particle.final_ntuple
     if not ntuple.valid:
         return
     tree = ntuple.tree
     branches = ['extraInfo__bouniqueSignal__bc', 'cosThetaBetweenParticleAndNominalB',
                 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target]
     aliases = ['unique', 'cosThetaBDl', 'probability', 'signal']
     df = basf2_mva_util.tree2dict(tree, branches, aliases)
     probability_cuts = [0.0, 0.01, 0.05, 0.1, 0.2, 0.5]
     for i, cut in enumerate(probability_cuts):
         p = plotting.VerboseDistribution(range_in_std=5.0)
         # df is overwritten, so every pass works on the rows which survived the previous cuts
         in_window = np.abs(df['cosThetaBDl']) < 10
         above_cut = df['probability'] >= cut
         df = df[in_window & above_cut]
         p.add(df, 'cosThetaBDl', (df['signal'] == 1), label="Signal")
         p.add(df, 'cosThetaBDl', (df['signal'] == 0), label="Background")
         p.finish()
         axis = p.axis
         axis.set_title(f"Cosine of Theta between B and Dl system for signal probability >= {cut:.2f}")
         axis.set_xlabel("CosThetaBDl")
         p.save(f'{filename}_{i}.png')


 def MonitorMbcPlot(particle, filename):
     """
     Creates the Mbc plots, one for each signal probability cut.

     Nothing happens if the final ntuple of the particle is not valid.
     @param particle object providing final_ntuple and particle.mvaConfig.target
     @param filename prefix of the output files; plot number i is saved as '<filename>_<i>.png'
     """
     ntuple = particle.final_ntuple
     if not ntuple.valid:
         return
     tree = ntuple.tree
     branches = ['extraInfo__bouniqueSignal__bc', 'Mbc',
                 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target]
     aliases = ['unique', 'Mbc', 'probability', 'signal']
     df = basf2_mva_util.tree2dict(tree, branches, aliases)
     probability_cuts = [0.0, 0.01, 0.05, 0.1, 0.2, 0.5]
     for i, cut in enumerate(probability_cuts):
         p = plotting.VerboseDistribution(range_in_std=5.0)
         # df is overwritten, so every pass works on the rows which survived the previous cuts
         above_mbc_threshold = df['Mbc'] > 5.23
         above_cut = df['probability'] >= cut
         df = df[above_mbc_threshold & above_cut]
         p.add(df, 'Mbc', (df['signal'] == 1), label="Signal")
         p.add(df, 'Mbc', (df['signal'] == 0), label="Background")
         p.finish()
         axis = p.axis
         axis.set_title(f"Beam constrained mass for signal probability >= {cut:.2f}")
         axis.set_xlabel("Mbc")
         p.save(f'{filename}_{i}.png')


 def MonitorROCPlot(particle, filename):
     """
     Creates a ROC plot (rejection over efficiency) of the signal probability.

     Nothing happens if the final ntuple of the particle is not valid.
     @param particle object providing final_ntuple and particle.mvaConfig.target
     @param filename name of the output file without the '.png' extension
     """
     ntuple = particle.final_ntuple
     if not ntuple.valid:
         return
     tree = ntuple.tree
     branches = ['extraInfo__bouniqueSignal__bc',
                 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target]
     aliases = ['unique', 'probability', 'signal']
     df = basf2_mva_util.tree2dict(tree, branches, aliases)
     roc = plotting.RejectionOverEfficiency()
     # signal mask first, background mask second
     roc.add(df, 'probability', df['signal'] == 1, df['signal'] == 0, label='All')
     roc.finish()
     roc.save(f'{filename}.png')


 def MonitorDiagPlot(particle, filename):
     """
     Creates a Diagonal plot of the signal probability.

     Nothing happens if the final ntuple of the particle is not valid.
     @param particle object providing final_ntuple and particle.mvaConfig.target
     @param filename name of the output file without the '.png' extension
     """
     ntuple = particle.final_ntuple
     if not ntuple.valid:
         return
     tree = ntuple.tree
     branches = ['extraInfo__bouniqueSignal__bc',
                 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target]
     aliases = ['unique', 'probability', 'signal']
     df = basf2_mva_util.tree2dict(tree, branches, aliases)
     diagonal = plotting.Diagonal()
     # signal mask first, background mask second
     diagonal.add(df, 'probability', df['signal'] == 1, df['signal'] == 0)
     diagonal.finish()
     diagonal.save(f'{filename}.png')


 def MonitoringMCCount(particle):
     """
     Reads the MC Counts for a given particle from the ROOT file mcParticlesCount.root
     @param particle the particle for which the MC counts are read
     @return dictionary with 'sum', 'std', 'avg', 'max', and 'min';
             all entries are 0 if the file holds no histogram for the particle
     """
     # Always avoid the top-level 'import ROOT'.
     import ROOT  # noqa
     root_file = ROOT.TFile.Open('mcParticlesCount.root', 'read')

     # The histogram is named after the absolute value of the PDG code of the particle.
     pdg_code = abs(pdg.from_name(particle.name))
     key = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'NumberOfMCParticlesInEvent({pdg_code})')
     hist = root_file.Get(key)

     mc_counts = dict.fromkeys(('sum', 'std', 'avg', 'min', 'max'), 0)
     if not hist:
         return mc_counts

     axis = hist.GetXaxis()
     # Sum of bin center times bin content over the bins 1 .. NbinsX.
     total = 0
     for ibin in range(1, hist.GetNbinsX() + 1):
         total += axis.GetBinCenter(ibin) * hist.GetBinContent(ibin)
     mc_counts['sum'] = total
     mc_counts['std'] = hist.GetStdDev()
     mc_counts['avg'] = hist.GetMean()
     # centers of the last and the first bin with a content above zero
     mc_counts['max'] = axis.GetBinCenter(hist.FindLastBinAbove(0.0))
     mc_counts['min'] = axis.GetBinCenter(hist.FindFirstBinAbove(0.0))
     return mc_counts


 class MonitoringBranchingFractions:
     """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """

     # (exclusive, inclusive) branching fraction tables, shared by all instances
     # once the first instance has loaded them
     _shared = None

     def __init__(self):
         """
         Create a new MonitoringBranchingFractions object.
         The extracted branching fractions are cached, hence creating more than one object
         does not read the decay file again.
         """
         if MonitoringBranchingFractions._shared is None:
             # get_default_decayfile is only defined if the 'generators' import
             # at the top of the file succeeded.
             decay_file = get_default_decayfile()
             # mother name -> {sorted tuple of daughter names -> exclusive branching fraction}
             self.exclusive_branching_fractions = self.loadExclusiveBranchingFractions(decay_file)
             # same layout, but including the branching fractions of the daughters
             self.inclusive_branching_fractions = self.loadInclusiveBranchingFractions(self.exclusive_branching_fractions)
             MonitoringBranchingFractions._shared = (self.exclusive_branching_fractions, self.inclusive_branching_fractions)
         else:
             self.exclusive_branching_fractions, self.inclusive_branching_fractions = MonitoringBranchingFractions._shared

     def getExclusive(self, particle):
         """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
         return self.getBranchingFraction(particle, self.exclusive_branching_fractions)

     def getInclusive(self, particle):
         """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
         return self.getBranchingFraction(particle, self.inclusive_branching_fractions)

     def getBranchingFraction(self, particle, branching_fractions):
         """
         Returns the branching fraction of a particle given a branching_fraction table.
         @param particle object with a name and a list of channels (each with label and daughters)
         @param branching_fractions table: mother name -> {sorted tuple of daughter names -> branching fraction}
         @return dictionary channel label -> branching fraction; 0.0 for channels not found in the table
         """
         result = {c.label: 0.0 for c in particle.channels}
         name = particle.name
         # Daughters are given as 'name:label'; only the name is used for the lookup.
         channels = [tuple(sorted(d.split(':')[0] for d in channel.daughters)) for channel in particle.channels]
         if name not in branching_fractions:
             # Fall back to the charge-conjugated decay.
             name = pdg.conjugate(name)
             # The table is keyed by sorted tuples and conjugating the names can change
             # their order (e.g. ('K+', 'anti-D0') becomes ('K-', 'D0')), hence sort again.
             channels = [tuple(sorted(pdg.conjugate(d) for d in channel)) for channel in channels]
             if name not in branching_fractions:
                 return result
         table = branching_fractions[name]
         for c, key in zip(particle.channels, channels):
             if key in table:
                 result[c.label] = table[key]
         return result

     def loadExclusiveBranchingFractions(self, filename):
         """
         Load branching fraction from MC decay-file.
         @param filename the name of the decay file
         @return dictionary: mother name -> {sorted tuple of daughter names (without neutrinos) -> branching fraction}
         """

         def isFloat(element):
             """ Checks if element is a convertible to float"""
             try:
                 float(element)
                 return True
             except ValueError:
                 return False

         def isValidParticle(element):
             """ Checks if element is a valid pdg name for a particle"""
             try:
                 pdg.from_name(element)
                 return True
             except LookupError:
                 return False

         # Neutrinos are dropped from the daughter lists used as keys.
         neutrinos = frozenset(('nu_e', 'nu_mu', 'nu_tau', 'anti-nu_e', 'anti-nu_mu', 'anti-nu_tau'))

         branching_fractions = {}
         # Name of the particle whose decay block is currently read; None outside
         # of a block and for blocks of particles unknown to pdg.
         mother = None
         with open(filename) as f:
             for line in f:
                 # split() without argument handles tabs, repeated blanks and the newline
                 fields = line.split()
                 if not fields or fields[0].startswith('#'):
                     continue
                 # Checked before the length requirement below, because the keyword
                 # usually stands alone on its line.
                 if fields[0] == 'Enddecay':
                     mother = None
                     continue
                 if len(fields) < 2:
                     continue
                 if fields[0] == 'Decay':
                     mother = fields[1] if isValidParticle(fields[1]) else None
                     continue
                 if mother is None:
                     continue
                 # A channel line starts with the branching fraction followed by the daughters.
                 # The last field is always dropped, further trailing fields which are
                 # no particle names (e.g. decay model and its parameters) are stripped below.
                 fields = fields[:-1]
                 if not isFloat(fields[0]):
                     continue
                 while len(fields) > 1 and not isValidParticle(fields[-1]):
                     fields = fields[:-1]
                 if not all(isValidParticle(p) for p in fields[1:]):
                     continue
                 daughters = tuple(sorted(p for p in fields[1:] if p not in neutrinos))
                 # Lines leading to the same daughter tuple are summed up.
                 mother_channels = branching_fractions.setdefault(mother, {})
                 mother_channels[daughters] = mother_channels.get(daughters, 0.0) + float(fields[0])

         return branching_fractions

     def loadInclusiveBranchingFractions(self, exclusive_branching_fractions):
         """
         Calculate the inclusive branching fractions from the exclusive ones.
         The particles are processed in order of increasing mass: the summed branching
         fraction of a particle is multiplied onto every channel which contains this
         particle as daughter, once per occurrence.
         @param exclusive_branching_fractions table as returned by loadExclusiveBranchingFractions;
                it is not modified
         @return new table with the same layout containing the inclusive branching fractions
         """
         particles = set(exclusive_branching_fractions.keys())
         # Charge-conjugated particles use the table entry of their partner.
         particles.update({pdg.conjugate(p) for p in particles if p != pdg.conjugate(p)})
         particles = sorted(particles, key=lambda x: pdg.get(x).Mass())
         inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)

         for p in particles:
             if p in inclusive_branching_fractions:
                 br = sum(inclusive_branching_fractions[p].values())
             else:
                 br = sum(inclusive_branching_fractions[pdg.conjugate(p)].values())
             for p_br in inclusive_branching_fractions.values():
                 for c in p_br:
                     for _ in range(c.count(p)):
                         p_br[c] *= br
         return inclusive_branching_fractions


 class MonitoringParticle:
     """
     Monitoring object containing all the monitoring information
     about a single particle
     """

     def __init__(self, particle):
         """
         Read the monitoring information of the given particle
         @param particle the particle for which the information is read
         """
         # the particle for which the information is read
         self.particle = particle
         # dictionary with 'sum', 'std', 'avg', 'max', and 'min' of the MC counts
         self.mc_count = MonitoringMCCount(particle)
         # module statistics of the particle
         self.module_statistic = MonitoringModuleStatistics(particle)
         # time per channel
         self.time_per_channel = self.module_statistic.channel_time
         # time per channel, split by module type
         self.time_per_channel_per_module = self.module_statistic.channel_time_per_module
         # time of the particle itself plus the time of all its channels
         self.total_time = self.module_statistic.particle_time + sum(self.time_per_channel.values())

         # number of channels of the particle
         self.total_number_of_channels = len(self.particle.channels)
         # number of channels with a non-empty target histogram after the classifier
         self.reconstructed_number_of_channels = 0

         # access to the branching fraction tables
         self.branching_fractions = MonitoringBranchingFractions()
         # exclusive branching fraction per channel label
         self.exc_br_per_channel = self.branching_fractions.getExclusive(particle)
         # inclusive branching fraction per channel label
         self.inc_br_per_channel = self.branching_fractions.getInclusive(particle)

         # Statistic per channel label for the different stages of the reconstruction
         self.before_ranking = {}
         self.after_ranking = {}
         self.after_vertex = {}
         self.after_classifier = {}
         # MonitoringHist of the training data per channel label
         self.training_data = {}
         # per channel label: True if the channel did not count as reconstructed
         self.ignored_channels = {}

         for channel in self.particle.channels:
             hist = MonitoringHist('Monitor_PreReconstruction_BeforeRanking.root', f'{channel.label}')
             self.before_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
             hist = MonitoringHist('Monitor_PreReconstruction_AfterRanking.root', f'{channel.label}')
             self.after_ranking[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
             hist = MonitoringHist('Monitor_PreReconstruction_AfterVertex.root', f'{channel.label}')
             self.after_vertex[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
             hist = MonitoringHist('Monitor_PostReconstruction_AfterMVA.root', f'{channel.label}')
             self.after_classifier[channel.label] = self.calculateStatistic(hist, channel.mvaConfig.target)
             # The histograms after the classifier decide whether the channel counts as
             # reconstructed. A missing histogram sums to NaN, which fails the comparison.
             if hist.valid and hist.sum(channel.mvaConfig.target) > 0:
                 self.reconstructed_number_of_channels += 1
                 self.ignored_channels[channel.label] = False
             else:
                 self.ignored_channels[channel.label] = True
             hist = MonitoringHist('Monitor_TrainingData.root', f'{channel.label}')
             self.training_data[channel.label] = hist

         # the particle identifier without slashes is used as directory / tree name
         plist = removeJPsiSlash(particle.identifier)

         hist = MonitoringHist('Monitor_PostReconstruction_BeforePostCut.root', f'{plist}')
         # Statistic before the post cut
         self.before_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)

         hist = MonitoringHist('Monitor_PostReconstruction_BeforeRanking.root', f'{plist}')
         # Statistic before the ranking of the post reconstruction
         self.before_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)

         hist = MonitoringHist('Monitor_PostReconstruction_AfterRanking.root', f'{plist}')
         # Statistic after the ranking of the post reconstruction
         self.after_ranking_postcut = self.calculateStatistic(hist, self.particle.mvaConfig.target)
         # Statistic before the unique tagging; calculated from the same histograms
         # as after_ranking_postcut
         self.before_tag = self.calculateStatistic(hist, self.particle.mvaConfig.target)
         # Statistic where only unique signal candidates count as signal
         self.after_tag = self.calculateUniqueStatistic(hist)

         # the final ntuple of the particle
         self.final_ntuple = MonitoringNTuple('Monitor_Final.root', f'{plist}')

     def calculateStatistic(self, hist, target):
         """
         Calculate Statistic object where all signal candidates are considered signal
         @param hist the MonitoringHist containing the histogram of the target variable
         @param target name of the target variable (key of the histogram)
         @return Statistic; without signal and background if the histogram is not available
         """
         return self._countCandidates(hist, target)

     def calculateUniqueStatistic(self, hist):
         """
         Calculate Statistic object where only unique signal candidates are considered signal
         @param hist the MonitoringHist containing the histogram 'extraInfo(uniqueSignal)'
         @return Statistic; without signal and background if the histogram is not available
         """
         return self._countCandidates(hist, 'extraInfo(uniqueSignal)')

     def _countCandidates(self, hist, name):
         """
         Helper which splits the entries of a histogram into signal (bin center above 0.5)
         and background (all other bins).
         @param hist the MonitoringHist to read from
         @param name key of the histogram
         @return Statistic using the 'sum' of the MC counts as number of true signal particles
         """
         nTrueSig = self.mc_count['sum']
         # A readable file without the requested histogram is handled like a missing
         # file instead of raising a KeyError.
         if not hist.valid or name not in hist.centers:
             return Statistic(nTrueSig, 0, 0)
         signal_bins = hist.centers[name] > 0.5
         nSig = hist.values[name][signal_bins].sum()
         nBg = hist.values[name][~signal_bins].sum()
         return Statistic(nTrueSig, nSig, nBg)


 # @endcond

plotting.Diagonal
Definition: plotting.py:525

plotting.RejectionOverEfficiency
Definition: plotting.py:419

plotting.VerboseDistribution
Definition: plotting.py:969

std

pdg.conjugate
def conjugate(name)
Definition: pdg.py:110

pdg.from_name
def from_name(name)
Definition: pdg.py:62

pdg.get
def get(name)
Definition: pdg.py:47