12 Contains classes to read in the monitoring output
13 and some simple plotting routines.
15 This is used by printReporting.py and latexReporting.py
16 to create summaries for a FEI training or application.
20 from generators
import get_default_decayfile
21except ModuleNotFoundError:
22 print(
"MonitoringBranchingFractions won't work.")
23from basf2_mva_evaluation
import plotting
34def removeJPsiSlash(string):
35 """ Remove slashes in a string, which is not allowed for filenames. """
36 return string.replace(
'/',
'')
40 """ Load the FEI configuration from the Summary.pickle file. """
41 if not os.path.isfile(
'Summary.pickle'):
42 raise RuntimeError(
"""Could not find Summary.pickle!
43 This file is automatically created by the FEI training.
44 But you can also create it yourself using:
45 pickle.dump((particles, configuration), open('Summary.pickle', 'wb'))""")
46 return pickle.load(open(
'Summary.pickle',
'rb'))
51 This class provides the efficiency, purity and other quantities for a
52 given number of true signal candidates, signal candidates and background candidates
57 Create a new Statistic object
58 @param nTrueSig the number of true signal particles
59 @param nSig the number of reconstructed signal candidates
60 @param nBg the number of reconstructed background candidates
71 """ Returns the number of reconstructed signal candidates. """
76 """ Returns the number of reconstructed true signal candidates. """
81 """ Returns the number of reconstructed background candidates. """
86 """ Returns total number of reconstructed candidates. """
91 """ Returns the purity of the reconstructed candidates. """
102 """ Returns the efficiency of the reconstructed signal candidates with respect to the number of true signal particles. """
111 """ Returns the uncertainty of the purity. """
121 Returns the uncertainty of the efficiency.
122 For an efficiency eps = self._nSig/self._nTrueSig, this function calculates the
123 standard deviation according to http://arxiv.org/abs/physics/0701199 .
130 """ Helper method to calculate the standard deviation for efficiencies. """
133 variance = (k + 1) * (k + 2) / ((n + 2) * (n + 3)) - (k + 1) ** 2 / ((n + 2) ** 2)
136 return math.sqrt(variance)
139 """ Returns a string representation of a Statistic object. """
140 o = f
"nTrueSig {self.nTrueSig} nSig {self.nSig} nBg {self.nBg}\n"
141 o += f
"Efficiency {self.efficiency:.3f} ({self.efficiencyError:.3f})\n"
142 o += f
"Purity {self.purity:.3f} ({self.purityError:.3f})\n"
146 """ Adds two Statistics objects and returns a new object. """
153 Returns a new Statistic object if the current one is added to zero.
154 Necessary to apply sum-function to Statistic objects.
157 return NotImplemented
165 Reads all TH1F and TH2F from a ROOT file
166 and puts them into a more accessible format.
171 Reads histograms from the given file
172 @param filename the name of the ROOT file
185 self.
valid = os.path.isfile(filename)
190 f = ROOT.TFile.Open(filename,
'read')
191 d = f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(dirname))
193 for key
in d.GetListOfKeys():
194 name = ROOT.Belle2.MakeROOTCompatible.invertMakeROOTCompatible(key.GetName())
196 if not (isinstance(hist, ROOT.TH1D)
or isinstance(hist, ROOT.TH1F)
or
197 isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)):
199 self.
two_dimensional[name] = isinstance(hist, ROOT.TH2D)
or isinstance(hist, ROOT.TH2F)
201 nbins = (hist.GetNbinsX(), hist.GetNbinsY())
202 self.
centers[name] = [[hist.GetXaxis().GetBinCenter(i)
for i
in range(nbins[0] + 2)],
203 [hist.GetYaxis().GetBinCenter(i)
for i
in range(nbins[1] + 2)]]
204 self.
values[name] = [[hist.GetBinContent(i, j)
for i
in range(nbins[0] + 2)]
for j
in range(nbins[1] + 2)]
205 self.
nbins[name] = nbins
207 nbins = hist.GetNbinsX()
208 self.
centers[name] = np.array([hist.GetBinCenter(i)
for i
in range(nbins + 2)])
209 self.
values[name] = np.array([hist.GetBinContent(i)
for i
in range(nbins + 2)])
210 self.
nbins[name] = nbins
214 Calculates the sum of a given histogram (== sum of all entries)
215 @param name key of the histogram
221 for i
in range(len(self.
values[name])):
222 for j
in range(len(self.
values[name][i])):
223 tempsum += self.
values[name][i][j]
225 return np.sum(self.
values[name])
229 Calculates the mean of a given histogram
230 @param name key of the histogram
236 for i
in range(len(self.
values[name])):
237 for j
in range(len(self.
values[name][i])):
238 tempsum += self.
centers[name][i][j] * self.
values[name][i][j]
239 return tempsum / self.
sum(name)
240 return np.average(self.
centers[name], weights=self.
values[name])
244 Calculates the standard deviation of a given histogram
245 @param name key of the histogram
250 avg = self.
mean(name)
252 for i
in range(len(self.
values[name])):
253 for j
in range(len(self.
values[name][i])):
254 tempsum += self.
values[name][i][j] * (self.
centers[name][i][j] - avg)**2
255 return np.sqrt(tempsum / self.
sum(name))
256 avg = np.average(self.
centers[name], weights=self.
values[name])
257 return np.sqrt(np.average((self.
centers[name] - avg)**2, weights=self.
values[name]))
261 Calculates the minimum of a given histogram
262 @param name key of the histogram
268 for i
in range(len(self.
values[name])):
269 for j
in range(len(self.
values[name][i])):
270 if self.
values[name][i][j] < tempmin:
271 tempmin = self.
centers[name][i][j]
273 nonzero = np.nonzero(self.
values[name])[0]
274 if len(nonzero) == 0:
276 return self.
centers[name][nonzero[0]]
280 Calculates the maximum of a given histogram
281 @param name key of the histogram
287 for i
in range(len(self.
values[name])):
288 for j
in range(len(self.
values[name][i])):
289 if self.
values[name][i][j] > tempmax:
290 tempmax = self.
centers[name][i][j]
292 nonzero = np.nonzero(self.
values[name])[0]
293 if len(nonzero) == 0:
295 return self.
centers[name][nonzero[-1]]
300 Reads the ntuple named variables from a ROOT file
305 Reads ntuple from the given file
306 @param filename the name of the ROOT file
311 self.
valid = os.path.isfile(filename)
312 print(f
'FEI-monitoring: Looking for {filename}')
314 raise RuntimeError(f
"Could not find {filename}: current dir is {os.getcwd()}")
317 self.
f = ROOT.TFile.Open(filename,
'read')
318 print(f
'FEI-monitoring: Found {filename}')
320 self.
tree = self.
f.Get(ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f
'{treenameprefix} variables'))
321 print(f
'FEI-monitoring: Found {treenameprefix} variables')
328 Reads the module statistics for a single particle from the outputted root file
329 and puts them into a more accessible format
334 Reads the module statistics from the file named Monitor_ModuleStatistics.root
335 @param particle the particle for which the statistics are read
339 root_file = ROOT.TFile.Open(
'Monitor_ModuleStatistics.root',
'read')
340 persistentTree = root_file.Get(
'persistent')
341 persistentTree.GetEntry(0)
343 stats = persistentTree.ProcessStatistics.Clone()
346 numEntries = persistentTree.GetEntriesFast()
347 for i
in range(1, numEntries):
348 persistentTree.GetEntry(i)
349 stats.merge(persistentTree.ProcessStatistics)
352 statistic = {m.getName(): m.getTimeSum(m.c_Event) / 1e9
for m
in stats.getAll()}
358 for channel
in particle.channels:
362 'BestCandidateSelection': 0.0,
363 'PListCutAndCopy': 0.0,
364 'VariablesToExtraInfo': 0.0,
366 'ParticleSelector': 0.0,
368 'ParticleVertexFitter': 0.0,
369 'TagUniqueSignal': 0.0,
370 'VariablesToHistogram': 0.0,
371 'VariablesToNtuple': 0.0}
372 for key, time
in statistic.items():
373 if (channel.decayString
in key
or channel.name
in key):
381 for key, time
in statistic.items():
382 if particle.identifier
in key:
386def MonitorSigProbPlot(particle, filename):
387 """ Creates a Signal probability plot using ROOT. """
388 if not particle.final_ntuple.valid:
391 [
'extraInfo__bouniqueSignal__bc',
392 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
393 [
'unique',
'probability',
'signal'], max_entries=int(1e8))
396 common = (df[
'probability'] >= 0) & (df[
'probability'] <= 1)
398 p.add(df,
'probability', (df[
'signal'] == 1), label=
"Signal")
399 p.add(df,
'probability', (df[
'signal'] == 0), label=
"Background")
401 p.axis.set_title(
"Signal probability")
402 p.axis.set_xlabel(
"Probability")
403 p.save(filename +
'.png')
406def MonitorSpectatorPlot(particle, spectator, filename, range=(
None,
None)):
407 """ Creates a spectator plot using ROOT. """
408 if not particle.final_ntuple.valid:
411 [
'extraInfo__bouniqueSignal__bc', spectator,
412 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
413 [
'unique', spectator,
'probability',
'signal'], max_entries=int(1e8))
414 for i, cut
in enumerate([0.0, 0.01, 0.05, 0.1, 0.2, 0.5]):
416 common = (df[
'probability'] >= cut)
417 if range[0]
is not None:
418 common &= (df[spectator] >= range[0])
419 if range[1]
is not None:
420 common &= (df[spectator] <= range[1])
422 p.add(df, spectator, (df[
'signal'] == 1), label=
"Signal")
423 p.add(df, spectator, (df[
'signal'] == 0), label=
"Background")
425 p.axis.set_title(f
"{spectator} for signal probability >= {cut:.2f}")
426 p.axis.set_xlabel(spectator)
427 p.save(f
'{filename}_{i}.png')
430def MonitorROCPlot(particle, filename):
431 """ Creates a ROC plot using ROOT. """
432 if not particle.final_ntuple.valid:
435 [
'extraInfo__bouniqueSignal__bc',
436 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
437 [
'unique',
'probability',
'signal'], max_entries=int(1e8))
439 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0, label=
'All')
441 p.save(filename +
'.png')
444def MonitorDiagPlot(particle, filename):
445 """ Creates a Diagonal plot using ROOT. """
446 if not particle.final_ntuple.valid:
449 [
'extraInfo__bouniqueSignal__bc',
450 'extraInfo__boSignalProbability__bc', particle.particle.mvaConfig.target],
451 [
'unique',
'probability',
'signal'], max_entries=int(1e8))
453 p.add(df,
'probability', df[
'signal'] == 1, df[
'signal'] == 0)
455 p.save(filename +
'.png')
458def MonitoringMCCount(particle):
460 Reads the MC Counts for a given particle from the ROOT file mcParticlesCount.root
461 @param particle the particle for which the MC counts are read
462 @return dictionary with 'sum', 'std', 'avg', 'max', and 'min'
466 root_file = ROOT.TFile.Open(
'mcParticlesCount.root',
'read')
468 key = f
'NumberOfMCParticlesInEvent({abs(pdg.from_name(particle.name))})'
470 key = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(key)
471 hist = root_file.Get(key)
473 mc_counts = {
'sum': 0,
'std': 0,
'avg': 0,
'min': 0,
'max': 0}
475 mc_counts[
'sum'] = sum(hist.GetXaxis().GetBinCenter(bin + 1) * hist.GetBinContent(bin + 1)
476 for bin
in range(hist.GetNbinsX()))
477 mc_counts[
'std'] = hist.GetStdDev()
478 mc_counts[
'avg'] = hist.GetMean()
479 mc_counts[
'max'] = hist.GetXaxis().GetBinCenter(hist.FindLastBinAbove(0.0))
480 mc_counts[
'min'] = hist.GetXaxis().GetBinCenter(hist.FindFirstBinAbove(0.0))
485 """ Class extracts the branching fractions of a decay channel from the DECAY.DEC file. """
491 Create a new MonitoringBranchingFraction object.
492 The extracted branching fractions are cached, hence creating more than one object does not do anything.
494 if MonitoringBranchingFractions._shared
is None:
495 decay_file = get_default_decayfile()
505 """ Returns the exclusive (i.e. without the branching fractions of the daughters) branching fraction of a particle. """
509 """ Returns the inclusive (i.e. including all branching fractions of the daughters) branching fraction of a particle. """
513 """ Returns the branching fraction of a particle given a branching_fraction table. """
514 result = {c.label: 0.0
for c
in particle.channels}
516 channels = [tuple(sorted(d.split(
':')[0]
for d
in channel.daughters))
for channel
in particle.channels]
517 if name
not in branching_fractions:
519 channels = [tuple(
pdg.conjugate(d)
for d
in channel)
for channel
in channels]
520 if name
not in branching_fractions:
522 for c, key
in zip(particle.channels, channels):
523 if key
in branching_fractions[name]:
524 result[c.label] = branching_fractions[name][key]
529 Load branching fraction from MC decay-file.
532 def isFloat(element):
533 """ Checks if element is a convertible to float"""
540 def isValidParticle(element):
541 """ Checks if element is a valid pdg name for a particle"""
548 branching_fractions = {
'UNKOWN': {}}
551 with open(filename)
as f:
553 fields = line.split(
' ')
554 fields = [x
for x
in fields
if x !=
'']
555 if len(fields) < 2
or fields[0][0] ==
'#':
557 if fields[0] ==
'Decay':
558 mother = fields[1].strip()
559 if not isValidParticle(mother):
562 if fields[0] ==
'Enddecay':
565 if mother ==
'UNKOWN':
568 if len(fields) < 1
or not isFloat(fields[0]):
570 while len(fields) > 1:
571 if isValidParticle(fields[-1]):
574 if len(fields) < 1
or not all(isValidParticle(p)
for p
in fields[1:]):
576 neutrinoTag_list = [
'nu_e',
'nu_mu',
'nu_tau',
'anti-nu_e',
'anti-nu_mu',
'anti-nu_tau']
577 daughters = tuple(sorted(p
for p
in fields[1:]
if p
not in neutrinoTag_list))
578 if mother
not in branching_fractions:
579 branching_fractions[mother] = {}
580 if daughters
not in branching_fractions[mother]:
581 branching_fractions[mother][daughters] = 0.0
582 branching_fractions[mother][daughters] += float(fields[0])
584 del branching_fractions[
'UNKOWN']
585 return branching_fractions
589 Get covered branching fraction of a particle using a recursive algorithm
590 and the given exclusive branching_fractions (given as Hashable List)
591 @param particle identifier of the particle
592 @param branching_fractions
594 particles = set(exclusive_branching_fractions.keys())
596 particles = sorted(particles, key=
lambda x:
pdg.get(x).Mass())
597 inclusive_branching_fractions = copy.deepcopy(exclusive_branching_fractions)
600 if p
in inclusive_branching_fractions:
601 br = sum(inclusive_branching_fractions[p].values())
603 br = sum(inclusive_branching_fractions[
pdg.conjugate(p)].values())
604 for p_br
in inclusive_branching_fractions.values():
606 for i
in range(c.count(p)):
608 return inclusive_branching_fractions
613 Monitoring object containing all the monitoring information
614 about a single particle
619 Read the monitoring information of the given particle
620 @param particle the particle for which the information is read
626 for i
in range(len(particlesInStages)):
627 for iparticle
in particlesInStages[i]:
628 if iparticle.identifier == self.
particle.identifier:
632 raise RuntimeError(f
"Could not find particle {self.particle.identifier} in the list of stages.")
670 for channel
in self.
particle.channels:
671 hist =
MonitoringHist(
'Monitor_PreReconstruction_BeforeRanking.root', f
'{channel.label}')
673 hist =
MonitoringHist(
'Monitor_PreReconstruction_AfterRanking.root', f
'{channel.label}')
675 hist =
MonitoringHist(
'Monitor_PreReconstruction_AfterVertex.root', f
'{channel.label}')
677 hist =
MonitoringHist(
'Monitor_PostReconstruction_AfterMVA.root', f
'{channel.label}')
679 if hist.valid
and hist.sum(channel.mvaConfig.target) > 0:
684 hist =
MonitoringHist(
'Monitor_TrainingData.root', f
'{channel.label}')
687 plist = removeJPsiSlash(particle.identifier)
688 hist =
MonitoringHist(
'Monitor_PostReconstruction_BeforePostCut.root', f
'{plist}')
691 hist =
MonitoringHist(
'Monitor_PostReconstruction_BeforeRanking.root', f
'{plist}')
694 hist =
MonitoringHist(
'Monitor_PostReconstruction_AfterRanking.root', f
'{plist}')
706 Calculate Statistic object where all signal candidates are considered signal
711 signal_bins = (hist.centers[target] > 0.5)
712 bckgrd_bins = ~signal_bins
713 nSig = hist.values[target][signal_bins].sum()
714 nBg = hist.values[target][bckgrd_bins].sum()
719 Calculate Static object where only unique signal candidates are considered signal
727 if getattr(entry,
"extraInfo__bouniqueSignal__bc") == 1:
get_stages_from_particles(typing.Sequence[typing.Union[config.Particle, str]] particles)
chain2dict(chain, tree_columns, dict_columns=None, max_entries=None)
getInclusive(self, particle)
loadInclusiveBranchingFractions(self, exclusive_branching_fractions)
exclusive_branching_fractions
exclusive branching fractions
getExclusive(self, particle)
loadExclusiveBranchingFractions(self, filename)
inclusive_branching_fractions
inclusive branching fractions
getBranchingFraction(self, particle, branching_fractions)
dict nbins
Dictionary of number of bins for each histogram.
dict two_dimensional
Dictionary of 2D mode for each histogram.
valid
Indicates if the histograms were successfully read.
__init__(self, filename, dirname)
dict centers
Dictionary of bin-centers for each histogram.
dict values
Dictionary of bin-contents for each histogram.
dict channel_time_per_module
the time per module
int particle_time
the time per particle
dict channel_time
the time for each channel
filename
Filename so we can use it later.
valid
Indicates if the ntuple were successfully read.
f
Reference to the ROOT file, so it isn't closed.
tree
Reference to the tree named variables inside the ROOT file.
__init__(self, filename, treenameprefix)
int reconstructed_number_of_channels
Reconstructed number of channels.
after_ranking_postcut
Monitoring histogram in PostReconstruction after the ranking postcut.
before_postcut
Monitoring histogram in PostReconstruction before the postcut.
exc_br_per_channel
Exclusive branching fractions per channel.
dict before_ranking
Monitoring histogram in PreReconstruction before the ranking-cut.
before_ranking_postcut
Monitoring histogram in PostReconstruction before the ranking postcut.
inc_br_per_channel
Inclusive branching fraction per channel.
branching_fractions
Branching fractions.
time_per_channel
time per channel
dict ignored_channels
Dictionary containing whether the channel reconstructed at least one candidate or not.
dict after_vertex
Monitoring histogram in PreReconstruction after the vertex fit.
calculateUniqueStatistic(self, tree)
total_number_of_channels
Total number of channels.
particle
Particle containing its configuration.
final_ntuple
Reference to the final ntuple.
before_tag
Statistic object before unique tagging of signals.
dict training_data
Monitoring histogram for TrainingData Generation only available if Monitoring runs on the training mo...
mc_count
Dictionary with 'sum', 'std', 'mean', 'min' and 'max' of the MC counts.
calculateStatistic(self, hist, target)
after_tag
Statistic object after unique tagging of signals.
dict after_classifier
Monitoring histogram in PostReconstruction after the mva application.
dict after_ranking
Monitoring histogram in PreReconstruction after the ranking-cut.
time_per_channel_per_module
time per channel per module
module_statistic
Module statistics.
int _nSig
the number of reconstructed signal candidates
_nBg
the number of reconstructed background candidates
__init__(self, nTrueSig, nSig, nBg)
int _nTrueSig
the number of true signal particles
calcStandardDeviation(self, k, n)