Belle II Software  release-05-02-19
root_handler.py
1 import numpy as np
2 import os
3 from root_pandas import read_root
4 
5 import pandas as pd
6 
7 
9  """This class represents a loaded validation root file. It has methods for plotting the typically needed graphs."""
10 
def __init__(self, filename, label=None, color_index=0, additional_information=None):
    """Load a validation root file and prepare it for plotting.

    Parameters:
        filename: path of the root file; handed to ``read_root``.
        label: legend label used by ``plot`` (no legend is drawn if None).
        color_index: index into the seaborn color palette; wraps around
            when it exceeds the palette length.
        additional_information: optional mapping that ``append_to_dataframe``
            merges into the row it writes.
    """
    # The root filename.
    self.filename = filename

    # Data of the "pr_tree/pr_tree" tree, with an added boolean "is_prompt"
    # column: transverse truth distance below 0.5 and flagged as primary.
    self.pr_data = read_root(self.filename, tree_key="pr_tree/pr_tree")
    pr_tree = self.pr_data
    pr_radius = np.sqrt(pr_tree.x_truth ** 2 + pr_tree.y_truth ** 2)
    pr_tree["is_prompt"] = (pr_radius < 0.5) & (pr_tree.is_primary == 1)

    # Data of the "mc_tree/mc_tree" tree, with the same "is_prompt" column.
    self.mc_data = read_root(self.filename, tree_key="mc_tree/mc_tree")
    mc_tree = self.mc_data
    mc_radius = np.sqrt(mc_tree.x_truth ** 2 + mc_tree.y_truth ** 2)
    mc_tree["is_prompt"] = (mc_radius < 0.5) & (mc_tree.is_primary == 1)

    # Prompt-only subsets of both trees.
    self.mc_prompts = mc_tree[mc_tree.is_prompt == 1]
    self.pr_prompts = pr_tree[pr_tree.is_prompt == 1]

    # seaborn is imported lazily, only once the data has been read.
    import seaborn as sb

    palette = sb.color_palette()

    # Legend label and plot color of this result.
    self.label = label
    self.color = palette[color_index % len(palette)]

    # Figures of merit; stay None until get_figure_of_merits() fills them.
    self.finding_efficiency = None
    self.hit_efficiency = None
    self.fake_rate = None
    self.clone_rate = None

    # Extra key/value pairs for append_to_dataframe().
    self.additional_information = additional_information
51 
@staticmethod
def from_calculations(calculations, key="output_file_name", parameter_part=None):
    """Create one validation result per entry of an ipython calculation.

    Parameters:
        calculations: iterable of objects offering ``get(key)`` and
            ``get_parameters()``.
        key: name passed to ``get`` to obtain the root file name.
        parameter_part: if truthy, only ``get_parameters()[parameter_part]``
            is used as label; otherwise the full parameters object is.

    Returns:
        A list of TrackingValidationResult objects whose color index is the
        position of the calculation in ``calculations``.
    """
    def label_of(calculation):
        # Pick either the full parameter set or just the requested part.
        parameters = calculation.get_parameters()
        return parameters[parameter_part] if parameter_part else parameters

    return [
        TrackingValidationResult(
            calculation.get(key),
            label=label_of(calculation),
            color_index=position)
        for position, calculation in enumerate(calculations)]
69 
def get_figure_of_merits(self):
    """Return the figures of merit from the file.

    Mostly used for internal setting of the properties. The two overview
    trees are only read while ``finding_efficiency`` is still None; later
    calls return the values stored on the instance.

    Returns:
        dict with the keys "finding_efficiency", "hit_efficiency",
        "clone_rate" and "fake_rate".
    """
    if self.finding_efficiency is None:
        mc_side = read_root(self.filename, tree_key="ExpertMCSideTrackingValidationModule_overview_figures_of_merit")
        self.finding_efficiency = mc_side.finding_efficiency[0]
        self.hit_efficiency = mc_side.hit_efficiency[0]

        pr_side = read_root(self.filename, tree_key="ExpertPRSideTrackingValidationModule_overview_figures_of_merit")
        self.clone_rate = pr_side.clone_rate[0]
        self.fake_rate = pr_side.fake_rate[0]

    return {
        "finding_efficiency": self.finding_efficiency,
        "hit_efficiency": self.hit_efficiency,
        "clone_rate": self.clone_rate,
        "fake_rate": self.fake_rate,
    }
85 
def get_figures_of_merit_latex(self):
    """Return the figures of merit as a LaTeX-ready table.

    The values come from ``get_figure_of_merits`` and are written as
    percentages with two decimals.

    Returns:
        The table as a single string without a trailing newline.
    """
    merits = self.get_figure_of_merits()

    # (text before the number, key in the merits dict, text after the number)
    rows = (
        (r' Finding Efficiency & ', "finding_efficiency", r' \% \\'),
        (r' Hit Efficiency & ', "hit_efficiency", r' \% \\'),
        (r' Fake Rate & ', "fake_rate", r' \% \\'),
        (r' Clone Rate & ', "clone_rate", r' \% \\ \bottomrule'),
    )

    table_lines = [
        r'\begin{table}',
        r' \begin{tabular}{cc} \toprule',
        r' & \\ \midrule',
    ]
    for prefix, merit_key, suffix in rows:
        table_lines.append(prefix + "%.2f" % (100 * merits[merit_key]) + suffix)
    table_lines.append(r' \end{tabular}')
    table_lines.append(r'\end{table}')

    return "\n".join(table_lines)
101 
def plot_efficiency_point(self):
    """Plot a point in the finding-efficiency/hit-efficiency plane.

    Both efficiencies are drawn as percentages through ``self.plot`` with
    the legend at location 3.
    """
    import matplotlib.pyplot as plt

    # The efficiency attributes are None until get_figure_of_merits() has
    # run, and multiplying None would raise a TypeError. Going through the
    # getter loads them on first use and returns the stored values afterwards.
    merits = self.get_figure_of_merits()

    self.plot(100 * merits["finding_efficiency"], 100 * merits["hit_efficiency"], loc=3)
    plt.xlabel("finding efficiency")
    plt.ylabel("hit efficiency")
108 
def grouped_by_pt_data(self, mc_data=None):
    """Return the input data (or the internal mc_data) grouped by pt.

    The ``pt_truth`` column is split into 9 equally wide bins spanning its
    minimum to its maximum.

    Parameters:
        mc_data: DataFrame with a ``pt_truth`` column; defaults to
            ``self.mc_data`` when None.

    Returns:
        The pandas groupby object of the data grouped by pt bin.
    """
    if mc_data is None:
        mc_data = self.mc_data

    pt = mc_data.pt_truth
    bin_edges = np.linspace(pt.min(), pt.max(), 10)

    # The first edge equals the smallest pt value. pd.cut builds right-closed
    # bins, so without include_lowest that row falls outside every bin and is
    # silently dropped from the grouping.
    pt_bins = pd.cut(pt, bin_edges, include_lowest=True)

    # observed=False keeps empty bins in the result, which is the behaviour
    # the implicit default gave so far.
    return mc_data.groupby(pt_bins, observed=False)
118 
def plot(self, data_x, data_y, loc=4, yerr=None):
    """Plot data_y over data_x with the settings of this result.

    Mostly used internally.

    Parameters:
        data_x: x values.
        data_y: y values.
        loc: legend location passed to ``plt.legend``.
        yerr: optional y errors; when given, an errorbar plot is drawn
            instead of a plain line plot.
    """
    import matplotlib.pyplot as plt

    # Line settings shared by both plot variants.
    line_style = dict(ls="-", marker="o", color=self.color, label=self.label, lw=4)

    if yerr is None:
        plt.plot(data_x, data_y, **line_style)
    else:
        plt.errorbar(data_x, data_y, yerr=yerr, **line_style)

    # A legend is only drawn for results that carry a label.
    if self.label is not None:
        plt.legend(loc=loc, frameon=True)
131 
def plot_finding_efficiency(self, data=None):
    """Plot the finding efficiency over pt.

    Parameters:
        data: DataFrame to plot; passed to ``grouped_by_pt_data``, which
            falls back to ``self.mc_data`` when None.
    """
    import matplotlib.pyplot as plt

    grouped = self.grouped_by_pt_data(data)

    # Select the column before aggregating, so only the needed columns are
    # reduced instead of the whole frame three times over.
    pt_per_bin = grouped["pt_truth"].median()
    matched = grouped["is_matched"]

    self.plot(pt_per_bin, matched.mean(), yerr=1 / np.sqrt(matched.count()))
    plt.xlabel(r"$p_T$ of the MC tracks (in GeV)")
    plt.ylabel("Finding Efficiency")
140 
def plot_hit_efficiency(self, data=None):
    """Plot the hit efficiency over pt.

    Parameters:
        data: DataFrame to plot; passed to ``grouped_by_pt_data``, which
            falls back to ``self.mc_data`` when None.
    """
    import matplotlib.pyplot as plt

    grouped = self.grouped_by_pt_data(data)

    # Select each column before aggregating, so only the needed columns are
    # reduced instead of the whole frame three times over.
    pt_per_bin = grouped["pt_truth"].median()
    efficiency_per_bin = grouped["hit_efficiency"].mean()
    hits_per_bin = grouped["mc_number_of_hits"].sum()

    self.plot(pt_per_bin, efficiency_per_bin, yerr=1 / np.sqrt(hits_per_bin))
    plt.xlabel(r"$p_T$ of the MC tracks (in GeV)")
    plt.ylabel("Hit Efficiency")
149 
def print_useful_information(self):
    """Print the most useful information about this result.

    After the label, every line shows a name followed by two numbers: the
    value for the prompt subset, then the value for the full data set.
    All rows except "Wrong Hits" are column means scaled by 100.
    """
    all_pr = self.pr_data
    all_mc = self.mc_data
    prompt_pr = all_pr[self.pr_data.is_prompt == 1]
    prompt_mc = all_mc[self.mc_data.is_prompt == 1]

    def percent(frame, column):
        # Mean of a column, scaled to percent.
        return 100 * frame[column].mean()

    print(self.label)

    for title, column in (("Fake", "is_fake"),
                          ("Clone", "is_clone"),
                          ("Ghost", "is_ghost"),
                          ("Fitted", "is_fitted")):
        print(title, percent(prompt_pr, column), percent(all_pr, column))

    print("Found", percent(prompt_mc, "is_matched"), percent(all_mc, "is_matched"))
    # Second estimate of the found fraction: the complement of the missing one.
    print("Found2", 100.0 - percent(prompt_mc, "is_missing"), 100.0 - percent(all_mc, "is_missing"))

    for title, column in (("Merged", "is_merged"),
                          ("Hit-Eff", "hit_efficiency")):
        print(title, percent(prompt_mc, column), percent(all_mc, column))

    # Wrong hits are reported as a plain mean, not as a percentage.
    print("Wrong Hits", prompt_pr.number_of_wrong_hits.mean(), all_pr.number_of_wrong_hits.mean())
167 
def append_to_dataframe(self, df):
    """Append the main results to an already existing dataframe.

    The new row holds the four figures of merit and the file name, plus
    every entry of ``additional_information`` if that is set. The figures
    of merit are written as currently stored on the instance, i.e. None
    until ``get_figure_of_merits`` has filled them.

    Parameters:
        df: the DataFrame to extend; it is not modified.

    Returns:
        A new DataFrame with the row appended and a fresh 0..n-1 index.
    """
    row = {"finding_efficiency": self.finding_efficiency,
           "hit_efficiency": self.hit_efficiency,
           "clone_rate": self.clone_rate,
           "fake_rate": self.fake_rate,
           "file_name": self.filename}
    if self.additional_information:
        row.update(self.additional_information)

    # DataFrame.append was removed in pandas 2.0; concatenating a one-row
    # frame gives the same result.
    return pd.concat([df, pd.DataFrame([row])], ignore_index=True)
root_handler.TrackingValidationResult.fake_rate
fake_rate
the fake rate
Definition: root_handler.py:44
root_handler.TrackingValidationResult.__init__
def __init__(self, filename, label=None, color_index=0, additional_information=None)
Definition: root_handler.py:11
root_handler.TrackingValidationResult.get_figure_of_merits
def get_figure_of_merits(self)
Definition: root_handler.py:70
root_handler.TrackingValidationResult.from_calculations
def from_calculations(calculations, key="output_file_name", parameter_part=None)
Definition: root_handler.py:53
root_handler.TrackingValidationResult.get_figures_of_merit_latex
def get_figures_of_merit_latex(self)
Definition: root_handler.py:86
root_handler.TrackingValidationResult.color
color
the color index
Definition: root_handler.py:37
root_handler.TrackingValidationResult
Definition: root_handler.py:8
root_handler.TrackingValidationResult.pr_prompts
pr_prompts
the pr prompt data
Definition: root_handler.py:28
root_handler.TrackingValidationResult.plot_efficiency_point
def plot_efficiency_point(self)
Definition: root_handler.py:102
root_handler.TrackingValidationResult.clone_rate
clone_rate
the clone rate
Definition: root_handler.py:46
root_handler.TrackingValidationResult.additional_information
additional_information
the additional information
Definition: root_handler.py:50
root_handler.TrackingValidationResult.filename
filename
The root filename.
Definition: root_handler.py:15
root_handler.TrackingValidationResult.label
label
the label
Definition: root_handler.py:35
root_handler.TrackingValidationResult.grouped_by_pt_data
def grouped_by_pt_data(self, mc_data=None)
Definition: root_handler.py:109
root_handler.TrackingValidationResult.print_useful_information
def print_useful_information(self)
Definition: root_handler.py:150
root_handler.TrackingValidationResult.hit_efficiency
hit_efficiency
the hit efficiency
Definition: root_handler.py:42
root_handler.TrackingValidationResult.mc_prompts
mc_prompts
the mc prompt data
Definition: root_handler.py:26
root_handler.TrackingValidationResult.plot
def plot(self, data_x, data_y, loc=4, yerr=None)
Definition: root_handler.py:119
root_handler.TrackingValidationResult.pr_data
pr_data
The pr data.
Definition: root_handler.py:17
root_handler.TrackingValidationResult.plot_hit_efficiency
def plot_hit_efficiency(self, data=None)
Definition: root_handler.py:141
root_handler.TrackingValidationResult.finding_efficiency
finding_efficiency
the finding efficiency
Definition: root_handler.py:40
root_handler.TrackingValidationResult.mc_data
mc_data
the mc data
Definition: root_handler.py:21
root_handler.TrackingValidationResult.plot_finding_efficiency
def plot_finding_efficiency(self, data=None)
Definition: root_handler.py:132
root_handler.TrackingValidationResult.append_to_dataframe
def append_to_dataframe(self, df)
Definition: root_handler.py:168