Belle II Software  release-08-01-10
root_handler.py
1 
8 
9 import numpy as np
10 import uproot
11 
12 import pandas as pd
13 
14 
16  """This class represents a loaded validation root file. It has methods for plotting the typically needed graphs."""
17 
def __init__(self, filename, label=None, color_index=0, additional_information=None):
    """Create a new validation result from the given filename.

    Additional options for plotting (e.g. color or label) can be given.

    Parameters:
        filename: path of the validation ROOT file. It is expected to hold the
            trees ``pr_tree/pr_tree`` and ``mc_tree/mc_tree``.
        label: legend label used when plotting this result.
        color_index: index into the seaborn color palette; it wraps around
            when it exceeds the palette length.
        additional_information: optional mapping stored on the instance and
            merged into the row built by ``append_to_dataframe``.
    """
    self.filename = filename

    # Open the file a single time and close it deterministically; the trees
    # are fully materialised as pandas frames before the handle is released.
    with uproot.open(self.filename) as root_file:
        self.pr_data = root_file["pr_tree/pr_tree"].arrays(library="pd")
        self.mc_data = root_file["mc_tree/mc_tree"].arrays(library="pd")

    # A track counts as prompt when its truth vertex lies within 0.5 (in the
    # units stored in the tree) of the origin in the x-y plane and it is
    # flagged as primary.
    for data in (self.pr_data, self.mc_data):
        transverse_distance = np.sqrt(data.x_truth ** 2 + data.y_truth ** 2)
        data["is_prompt"] = (transverse_distance < 0.5) & (data.is_primary == 1)

    self.mc_prompts = self.mc_data[self.mc_data.is_prompt == 1]
    self.pr_prompts = self.pr_data[self.pr_data.is_prompt == 1]

    # seaborn is only needed for its color palette, so it is imported lazily.
    import seaborn as sb

    colors = sb.color_palette()

    self.label = label
    self.color = colors[color_index % len(colors)]

    # The figures of merit start unset and are filled by get_figure_of_merits().
    self.finding_efficiency = None
    self.hit_efficiency = None
    self.fake_rate = None
    self.clone_rate = None
    self.get_figure_of_merits()

    self.additional_information = additional_information
58 
@classmethod
def from_calculations(cls, calculations, key="output_file_name", parameter_part=None):
    """Create validation results from an ipython calculation.

    One result is built per entry of ``calculations``; the position of the
    entry is used as its ``color_index``.

    Parameters:
        calculations: iterable of objects offering ``get(key)`` and
            ``get_parameters()``.
        key: name passed to ``get`` to obtain the file name of each result.
        parameter_part: if given, only ``get_parameters()[parameter_part]`` is
            used as label; otherwise the full parameter object is the label.

    Returns:
        A list with one result instance per calculation.
    """
    results = []
    for index, calculation in enumerate(calculations):
        file_name = calculation.get(key)
        parameters = calculation.get_parameters()
        label = parameters[parameter_part] if parameter_part else parameters
        # Using cls keeps this alternate constructor independent of the
        # concrete class name and lets subclasses build their own type.
        results.append(cls(file_name, label=label, color_index=index))
    return results
76 
def get_figure_of_merits(self):
    """Return the figures of merit from the file. Mostly used for internal setting of the properties.

    The values are read from the file only while ``finding_efficiency`` is
    still ``None``; afterwards the values stored on the instance are returned.

    Returns:
        dict with the keys ``finding_efficiency``, ``hit_efficiency``,
        ``clone_rate`` and ``fake_rate``.
    """
    if self.finding_efficiency is None:
        # Read both overview trees through one handle and close it afterwards.
        with uproot.open(self.filename) as root_file:
            mc_overview = root_file["ExpertMCSideTrackingValidationModule_overview_figures_of_merit"].arrays(
                library="pd")
            pr_overview = root_file["ExpertPRSideTrackingValidationModule_overview_figures_of_merit"].arrays(
                library="pd")

        # Only the first entry of each overview tree is used.
        self.finding_efficiency = mc_overview.finding_efficiency[0]
        self.hit_efficiency = mc_overview.hit_efficiency[0]
        self.clone_rate = pr_overview.clone_rate[0]
        self.fake_rate = pr_overview.fake_rate[0]

    return dict(finding_efficiency=self.finding_efficiency,
                hit_efficiency=self.hit_efficiency,
                clone_rate=self.clone_rate,
                fake_rate=self.fake_rate)
96 
98  """Print out the figures of merit as a LaTeX-ready table."""
99  results = self.get_figure_of_meritsget_figure_of_merits()
100 
101  latex_string = r'\begin{table}' + "\n"
102  latex_string += r' \begin{tabular}{cc} \toprule' + "\n"
103  latex_string += r' & \\ \midrule' + "\n"
104  latex_string += r' Finding Efficiency & ' + "%.2f" % (100 * results["finding_efficiency"]) + r' \% \\' + "\n"
105  latex_string += r' Hit Efficiency & ' + "%.2f" % (100 * results["hit_efficiency"]) + r' \% \\' + "\n"
106  latex_string += r' Fake Rate & ' + "%.2f" % (100 * results["fake_rate"]) + r' \% \\' + "\n"
107  latex_string += r' Clone Rate & ' + "%.2f" % (100 * results["clone_rate"]) + r' \% \\ \bottomrule' + "\n"
108  latex_string += r' \end{tabular}' + "\n"
109  latex_string += r'\end{table}'
110 
111  return latex_string
112 
114  """Plot a point in the finding-efficiency/hit-efficiency plane."""
115  import matplotlib.pyplot as plt
116  self.plotplot(100 * self.finding_efficiencyfinding_efficiency, 100 * self.hit_efficiencyhit_efficiency, loc=3)
117  plt.xlabel("finding efficiency")
118  plt.ylabel("hit efficiency")
119 
def grouped_by_pt_data(self, mc_data=None):
    """Convenience function to return the input data (or the internal mc_data) grouped by pt.

    The ``pt_truth`` range of the data is split into 9 equally wide bins
    (10 edges from the minimum to the maximum value).

    Parameters:
        mc_data: frame with a ``pt_truth`` column; defaults to ``self.mc_data``.

    Returns:
        The pandas groupby object of the data grouped by its pt bin.
    """
    if mc_data is None:
        mc_data = self.mc_data

    pt = mc_data.pt_truth
    bin_edges = np.linspace(pt.min(), pt.max(), 10)
    # include_lowest closes the first bin on the left; without it the row with
    # the minimal pt would fall outside every bin and silently drop out.
    pt_values = pd.cut(pt, bin_edges, include_lowest=True)

    # observed=False keeps empty bins in the result on every pandas version.
    return mc_data.groupby(pt_values, observed=False)
129 
def plot(self, data_x, data_y, loc=4, yerr=None):
    """Plot data_y over data_x with the correct settings for this result. Mostly used internally.

    Parameters:
        data_x: values for the x axis.
        data_y: values for the y axis.
        loc: legend location handed to ``plt.legend``.
        yerr: optional errors on ``data_y``; when given, error bars are drawn.
    """
    import matplotlib.pyplot as plt

    # Line settings shared by both drawing calls.
    line_settings = dict(ls="-", marker="o", color=self.color, label=self.label, lw=4)

    if yerr is None:
        plt.plot(data_x, data_y, **line_settings)
    else:
        plt.errorbar(data_x, data_y, yerr=yerr, **line_settings)

    # A legend is only drawn for results that carry a label.
    if self.label is not None:
        plt.legend(loc=loc, frameon=True)
142 
def plot_finding_efficiency(self, data=None):
    """Plot the finding efficiency over pt.

    Parameters:
        data: optional frame passed on to ``grouped_by_pt_data``; when it is
            ``None`` the internal mc_data is used there.
    """
    import matplotlib.pyplot as plt

    pt_groups = self.grouped_by_pt_data(data)

    median_pt = pt_groups.median().pt_truth
    matched_fraction = pt_groups.mean().is_matched
    # The error bar is 1 / sqrt(number of entries) of each pt group.
    entries = pt_groups.count().is_matched

    self.plot(median_pt, matched_fraction, yerr=1 / np.sqrt(entries))
    plt.xlabel(r"$p_T$ of the MC tracks (in GeV)")
    plt.ylabel("Finding Efficiency")
151 
def plot_hit_efficiency(self, data=None):
    """Plot the hit efficiency over pt.

    Parameters:
        data: optional frame passed on to ``grouped_by_pt_data``; when it is
            ``None`` the internal mc_data is used there.
    """
    import matplotlib.pyplot as plt

    pt_groups = self.grouped_by_pt_data(data)

    median_pt = pt_groups.median().pt_truth
    mean_hit_efficiency = pt_groups.mean().hit_efficiency
    # The error bar is 1 / sqrt(summed mc_number_of_hits) of each pt group.
    total_hits = pt_groups.sum().mc_number_of_hits

    self.plot(median_pt, mean_hit_efficiency, yerr=1 / np.sqrt(total_hits))
    plt.xlabel(r"$p_T$ of the MC tracks (in GeV)")
    plt.ylabel("Hit Efficiency")
160 
162  """Print the most useful information about this result."""
163  pr_data = self.pr_datapr_data
164  mc_data = self.mc_datamc_data
165  primaries = pr_data[self.pr_datapr_data.is_prompt == 1]
166  primaries_mc = mc_data[self.mc_datamc_data.is_prompt == 1]
167 
168  print(self.labellabel)
169  print("Fake", 100 * primaries.is_fake.mean(), 100 * pr_data.is_fake.mean())
170  print("Clone", 100 * primaries.is_clone.mean(), 100 * pr_data.is_clone.mean())
171  print("Ghost", 100 * primaries.is_ghost.mean(), 100 * pr_data.is_ghost.mean())
172  print("Fitted", 100 * primaries.is_fitted.mean(), 100 * pr_data.is_fitted.mean())
173  print("Found", 100 * primaries_mc.is_matched.mean(), 100 * mc_data.is_matched.mean())
174  print("Found2", 100.0 - 100 * primaries_mc.is_missing.mean(), 100.0 - 100 * mc_data.is_missing.mean())
175  print("Merged", 100 * primaries_mc.is_merged.mean(), 100 * mc_data.is_merged.mean())
176  print("Hit-Eff", 100 * primaries_mc.hit_efficiency.mean(), 100 * mc_data.hit_efficiency.mean())
177  print("Wrong Hits", primaries.number_of_wrong_hits.mean(), pr_data.number_of_wrong_hits.mean())
178 
def append_to_dataframe(self, df):
    """Append the main results to an already existing dataframe.

    The row holds the four figures of merit and the file name, extended by
    ``additional_information`` when that is set and non-empty.

    Parameters:
        df: the dataframe to extend; it is not modified.

    Returns:
        A new dataframe consisting of ``df`` plus one row, with a fresh index.
    """
    result = {"finding_efficiency": self.finding_efficiency,
              "hit_efficiency": self.hit_efficiency,
              "clone_rate": self.clone_rate,
              "fake_rate": self.fake_rate,
              "file_name": self.filename}
    if self.additional_information:
        result.update(self.additional_information)

    # DataFrame.append was removed in pandas 2.0; concatenating a one-row
    # frame is the supported replacement.
    return pd.concat([df, pd.DataFrame([result])], ignore_index=True)
additional_information
the additional information
Definition: root_handler.py:57
def plot(self, data_x, data_y, loc=4, yerr=None)
def plot_hit_efficiency(self, data=None)
def grouped_by_pt_data(self, mc_data=None)
def __init__(self, filename, label=None, color_index=0, additional_information=None)
Definition: root_handler.py:18
def plot_finding_efficiency(self, data=None)
finding_efficiency
the finding efficiency
Definition: root_handler.py:47
def from_calculations(calculations, key="output_file_name", parameter_part=None)
Definition: root_handler.py:60