Belle II Software development
root_handler.py
1
8
9import numpy as np
10import uproot
11
12import pandas as pd
13
14
16 """This class represents a loaded validation root file. It has methods for plotting the typically needed graphs."""
17
def __init__(self, filename, label=None, color_index=0, additional_information=None):
    """Create a new validation result from the given filename.

    Additional options for plotting (e.g. color or label) can be given.

    Args:
        filename: Path of the validation root file; handed to ``uproot.open``.
        label: Legend label used by ``plot``; no legend is drawn when ``None``.
        color_index: Index into the seaborn color palette. It wraps around
            (modulo the palette length), so any non-negative integer is valid.
        additional_information: Optional mapping stored on the instance and
            merged into the row written by ``append_to_dataframe``.
    """
    import seaborn as sb

    self.filename = filename

    # Open the file once and release the handle as soon as both trees are
    # loaded into memory, instead of leaving two open file handles behind.
    with uproot.open(self.filename) as root_file:
        self.pr_data = root_file["pr_tree/pr_tree"].arrays(library="pd")
        self.mc_data = root_file["mc_tree/mc_tree"].arrays(library="pd")

    # A track counts as "prompt" when it is primary and its true production
    # vertex lies within a transverse distance of 0.5 from the origin.
    self.pr_data["is_prompt"] = (
        np.sqrt(self.pr_data.x_truth ** 2 + self.pr_data.y_truth ** 2) < 0.5) & (self.pr_data.is_primary == 1)
    self.mc_data["is_prompt"] = (
        np.sqrt(self.mc_data.x_truth ** 2 + self.mc_data.y_truth ** 2) < 0.5) & (self.mc_data.is_primary == 1)

    # Prompt-only views of the MC and pattern-recognition data.
    self.mc_prompts = self.mc_data[self.mc_data.is_prompt == 1]
    self.pr_prompts = self.pr_data[self.pr_data.is_prompt == 1]

    # Plot styling for this result.
    colors = sb.color_palette()
    self.label = label
    self.color = colors[color_index % len(colors)]

    # Figures of merit are read lazily by get_figure_of_merits(); None means
    # "not read yet". All four must exist, since that method tests
    # self.finding_efficiency before filling them.
    self.finding_efficiency = None
    self.hit_efficiency = None
    self.fake_rate = None
    self.clone_rate = None

    self.additional_information = additional_information
58
59 @staticmethod
60 def from_calculations(calculations, key="output_file_name", parameter_part=None):
61 """Create validation results from an ipython calculation."""
62 if parameter_part:
63 return [
65 c.get(key),
66 label=c.get_parameters()[parameter_part],
67 color_index=i) for i,
68 c in enumerate(calculations)]
69 else:
70 return [
72 c.get(key),
73 label=c.get_parameters(),
74 color_index=i) for i,
75 c in enumerate(calculations)]
76
def get_figure_of_merits(self):
    """Return the figures of merit from the file. Mostly used for internal setting of the properties.

    The values are read from the root file on the first call and cached on
    the instance; later calls return the cached values without file access.

    Returns:
        dict with the keys ``finding_efficiency``, ``hit_efficiency``,
        ``clone_rate`` and ``fake_rate``.
    """
    # getattr keeps this safe even if the attribute was never initialised.
    if getattr(self, "finding_efficiency", None) is None:
        # Read both overview trees from a single, properly closed file handle.
        with uproot.open(self.filename) as root_file:
            mc_overview = root_file[
                "ExpertMCSideTrackingValidationModule_overview_figures_of_merit"].arrays(library="pd")
            pr_overview = root_file[
                "ExpertPRSideTrackingValidationModule_overview_figures_of_merit"].arrays(library="pd")

        # Assign only after both reads succeeded, so a failing read cannot
        # leave a half-filled cache that later calls would return as-is.
        self.finding_efficiency = mc_overview.finding_efficiency[0]
        self.hit_efficiency = mc_overview.hit_efficiency[0]
        self.clone_rate = pr_overview.clone_rate[0]
        self.fake_rate = pr_overview.fake_rate[0]

    return dict(finding_efficiency=self.finding_efficiency,
                hit_efficiency=self.hit_efficiency,
                clone_rate=self.clone_rate,
                fake_rate=self.fake_rate)
96
98 """Print out the figures of merit as a LaTeX-ready table."""
99 results = self.get_figure_of_merits()
100
101 latex_string = r'\begin{table}' + "\n"
102 latex_string += r' \begin{tabular}{cc} \toprule' + "\n"
103 latex_string += r' & \\ \midrule' + "\n"
104 latex_string += r' Finding Efficiency & ' + f"{100 * results['finding_efficiency']:.2f}" + r' \% \\' + "\n"
105 latex_string += r' Hit Efficiency & ' + f"{100 * results['hit_efficiency']:.2f}" + r' \% \\' + "\n"
106 latex_string += r' Fake Rate & ' + f"{100 * results['fake_rate']:.2f}" + r' \% \\' + "\n"
107 latex_string += r' Clone Rate & ' + f"{100 * results['clone_rate']:.2f}" + r' \% \\ \bottomrule' + "\n"
108 latex_string += r' \end{tabular}' + "\n"
109 latex_string += r'\end{table}'
110
111 return latex_string
112
114 """Plot a point in the finding-efficiency/hit-efficiency plane."""
115 import matplotlib.pyplot as plt
116 self.plot(100 * self.finding_efficiency, 100 * self.hit_efficiency, loc=3)
117 plt.xlabel("finding efficiency")
118 plt.ylabel("hit efficiency")
119
def grouped_by_pt_data(self, mc_data=None):
    """Convenience function to return the input data (or the internal mc_data) grouped by pt.

    Args:
        mc_data: DataFrame with a ``pt_truth`` column. Defaults to
            ``self.mc_data`` when ``None``.

    Returns:
        The pandas groupby object of ``mc_data`` over nine equal-width
        ``pt_truth`` intervals spanning the observed minimum to maximum.
    """
    if mc_data is None:
        mc_data = self.mc_data

    # Ten edges -> nine equal-width bins between the smallest and largest pt.
    bin_edges = np.linspace(mc_data.pt_truth.min(), mc_data.pt_truth.max(), 10)

    # include_lowest=True closes the first interval on the left; without it
    # the entries sitting exactly at the minimum pt fall outside every bin
    # and silently vanish from all grouped results.
    pt_values = pd.cut(mc_data.pt_truth, bin_edges, include_lowest=True)

    # observed=False keeps empty bins in the result regardless of the pandas
    # version's default for categorical groupers.
    return mc_data.groupby(pt_values, observed=False)
129
def plot(self, data_x, data_y, loc=4, yerr=None):
    """Draw data_y against data_x using this result's color and label. Mostly used internally.

    Error bars are drawn when ``yerr`` is given; a legend is added at
    position ``loc`` only when the result has a label.
    """
    import matplotlib.pyplot as plt

    # Styling shared by the plain and the error-bar variant.
    line_style = dict(ls="-", marker="o", color=self.color, label=self.label, lw=4)

    if yerr is None:
        plt.plot(data_x, data_y, **line_style)
    else:
        plt.errorbar(data_x, data_y, yerr=yerr, **line_style)

    if self.label is not None:
        plt.legend(loc=loc, frameon=True)
142
def plot_finding_efficiency(self, data=None):
    """Plot the finding efficiency as a function of pt.

    ``data`` is forwarded to ``grouped_by_pt_data``; per pt bin the mean of
    ``is_matched`` is drawn at the median ``pt_truth`` with an error bar of
    ``1 / sqrt(number of entries)``.
    """
    import matplotlib.pyplot as plt

    pt_bins = self.grouped_by_pt_data(data)

    median_pt = pt_bins.median().pt_truth
    efficiency = pt_bins.mean().is_matched
    uncertainty = 1 / np.sqrt(pt_bins.count().is_matched)

    self.plot(median_pt, efficiency, yerr=uncertainty)
    plt.xlabel(r"$p_T$ of the MC tracks (in GeV)")
    plt.ylabel("Finding Efficiency")
151
def plot_hit_efficiency(self, data=None):
    """Plot the hit efficiency as a function of pt.

    ``data`` is forwarded to ``grouped_by_pt_data``; per pt bin the mean of
    ``hit_efficiency`` is drawn at the median ``pt_truth`` with an error bar
    of ``1 / sqrt(sum of mc_number_of_hits)``.
    """
    import matplotlib.pyplot as plt

    pt_bins = self.grouped_by_pt_data(data)

    median_pt = pt_bins.median().pt_truth
    efficiency = pt_bins.mean().hit_efficiency
    uncertainty = 1 / np.sqrt(pt_bins.sum().mc_number_of_hits)

    self.plot(median_pt, efficiency, yerr=uncertainty)
    plt.xlabel(r"$p_T$ of the MC tracks (in GeV)")
    plt.ylabel("Hit Efficiency")
160
162 """Print the most useful information about this result."""
163 pr_data = self.pr_data
164 mc_data = self.mc_data
165 primaries = pr_data[self.pr_data.is_prompt == 1]
166 primaries_mc = mc_data[self.mc_data.is_prompt == 1]
167
168 print(self.label)
169 print("Fake", 100 * primaries.is_fake.mean(), 100 * pr_data.is_fake.mean())
170 print("Clone", 100 * primaries.is_clone.mean(), 100 * pr_data.is_clone.mean())
171 print("Ghost", 100 * primaries.is_ghost.mean(), 100 * pr_data.is_ghost.mean())
172 print("Fitted", 100 * primaries.is_fitted.mean(), 100 * pr_data.is_fitted.mean())
173 print("Found", 100 * primaries_mc.is_matched.mean(), 100 * mc_data.is_matched.mean())
174 print("Found2", 100.0 - 100 * primaries_mc.is_missing.mean(), 100.0 - 100 * mc_data.is_missing.mean())
175 print("Merged", 100 * primaries_mc.is_merged.mean(), 100 * mc_data.is_merged.mean())
176 print("Hit-Eff", 100 * primaries_mc.hit_efficiency.mean(), 100 * mc_data.hit_efficiency.mean())
177 print("Wrong Hits", primaries.number_of_wrong_hits.mean(), pr_data.number_of_wrong_hits.mean())
178
def append_to_dataframe(self, df):
    """Append the main results to an already existing dataframe.

    Args:
        df: DataFrame to extend; it is not modified in place.

    Returns:
        A new DataFrame consisting of ``df`` plus one row holding the four
        figures of merit, the file name and, if present, every entry of
        ``additional_information``. The index is renumbered from 0.
    """
    result = {"finding_efficiency": self.finding_efficiency,
              "hit_efficiency": self.hit_efficiency,
              "clone_rate": self.clone_rate,
              "fake_rate": self.fake_rate,
              "file_name": self.filename}

    # additional_information defaults to None; dict.update(None) would raise.
    if self.additional_information:
        result.update(self.additional_information)

    # DataFrame.append was removed in pandas 2.0; concatenating a one-row
    # frame is the supported equivalent.
    return pd.concat([df, pd.DataFrame([result])], ignore_index=True)
additional_information
the additional information
Definition: root_handler.py:57
def plot(self, data_x, data_y, loc=4, yerr=None)
def plot_hit_efficiency(self, data=None)
def grouped_by_pt_data(self, mc_data=None)
def __init__(self, filename, label=None, color_index=0, additional_information=None)
Definition: root_handler.py:18
def plot_finding_efficiency(self, data=None)
finding_efficiency
the finding efficiency
Definition: root_handler.py:47
def from_calculations(calculations, key="output_file_name", parameter_part=None)
Definition: root_handler.py:60
Definition: plot.py:1