Belle II Software development
evalPriors.py
1#!/usr/bin/env python3
2
3
10
11# torch imports
12import torch
13from torch.nn.functional import one_hot
14
15# other imports
16import numpy as np
17import uproot3 as ur
18from sklearn.preprocessing import PolynomialFeatures
19
20# Model
21from priorDataLoaderAndModel import PriorModel
22
23# dataloader
24
25
def data_load(data: np.ndarray) -> torch.FloatTensor:
    """
    Creates a dataset in the format that is required by the model for processing.

    Parameters:
        data(np.ndarray): A 2D numpy array (or array-like) containing cos(theta) as the first column and
            momentum as the second column.

    Returns:
        A torch tensor containing second order polynomial feature transformation of the provided data along with the
        additional transverse momentum.
    """
    # Accept any array-like (e.g. nested lists) while leaving numpy inputs untouched.
    data = np.asarray(data)
    cos_theta = data[:, 0]
    momentum = data[:, 1]
    # Transverse momentum: pt = p * sin(theta), with theta recovered from cos(theta).
    pt = np.sin(np.arccos(cos_theta)) * momentum
    features = np.hstack((data, pt.reshape(-1, 1)))
    # Degree-2 expansion without the constant term: the inputs themselves plus all squares and pairwise products.
    expander = PolynomialFeatures(2, include_bias=False)
    return torch.FloatTensor(expander.fit_transform(features))
42
43
44# scaling for calibration
def scaling(dat: str):
    """
    Creates the temperature scaling object for calibration.

    Parameters:
        dat(str): The path to the scaling file generated during training. Its "scale" tree must contain one
            "<PDG>_pdg" column per particle type and a "truth" column holding the class index.

    Returns:
        The scaling class to transform the output predictions from the model.
    """
    # TemperatureScaling imported within the scaling function as it is not
    # included with basf2 and calibration is not mandatory so it can be
    # avoided if calibration is not required.
    from netcal.scaling import TemperatureScaling

    data = ur.open(dat)["scale"].pandas.df()
    pdg_columns = list(data.columns)
    pdg_columns.remove("truth")
    # Strip the "_pdg" suffix and order the codes numerically. A plain string sort would put
    # "2212" before "321", which disagrees with the numerically sorted PDG list used by Priors
    # to index the model output.
    pdg_codes = sorted((column[:-4] for column in pdg_columns), key=int)
    # Assemble the prediction matrix in a single stacking call, one column per particle type.
    y = np.hstack([data[code + "_pdg"].values.reshape(-1, 1) for code in pdg_codes])
    # Fix the number of classes explicitly; otherwise one_hot infers it from the largest label
    # present and yields too few columns when the highest class is missing from the sample.
    truth = one_hot(torch.LongTensor(data["truth"].values), num_classes=len(pdg_codes)).numpy()
    temp = TemperatureScaling()
    temp.fit(y, truth)
    return temp
70
71
class Priors:
    """
    Class to calculate PID prior probabilities and posteriors.

    Attributes:
        model(PriorModel): The trained model to be used for evaluation.
        plist(np.array): List of particle PDGs for which the model was trained.
        require_scale(bool): True if a scaling file is provided or else False.
        scale(TemperatureScaling) (optional): Calibration object constructed for temperature scaling.
        prior(np.array): Prior probabilities; only available after ``calculate_priors`` has been called.
    """

    def __init__(self, particlelist: list, Model: str, prescaling: str = None):
        """
        Initialize the Priors class.

        Parameters:
            particlelist(list(int)): List of PDG values for which the model was trained.
            Model(str): Path to a previously trained model which will be used to calculate priors.
            prescaling(str) (optional): Path to the scaling file created while training the model.
        """
        device = self._device()
        model = PriorModel(len(particlelist))
        # map_location allows a checkpoint that was saved on a GPU to be loaded on a CPU-only machine.
        model.load_state_dict(torch.load(Model, map_location=device))
        model.eval()

        #: The torch model for prior calculation.
        self.model = model.to(device)

        #: True if the scaling object exists.
        self.require_scale = prescaling is not None
        if self.require_scale:
            #: Temperature scaling object for calibration.
            self.scale = scaling(prescaling)

        #: Sorted particle PDG list.
        self.plist = np.sort(particlelist)

    @staticmethod
    def _device() -> str:
        """
        Name of the torch device used for evaluation: "cuda" when available, otherwise "cpu".
        """
        return "cuda" if torch.cuda.is_available() else "cpu"

    def _index_of(self, pdg: int) -> int:
        """
        Position of the given PDG value in the sorted particle list.

        Raises:
            IndexError: If the PDG value is not among the particles the model was trained for.
        """
        matches = np.where(self.plist == pdg)[0]
        if matches.size == 0:
            raise IndexError(f"PDG value {pdg} is not in the particle list {list(self.plist)}")
        return matches[0]

    def calculate_priors(self, momentum: np.ndarray, cosTheta: np.ndarray):
        """
        Calculates priors for given momentum and cos(theta).

        Parameters:
            momentum(np.array): A numpy array containing the momentum of particles.
            cosTheta(np.array): A numpy array containing the cosTheta information of particles.

        Returns:
            None.
        """
        columns = (np.asarray(cosTheta).reshape(-1, 1), np.asarray(momentum).reshape(-1, 1))
        features = data_load(np.hstack(columns))
        # Pure inference: skip building the autograd graph.
        with torch.no_grad():
            out = self.model(features.to(self._device())).cpu().numpy()

        if self.require_scale:
            out = self.scale.transform(out)

        #: Numpy array containing PID prior probability data.
        self.prior = out

    def get_priors(self, pdg: int = None) -> np.ndarray:
        """
        Gives the calculated PID priors.

        Parameters:
            pdg(int) (optional): The PDG value of the particles for which prior probabilities are needed.

        Returns:
            A 1D array containing prior probabilities for required particle in case PDG value is specified;
            else it will return a 2D array for all particles that were used during training.

        Raises:
            IndexError: If the PDG value is not among the particles the model was trained for.
        """
        if pdg is None:
            return self.prior
        return self.prior[:, self._index_of(pdg)]

    def get_posterior(self, pid: np.ndarray, pdg: int = None) -> np.ndarray:
        """
        Get PID posterior probabilities.

        Parameters:
            pid(np.array): The PID values for the particles used during training process arranged in ascending order of PDG values.
            pdg(int) (optional): PDG value of particle for which posterior is required.

        Returns:
            A 1D array of posterior probabilities in case PDG value is provided else returns a 2D array containing
            the posteriors for all particles.

        Raises:
            IndexError: If the PDG value is not among the particles the model was trained for.
        """
        # Bayes' rule per candidate: prior times likelihood, normalised over all particle hypotheses.
        weighted = np.multiply(self.prior, pid)
        norm = np.sum(weighted, axis=1).reshape(-1, 1)
        posterior = np.divide(weighted, norm)
        if pdg is None:
            return posterior
        return posterior[:, self._index_of(pdg)]
def __init__(self, list particlelist, str Model, str prescaling=None)
Definition: evalPriors.py:83
def calculate_priors(self, np.array momentum, np.array cosTheta)
Definition: evalPriors.py:111
require_scale
True if the scaling object exists.
Definition: evalPriors.py:104
model
The torch model for prior calculation.
Definition: evalPriors.py:98
prior
Numpy array containing PID prior probability data.
Definition: evalPriors.py:133
plist
Sorted particle PDG list.
Definition: evalPriors.py:109
np.array get_posterior(self, int pid, int pdg=None)
Definition: evalPriors.py:152
scale
Temperature scaling object for calibration.
Definition: evalPriors.py:102
np.array get_priors(self, int pdg=None)
Definition: evalPriors.py:135
Definition: pdg.py:1