Belle II Software  release-08-01-10
evalPriors.py
1 #!/usr/bin/env python3
2 
3 
10 
11 # torch imports
12 import torch
13 from torch.nn.functional import one_hot
14 
15 # other imports
16 import numpy as np
17 import uproot3 as ur
18 from sklearn.preprocessing import PolynomialFeatures
19 
20 # Model
21 from priorDataLoaderAndModel import PriorModel
22 
23 # dataloader
24 
25 
def data_load(data: np.array) -> torch.FloatTensor:
    """
    Builds the model input tensor from cos(theta) and momentum.

    Parameters:
        data(np.array): A 2D numpy array whose first column is cos(theta) and whose second column is momentum.

    Returns:
        A torch float tensor holding the second order polynomial features (without the bias term) of
        cos(theta), momentum and the transverse momentum derived from them.
    """
    cos_theta = data[:, 0]
    momentum = data[:, 1]

    # Transverse momentum: sin(theta) * p, with theta recovered from cos(theta).
    transverse = np.sin(np.arccos(cos_theta)) * momentum

    # Append the transverse momentum as a third column before expanding.
    features = np.hstack((data, transverse[:, np.newaxis]))

    expander = PolynomialFeatures(degree=2, include_bias=False)
    return torch.FloatTensor(expander.fit_transform(features))
42 
43 
44 # scaling for calibration
def scaling(dat: str):
    """
    Creates the temperature scaling object for calibration.

    Parameters:
        dat(str): The path to the scaling file generated during training.

    Returns:
        The fitted scaling object used to transform the output predictions from the model.

    Raises:
        ValueError: If the scaling file has no "truth" column or no prediction columns.
    """
    # TemperatureScaling imported within the scaling function as it is not
    # included with basf2 and calibration is not mandatory so it can be
    # avoided if calibration is not required.
    from netcal.scaling import TemperatureScaling

    data = ur.open(dat)["scale"].pandas.df()

    # Every column except "truth" is treated as a prediction column named
    # "<pdg>_pdg"; the 4-character suffix is stripped to get the PDG label.
    columns = list(data.columns)
    columns.remove("truth")
    # NOTE(review): the labels are sorted as strings here (lexicographic
    # order), while Priors sorts the integer PDG codes numerically - confirm
    # that the "truth" indices in the scaling file follow this same order.
    sorted_list = np.sort([name[:-4] for name in columns])
    if len(sorted_list) == 0:
        raise ValueError(f"No prediction columns found in scaling file {dat!r}")

    # One prediction column per particle hypothesis, in sorted label order.
    y = np.column_stack([data[name + "_pdg"].values for name in sorted_list])

    # Fix the number of classes to the number of prediction columns: left to
    # itself, one_hot sizes the encoding from the largest label present, which
    # yields too few columns when the last class is absent from the sample.
    truth = torch.LongTensor(data["truth"].values)
    labels = one_hot(truth, num_classes=y.shape[1]).numpy()

    temp = TemperatureScaling()
    temp.fit(y, labels)
    return temp
70 
71 
class Priors:
    """
    Class to calculate PID prior probabilities and posteriors.

    Attributes:
        model(PriorModel): The trained model to be used for evaluation.
        plist(np.ndarray): Sorted list of particle PDGs for which the model was trained.
        require_scale(bool): True if a scaling file is provided or else False.
        scale(TemperatureScaling) (optional): Calibration object constructed for temperature scaling.
        prior(np.ndarray): Prior probabilities; only available after calculate_priors has been called.
    """

    def __init__(self, particlelist: list, Model: str, prescaling: str = None):
        """
        Initialize the Priors class.

        Parameters:
            particlelist(list(int)): List of PDG values for which the model was trained.
            Model(str): Path to a previously trained model which will be used to calculate priors.
            prescaling(str) (optional): Path to the scaling file created while training the model.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = PriorModel(len(particlelist))
        # map_location lets a checkpoint saved on a GPU be loaded on a
        # CPU-only machine instead of failing while deserializing CUDA tensors.
        # NOTE: torch.load unpickles the file; only load trusted model files.
        model.load_state_dict(torch.load(Model, map_location=device))
        model.eval()
        model = model.to(device)

        #: The torch model for prior calculation.
        self.model = model
        if prescaling is not None:
            #: Temperature scaling object for calibration.
            self.scale = scaling(prescaling)
            #: True if the scaling object exists.
            self.require_scale = True
        else:
            self.require_scale = False

        #: Sorted particle PDG list.
        self.plist = np.sort(particlelist)

    def _pdg_index(self, pdg: int) -> int:
        """
        Returns the column index of the given PDG value in the sorted particle list.

        Raises:
            IndexError: If the PDG value is not part of the particle list. IndexError is kept
            because the previous bare ``np.where(...)[0][0]`` lookup raised it as well.
        """
        matches = np.where(self.plist == pdg)[0]
        if len(matches) == 0:
            raise IndexError(f"PDG value {pdg} is not in the particle list {list(self.plist)}")
        return matches[0]

    def calculate_priors(self, momentum: np.ndarray, cosTheta: np.ndarray):
        """
        Calculates priors for given momentum and cos(theta).

        The result is stored in the ``prior`` attribute and can be read back with
        get_priors or combined with PID values in get_posterior.

        Parameters:
            momentum(np.ndarray): A numpy array containing the momentum of particles.
            cosTheta(np.ndarray): A numpy array containing the cosTheta information of particles.

        Returns:
            None.
        """
        # data_load expects cos(theta) as the first column and momentum as the second.
        features = data_load(np.hstack((cosTheta.reshape(-1, 1), momentum.reshape(-1, 1))))
        if torch.cuda.is_available():
            features = features.to("cuda")

        # Pure inference: skip building the autograd graph.
        with torch.no_grad():
            out = self.model(features)
        out = out.detach().cpu().numpy()

        if self.require_scale:
            out = self.scale.transform(out)

        #: Numpy array containing PID prior probability data.
        self.prior = out

    def get_priors(self, pdg: int = None) -> np.ndarray:
        """
        Gives the calculated PID priors.

        Parameters:
            pdg(int) (optional): The PDG value of the particles for which prior probabilities are needed.

        Returns:
            A 1D array containing prior probabilities for required particle in case PDG value is specified;
            else it will return a 2D array for all particles that were used during training.

        Raises:
            IndexError: If the requested PDG value is not part of the particle list.
        """
        if pdg is None:
            return self.prior
        return self.prior[:, self._pdg_index(pdg)]

    def get_posterior(self, pid: np.ndarray, pdg: int = None) -> np.ndarray:
        """
        Get PID posterior probabilities.

        Parameters:
            pid(np.ndarray): The PID values for the particles used during training process arranged in
                ascending order of PDG values.
            pdg(int) (optional): PDG value of particle for which posterior is required.

        Returns:
            A 1D array of posterior probabilities in case PDG value is provided else returns a 2D array containing
            the posteriors for all particles.

        Raises:
            IndexError: If the requested PDG value is not part of the particle list.
        """
        # Weight each PID value by its prior, then normalize every row to sum to one.
        weighted = np.multiply(self.prior, pid)
        normalization = np.sum(weighted, axis=1).reshape(-1, 1)
        posterior = np.divide(weighted, normalization)
        if pdg is None:
            return posterior
        return posterior[:, self._pdg_index(pdg)]
def __init__(self, list particlelist, str Model, str prescaling=None)
Definition: evalPriors.py:83
def calculate_priors(self, np.array momentum, np.array cosTheta)
Definition: evalPriors.py:111
require_scale
True if the scaling object exists.
Definition: evalPriors.py:104
model
The torch model for prior calculation.
Definition: evalPriors.py:98
prior
Numpy array containing PID prior probability data.
Definition: evalPriors.py:133
plist
Sorted particle PDG list.
Definition: evalPriors.py:109
np.array get_posterior(self, int pid, int pdg=None)
Definition: evalPriors.py:152
scale
Temperature scaling object for calibration.
Definition: evalPriors.py:102
np.array get_priors(self, int pdg=None)
Definition: evalPriors.py:135