#!/usr/bin/env python3

# Belle II Software  release-08-01-10
# priorDataLoaderAndModel.py

from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import BatchNorm1d
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
from torch.nn import Softmax
import torch
from torch.utils.data import Dataset
from torch.utils.data import random_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import PolynomialFeatures
import uproot3 as ur
import numpy as np

class PriorDataLoader(Dataset):
    """
    Dataloader for PID prior probability training.

    Attributes:
        xx (np.array): Feature array containing the second-order polynomial
            combinations of cos(theta), momentum and transverse momentum.
        yy (np.array): Array containing the label-encoded PDG values.
    """

    def __init__(self, path: str, key: str, particlelist: list, labels: list):
        """
        Initialize the dataloader for PID prior training.

        Parameters:
            path (str): Path to the root file containing the data.
            key (str): Key (i.e. path) of the tree within the root file.
            particlelist (list(int)): List of particle PDG values for which the model has to be trained.
            labels (list(str)): Labels of pandas columns containing cos(theta), momentum and PDG values (in this order).
        """
        data = ur.open(path)
        data = data[key].pandas.df(labels)
        df = data.dropna().reset_index(drop=True)
        # Priors are charge-independent, so collapse signed PDG codes to |PDG|.
        df.loc[:, labels[2]] = df.loc[:, labels[2]].abs()
        # Remove every species that was not requested in 'particlelist'.
        droplist = np.setdiff1d(np.unique(df[labels[2]].values), particlelist)
        for i in droplist:
            df = df.drop(df.loc[df[labels[2]] == i].index).reset_index(drop=True)
        x = df.values[:, 0:2]
        # Append transverse momentum p*sin(theta); column 0 is cos(theta), column 1 is p.
        x = np.hstack((x, (np.sin(np.arccos(x[:, 0])) * x[:, 1]).reshape(-1, 1)))
        # Expand to all degree-2 polynomial combinations (no bias term) -> 9 features.
        pol = PolynomialFeatures(2, include_bias=False)
        x = pol.fit_transform(x)

        # Feature matrix used by the network (float32 as expected by torch).
        self.xx = x.astype("float32")
        y = df.values[:, 2]
        le = LabelEncoder()
        y = le.fit_transform(y)

        # Labels encoded to 0..n_classes-1 (int64 as expected by torch losses).
        self.yy = y.astype("int64")

    def __getitem__(self, index):
        """
        Function to get feature and label tensors at the given index location.

        Parameters:
            index (int): The index of required tensors.

        Returns:
            Tensors of features and labels at the given index.
        """
        return [self.xx[index], self.yy[index]]

    def __len__(self):
        """
        Function to obtain length of a tensor.

        Parameters:
            None.

        Returns:
            Number of feature sets.
        """
        return len(self.xx)

    def get_split(self, n_test: float = 0.1) -> list:
        """
        Split the input data into training and validation set.

        Parameter:
            n_test (float): Ratio of number of particles to be taken in the validation set to that of training set.

        Return:
            A randomly split data set with the ratio given by 'n_test'
            (list of two torch.utils.data.Subset: [train, test]).
        """
        test_size = round(n_test * len(self.xx))
        train_size = len(self.xx) - test_size
        return random_split(self, [train_size, test_size])
103 
104 
class PriorModel(torch.nn.Module):
    """
    Pytorch model for PID prior probability calculation.

    Attributes:
        hidden1: Linear layer with 9 inputs and 128 outputs.
        act1: A ReLU activation layer.
        hidden2: A batch normalization layer.
        hidden3: Linear layer with 128 inputs and 64 outputs.
        act2: A ReLU activation layer.
        hidden4: A batch normalization layer.
        hidden5: Linear layer with 64 inputs and 32 outputs.
        act3: A ReLU activation layer.
        hidden6: A batch normalization layer.
        hidden7: Linear layer with 32 inputs and one output per particle in the particlelist.
        act4: A softmax activation layer.
    """

    def __init__(self, n_output: int):
        """
        Initialize the PID prior probability model.

        Parameter:
            n_output (int): Number of output nodes (one per particle species).
        """
        super().__init__()

        # 9 input features: degree-2 polynomial expansion of (cos(theta), p, pt).
        self.hidden1 = Linear(9, 128)
        # He initialization suits the ReLU-activated hidden layers.
        kaiming_uniform_(self.hidden1.weight, nonlinearity="relu")

        self.act1 = ReLU()

        self.hidden2 = BatchNorm1d(128)

        self.hidden3 = Linear(128, 64)
        kaiming_uniform_(self.hidden3.weight, nonlinearity="relu")

        self.act2 = ReLU()

        self.hidden4 = BatchNorm1d(64)

        self.hidden5 = Linear(64, 32)
        kaiming_uniform_(self.hidden5.weight, nonlinearity="relu")

        self.act3 = ReLU()

        self.hidden6 = BatchNorm1d(32)

        # Output head: Xavier initialization for the softmax layer.
        self.hidden7 = Linear(32, n_output)
        xavier_uniform_(self.hidden7.weight)

        self.act4 = Softmax(dim=1)

    def forward(self, x: torch.tensor) -> torch.tensor:
        """
        Gives PID prior probabilities for the input features.

        Parameter:
            x (torch.tensor): A 2D tensor containing features for a particle as a row.

        Returns:
            A torch tensor containing PID prior probabilities for the provided features.
        """
        x = self.hidden1(x)
        x = self.act1(x)
        x = self.hidden2(x)
        x = self.hidden3(x)
        x = self.act2(x)
        x = self.hidden4(x)
        x = self.hidden5(x)
        x = self.act3(x)
        x = self.hidden6(x)
        x = self.hidden7(x)
        x = self.act4(x)
        return x