release-08-01-10/doxygen/NN__filter__module_8py_source.html

 import os

 import numpy as np

 import pandas as pd


 from collections import defaultdict

 import basf2 as b2

 from ROOT import Belle2

 from ROOT.Belle2 import DBAccessorBase, DBStoreEntry


 from smartBKG import TOKENIZE_DICT, PREPROC_CONFIG, MODEL_CONFIG


 def check_status_bit(status_bit):
     """
     Tell whether a particle status word describes a usable particle.

     A status is accepted only when none of the four flag bits 4 to 7 is set.

     :param status_bit: Status word (integer bit field) of the particle.
     :return: True if all four flags are cleared, False otherwise.
     """
     # Bits that mark a particle as unusable for the graph
     unusable_flags = (
         (1 << 4)    # IsVirtual
         | (1 << 5)  # Initial
         | (1 << 6)  # ISRPhoton
         | (1 << 7)  # FSRPhoton
     )
     return (status_bit & unusable_flags) == 0


 class NNFilterModule(b2.Module):
     """
     Goals:
        1. Build a graph from an event composed of MCParticles
        2. Apply the well-trained model for reweighting or sampling method to get a score
        3. Execute reweighting or sampling process to get a weight

     Arguments:
        model_file(str): Path to saved model
        model_config(dict): Parameters to build the model
        preproc_config(dict): Parameters to provide information for preprocessing
        threshold(float): Threshold for event selection using reweighting method, value *None* indicating sampling method
        extra_info_var(str): Name of eventExtraInfo to save model prediction to
        global_tag(str): Tag in ConditionDB where the well trained model was stored
        payload(str): Payload for the well trained model in global tag

     Returns:
        Pass or rejected according to random sampling or selection with the given threshold

     Note:
         Score after the NN filter indicating the probability of the event to pass is saved
         under ``EventExtraInfo.extra_info_var``.

         Use ``eventExtraInfo(extra_info_var)`` in ``modularAnalysis.variablesToNtuple`` or
         ``additionalBranches=["EventExtraInfo"]`` in ``mdst.add_mdst_output`` to have access to the scores.
     """

     def __init__(
         self,
         model_file=None,
         model_config=MODEL_CONFIG,
         preproc_config=PREPROC_CONFIG,
         threshold=None,
         extra_info_var="NN_prediction",
         global_tag="SmartBKG_GATGAP",
         payload="GATGAPgen.pth"
     ):
         """
         Initialise the class.

         :param model_file: Path to the saved model. When empty, the file name is
             resolved from ``payload`` in :meth:`initialize`.
         :param model_config: Keyword arguments used to construct ``GATGAPModel``.
         :param preproc_config: Preprocessing settings; the keys ``features`` and
             ``cuts`` are read by this module.
         :param threshold: Selection threshold applied to the score. ``None``
             selects random sampling instead.
         :param extra_info_var: Name under which the score is stored in ``EventExtraInfo``.
         :param global_tag: Global tag prepended to the conditions configuration.
         :param payload: Name of the raw-file payload that holds the trained model.
         """
         super().__init__()

         #: Path to the saved model (filled from the payload if empty)
         self.model_file = model_file

         #: Parameters to build the model
         self.model_config = model_config

         #: Parameters for preprocessing (features and cuts)
         self.preproc_config = preproc_config

         #: Threshold for event selection, None means random sampling
         self.threshold = threshold

         #: Name of the EventExtraInfo entry receiving the score
         self.extra_info_var = extra_info_var

         #: Payload name of the trained model
         self.payload = payload

         # set additional database conditions for trained neural network
         b2.conditions.prepend_globaltag(global_tag)

     def initialize(self):
         """
         Initialise module before any events are processed

         Loads the trained parameters (from ``model_file`` or, if that is empty,
         from the payload), builds the model, registers ``EventExtraInfo`` if
         needed and prepares the per-event containers.
         """
         import torch
         from smartBKG.models.gatgap import GATGAPModel

         device = torch.device("cpu")

         # read trained model parameters from the payload if no file was given
         if not self.model_file:
             accessor = DBAccessorBase(DBStoreEntry.c_RawFile, self.payload, True)
             self.model_file = accessor.getFilename()
         trained_parameters = torch.load(self.model_file, map_location=device)

         #: model with trained parameters
         self.model = GATGAPModel(**self.model_config)
         self.model.load_state_dict(trained_parameters['model_state_dict'])
         # The module only runs inference: switch layers such as dropout or
         # batch normalisation (if the model has any) to evaluation behaviour.
         self.model.eval()

         #: PyStoreObj used to save the predicted score
         self.EventExtraInfo = Belle2.PyStoreObj('EventExtraInfo')
         if not self.EventExtraInfo.isValid():
             self.EventExtraInfo.registerInDataStore()

         #: generated variables
         self.gen_vars = defaultdict(list)

         #: node features
         # Build a new list instead of removing 'PDG' in place, so the (possibly
         # shared, module-level) preprocessing configuration is left untouched.
         self.out_features = [
             feature for feature in self.preproc_config['features'] if feature != 'PDG'
         ]

     def event(self):
         """
         Collect information from database, build graphs, make predictions and select through sampling or threshold
         """
         import torch

         # Initialize for every event
         self.gen_vars.clear()

         # Need to create the eventExtraInfo entry for each event
         self.EventExtraInfo.create()

         mcplist = Belle2.PyStoreArray("MCParticles")

         # The cut expression does not depend on the particle: build it once per event.
         # An empty cut list yields an empty string, which means "no cut".
         cut_expr = " and ".join(self.preproc_config["cuts"])

         array_indices = []
         mother_indices = []

         # Production time used as reference for the correction of jitter. It is
         # taken from the first usable primary particle, so it is always defined
         # before it is subtracted.
         root_prodTime = None

         for mcp in mcplist:
             if not mcp.isPrimaryParticle():
                 continue

             # Check mc particle is useable
             if not check_status_bit(mcp.getStatus()):
                 continue

             prodTime = mcp.getProductionTime()
             if root_prodTime is None:
                 root_prodTime = prodTime
             prodTime -= root_prodTime

             four_vec = mcp.get4Vector()
             prod_vec = mcp.getProductionVertex()

             # build generated variables as node features
             node = {
                 'prodTime': prodTime,
                 'energy': mcp.getEnergy(),
                 'x': prod_vec.x(),
                 'y': prod_vec.y(),
                 'z': prod_vec.z(),
                 'px': four_vec.Px(),
                 'py': four_vec.Py(),
                 'pz': four_vec.Pz(),
                 'PDG': TOKENIZE_DICT[int(mcp.getPDG())],
             }

             # Particle level cutting, evaluated on this particle only
             if cut_expr and pd.DataFrame([node]).query(cut_expr).empty:
                 continue

             for key, value in node.items():
                 self.gen_vars[key].append(value)

             # Collect indices for graph
             array_indices.append(mcp.getArrayIndex())
             mother = mcp.getMother()
             # Particles without mother point to array index 0.
             # NOTE(review): the mapping in mapped_mother_indices raises KeyError
             # if the referenced mother was itself rejected above - confirm that
             # the configured cuts cannot remove a mother of a kept particle.
             mother_indices.append(mother.getArrayIndex() if mother else 0)

         graph = self.build_graph(
             array_indices=array_indices, mother_indices=mother_indices,
             PDGs=self.gen_vars['PDG'],
             Features=[self.gen_vars[key] for key in self.out_features],
             symmetrize=True, add_self_loops=True
         )

         # Output pass probability (no gradients are needed for inference)
         with torch.no_grad():
             pred = float(torch.sigmoid(self.model(graph)).numpy().squeeze())

         # Save the pass probability to EventExtraInfo
         self.EventExtraInfo.addExtraInfo(self.extra_info_var, pred)

         # Module returns bool of whether prediction passes threshold for use in basf2 path flow control.
         # Compare against None explicitly so that a threshold of 0 is honoured.
         if self.threshold is not None:
             self.return_value(int(pred >= self.threshold))
         else:
             self.return_value(int(pred >= np.random.rand()))

     def mapped_mother_indices(self, array_indices, mother_indices):
         """
         Map the mother indices to an enumerated list. The one-hot encoded version
         of that list then corresponds to the adjacency matrix.

         Example:
            >>> mapped_mother_indices(
            ...    [0, 1, 3, 5, 6, 7, 8, 9, 10],
            ...    [0, 0, 0, 1, 1, 1, 5, 5, 7]
            ... )
            [0, 0, 0, 1, 1, 1, 3, 3, 5]

         Args:
            array_indices: list or array of indices. Each index has to be unique.
            mother_indices: list or array of mother indices.

         Returns:
            List of mapped indices

         Raises:
            KeyError: if a mother index does not occur in ``array_indices``.
         """
         idx_dict = {v: i for i, v in enumerate(array_indices)}
         return [idx_dict[m] for m in mother_indices]

     def build_graph(self, array_indices, mother_indices, PDGs, Features,
                     symmetrize=True, add_self_loops=True):
         """
         Build graph from preprocessed particle information

         Args:
            array_indices: array index of every kept particle (one node each).
            mother_indices: array index of the mother of every kept particle.
            PDGs: tokenised PDG value per node, stored as ``x_pdg``.
            Features: list of per-feature value lists, transposed and stored as ``x_feature``.
            symmetrize: if True, add the reversed edge for every mother-daughter edge.
            add_self_loops: if True, add exactly one self loop per node.

         Returns:
            The DGL graph with node data ``x_pdg`` and ``x_feature``.
         """
         # The backend is chosen when dgl is imported, so request it beforehand
         os.environ["DGLBACKEND"] = "pytorch"
         import torch
         import dgl

         # Build adjacency mapping: edge from (mapped) mother to each particle
         src = np.asarray(
             self.mapped_mother_indices(array_indices, mother_indices), dtype=np.int64
         )
         dst = np.arange(len(src), dtype=np.int64)

         # Build graph
         src_new, dst_new = src, dst
         if symmetrize:
             src_new = np.concatenate([src, dst])
             dst_new = np.concatenate([dst, src])

         # remove self-loops (the Y(4S)) to avoid duplicated self loops;
         # a boolean mask also copes with the case that no edge is left
         keep = src_new != dst_new
         src_new, dst_new = src_new[keep], dst_new[keep]

         if add_self_loops:
             src_new = np.concatenate([src_new, dst])
             dst_new = np.concatenate([dst_new, dst])

         # Give the node count explicitly so that it does not depend on which
         # nodes happen to appear in the edge list.
         graph = dgl.graph((src_new, dst_new), num_nodes=len(dst))
         graph.ndata["x_pdg"] = torch.tensor(PDGs, dtype=torch.int32)
         graph.ndata["x_feature"] = torch.tensor(np.transpose(Features), dtype=torch.float32)

         return graph

Belle2::PyStoreArray
A (simplified) python wrapper for StoreArray.
Definition: PyStoreArray.h:72

Belle2::PyStoreObj
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67

smartBKG.NN_filter_module.NNFilterModule
Definition: NN_filter_module.py:32

smartBKG.NN_filter_module.NNFilterModule.build_graph
def build_graph(self, array_indices, mother_indices, PDGs, Features, symmetrize=True, add_self_loops=True)
Definition: NN_filter_module.py:228

smartBKG.NN_filter_module.NNFilterModule.threshold
threshold
Threshold for event selection using the reweighting method; None indicates the sampling method.
Definition: NN_filter_module.py:87

smartBKG.NN_filter_module.NNFilterModule.model_file
model_file
Path to the saved model.
Definition: NN_filter_module.py:81

smartBKG.NN_filter_module.NNFilterModule.model
model
model with trained parameters
Definition: NN_filter_module.py:112

smartBKG.NN_filter_module.NNFilterModule.out_features
out_features
node features
Definition: NN_filter_module.py:123

smartBKG.NN_filter_module.NNFilterModule.model_config
model_config
Parameters to build the model.
Definition: NN_filter_module.py:83

smartBKG.NN_filter_module.NNFilterModule.gen_vars
gen_vars
generated variables
Definition: NN_filter_module.py:121

smartBKG.NN_filter_module.NNFilterModule.__init__
def __init__(self, model_file=None, model_config=MODEL_CONFIG, preproc_config=PREPROC_CONFIG, threshold=None, extra_info_var="NN_prediction", global_tag="SmartBKG_GATGAP", payload="GATGAPgen.pth")
Definition: NN_filter_module.py:68

smartBKG.NN_filter_module.NNFilterModule.initialize
def initialize(self)
Definition: NN_filter_module.py:96

smartBKG.NN_filter_module.NNFilterModule.extra_info_var
extra_info_var
Name of the eventExtraInfo entry to save the model prediction to.
Definition: NN_filter_module.py:89

smartBKG.NN_filter_module.NNFilterModule.preproc_config
preproc_config
Parameters to provide information for preprocessing.
Definition: NN_filter_module.py:85

smartBKG.NN_filter_module.NNFilterModule.mapped_mother_indices
def mapped_mother_indices(self, array_indices, mother_indices)
Definition: NN_filter_module.py:205

smartBKG.NN_filter_module.NNFilterModule.payload
payload
Payload for the well trained model in the global tag.
Definition: NN_filter_module.py:91

smartBKG.NN_filter_module.NNFilterModule.event
def event(self)
Definition: NN_filter_module.py:127

smartBKG.NN_filter_module.NNFilterModule.EventExtraInfo
EventExtraInfo
PyStoreObj (StoreObjPtr wrapper) to save the model prediction to.
Definition: NN_filter_module.py:116