Belle II Software development
how_to_wrap_your_existing_training_into_a_weightfile.py
1#!/usr/bin/env python3
2
3
10
11# In some cases you create a training outside of basf2
12# but you still want to apply the training in basf2
13# In this case you can create a fake training which produces a weightfile
14# which you can upload to the database, by overriding end_fit
15# In addition you have to override load and apply, so that the mva package
16# knows how to apply your custom training
17
18
19import numpy as np
20import basf2_mva
21import basf2_mva_util
22
23
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Hook required by the custom framework; leaving it out raises an error.
    The fake training does not build a model here, so nothing is returned.
    """
    model = None
    return model
27
28
def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Hook required by the custom framework; leaving it out raises an error.
    The test data arguments are ignored and the incoming state is handed back untouched.
    """
    unchanged_state = state
    return unchanged_state
32
33
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Hook required by the custom framework; leaving it out raises an error.
    No fitting is performed: every argument is ignored and True is always returned.
    """
    return True
37
38
def feature_importance(state):
    """
    Hook required by the custom framework; leaving it out raises an error.
    No importances are computed for the fake training, so an empty list is returned.
    """
    return list()
42
43
def end_fit(state):
    """
    Load the externally produced training and hand it to basf2 as a picklable object.

    No real training happens in this hook, so the training data may be empty
    (or a small file) and is simply ignored. What matters is that the features
    you use (and will need later when applying) are set correctly in the
    GeneralOptions.

    The classifier is read from 'mytraining.pickle' in the current working directory.
    """
    import pickle
    with open('mytraining.pickle', 'rb') as pickle_file:
        return pickle.load(pickle_file)
56
57
def load(my_pickled_classifier):
    """
    Receive the pickled classifier and turn it into the state used by apply.

    This is the place to initialise your actual classifier from the pickled
    information if that is needed. In this example the object is already
    usable, so it is returned as it is.
    """
    return my_pickled_classifier
67
68
def apply(state, X):
    """
    Evaluate the classifier on the features stated in the GeneralOptions.

    In principle you can access the basf2 DataStore yourself in this function
    and extract additional feature information for your classifier.
    However, in that case your method will only work inside basf2 and not
    with basf2_mva_expert (and you cannot use basf2_mva_evaluate.py either).

    The output of state.predict_proba(X) is returned as a float32 array that
    is aligned, writeable, C-contiguous and owns its data.
    """
    probabilities = state.predict_proba(X)
    array_requirements = ['A', 'W', 'C', 'O']
    return np.require(probabilities, dtype=np.float32, requirements=array_requirements)
79
80
if __name__ == "__main__":
    # Here we create the fake training.
    # We have written all the necessary hooks, so now we can call the mva framework as usual.
    # Other Python-based frameworks like sklearn, tensorflow, xgboost, ... have predefined hooks,
    # but you can overwrite all of them.
    import ROOT  # noqa
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # First I create an external sklearn weightfile
    from sklearn.ensemble import GradientBoostingClassifier
    clf = GradientBoostingClassifier()
    X = np.random.uniform(size=(1000, 3))
    y = (np.random.uniform(size=1000) > 0.5).astype(int)
    clf.fit(X, y)
    import pickle
    # Use a context manager so the file is fully written and closed
    # before anything tries to read 'mytraining.pickle' back.
    with open('mytraining.pickle', 'wb') as pickle_file:
        pickle.dump(clf, pickle_file)

    variables = ['M', 'p', 'pt']

    # Now we need also a fake input file, we just create one
    # the content doesn't matter, as long as the branches exist.
    root_file = ROOT.TFile("fake_train.root", "recreate")
    root_file.cd()
    root_tree = ROOT.TTree('tree', 'title')
    # A single shared buffer is enough because the stored values are irrelevant
    value = np.zeros(1, dtype=float)
    for var in variables:
        root_tree.Branch(var, value, var + '/F')
    root_tree.Branch("isSignal", value, 'isSignal/F')
    for i in range(10):
        root_tree.Fill()
    root_file.Write("tree")
    root_file.Close()

    # Now we write the configuration for our fake training
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("fake_train.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "MyModel"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "custom"
    python_options.m_steering_file = "mva/examples/python/how_to_wrap_your_existing_training_into_a_weightfile.py"

    basf2_mva.teacher(general_options, python_options)

    # Apply the training as usual
    test_file = find_file("mva/test_D0toKpipi.root", "examples")
    testing_data = basf2_mva.vector(test_file)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    # Compute the figure of merit that is printed below; without this
    # assignment the print statement fails with a NameError on 'auc'.
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("Custom Method", auc)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)