22 """ Let's assume we have written our own classifier (or installed something from github) """
25 """ Just print the passed parameters """
26 print(my_fancy_parameters)
29 """ Our method is so good, it doesn't even have to look at the data! """
33 """ Always return 1, this will boost our signal efficiency to the max """
34 return np.ones(len(X))
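

# A hedged sketch (not used by the example below) of what a more realistic
# classifier could look like, wrapping an estimator installed from elsewhere;
# sklearn is only an illustrative dependency and is imported lazily:
class MySklearnWrapper:
    def __init__(self, *my_fancy_parameters):
        from sklearn.ensemble import GradientBoostingClassifier
        self.clf = GradientBoostingClassifier()

    def fit(self, X, y):
        # X and y arrive as the lists of arrays collected in partial_fit below
        self.clf.fit(np.vstack(X), np.hstack([a.ravel() for a in y]))
        return self

    def predict(self, X):
        # return the signal probability per event
        return self.clf.predict_proba(X)[:, 1]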


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    This is the first function which is called.
    It must return a python object representing your method in memory;
    this object is passed to all the other hook functions.
    In this case we return our FancyClassifier object.
    @param number_of_features the total number of features
    @param number_of_spectators the total number of spectators
    @param number_of_events the total number of events
    @param training_fraction the signal fraction in the training
        (if you do a classification, otherwise the number is meaningless)
    @param parameters a python object which is created from a json string
        the user can pass via the m_config argument
    """
    return MyFancyClassifier(parameters)
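

# Hedged illustration of the m_config -> parameters mechanism: the framework
# json-decodes the m_config string before calling get_model, so the config
# string built in the __main__ block below arrives here as a plain Python
# object, e.g.
#
#     json.loads('{"A": "Python", "And": ["A List"]}')
#     # -> {'A': 'Python', 'And': ['A List']}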


def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Is called once per training, after get_model.
    You can initialize your training here.
    In addition a validation sample is passed, which you can use during
    the training (if the user set m_training_fraction != 1.0).
    @param state the return value of get_model
    @param Xtest numpy array containing the features of the validation sample
    @param Stest numpy array containing the spectators of the validation sample
    @param ytest numpy array containing the target values of the validation sample
    @param wtest numpy array containing the weights of the validation sample
    @param nBatches int containing the number of batches that will be passed to partial_fit in each epoch

    Since our method does not support out-of-core fitting, the usual thing
    is to add some arrays which collect the data passed to partial_fit.
    Our method doesn't use the validation sample either, so we just don't use it.
    """
    state.X = []
    state.y = []
    return state
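

# A hedged sketch of a begin_fit variant that keeps the validation sample
# around for monitoring (illustrative only; the real begin_fit above ignores
# it, and the `validation` attribute is an assumption, not part of the API):
def begin_fit_with_validation(state, Xtest, Stest, ytest, wtest, nBatches):
    state.X = []
    state.y = []
    # stash the validation arrays so partial_fit/end_fit could compute a
    # validation score during the training
    state.validation = (Xtest, ytest, wtest)
    return state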


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Can be called multiple times per training depending on the user configuration:
    If m_nIterations == 1 and m_mini_batch_size == 0 (these are the default values),
        partial_fit is called once with the complete training data.
    If m_nIterations == 1 and m_mini_batch_size != 0,
        partial_fit is called multiple times with only a subset of the training data
        of the desired size, until the complete dataset was streamed via partial_fit.
    If m_nIterations > 1 and m_mini_batch_size == 0,
        partial_fit is called multiple times, each time with the complete training data.
    If m_nIterations > 1 and m_mini_batch_size != 0,
        partial_fit is called multiple times with only a subset of the training data
        of the desired size, until the complete dataset was streamed m_nIterations times.
    If m_nIterations == 0 and m_mini_batch_size != 0,
        partial_fit is called multiple times until partial_fit returns False.
    As soon as partial_fit returns False the streaming of data is stopped.
    @param state the return value of begin_fit
    @param X numpy array containing the features of the training sample
    @param S numpy array containing the spectators of the training sample
    @param y numpy array containing the target values of the training sample
    @param w numpy array containing the weights of the training sample
    @param epoch the index of the current iteration through the total data set
    @param batch the index of the current mini batch passed to partial_fit

    Since our method doesn't use the streaming capability,
    we just collect the data in our state object.
    """
    state.X.append(X)
    state.y.append(y)
    return True
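

# A hedged sketch of a streaming-capable partial_fit for an estimator that
# supports incremental training via a sklearn-style partial_fit(X, y); the
# name partial_fit_streaming and the `estimator` attribute are illustrative,
# not part of the hook API:
def partial_fit_streaming(state, X, S, y, w, epoch, batch):
    # feed each chunk straight to the learner instead of collecting it;
    # with m_nIterations > 1 every epoch streams the whole dataset again
    state.estimator.partial_fit(X, y.ravel())
    # returning True requests more data; False stops the streaming early
    return True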


def end_fit(state):
    """
    Is called once per training.
    Here you can finish the training.
    You must return a picklable object, which is saved in the weightfile;
    later you must be able to create your estimator from this pickled object
    in the load function hook (see below).
    @param state the return value of begin_fit

    We can fit our method here. And since our state object is picklable,
    we can just return it. You might want to use a better mechanism in a real
    world example; you can look at the implementations of the other methods
    (like tensorflow) to see how to save models to files, read them back,
    and return them as a picklable object.
    """
    state.fit(state.X, state.y)
    pickable_object_for_weightfile = state
    return pickable_object_for_weightfile
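

# For estimators that are not directly picklable, a common pattern (sketched
# here under assumptions: `state.save` is a hypothetical method, not part of
# this example) is to serialize the model to a temporary file in end_fit and
# return its raw bytes, which always pickle cleanly:
def end_fit_via_file(state):
    import os
    import tempfile
    fd, filename = tempfile.mkstemp(suffix='.model')
    os.close(fd)
    try:
        state.save(filename)  # hypothetical: the estimator writes itself to disk
        with open(filename, 'rb') as f:
            return f.read()   # raw bytes are picklable and go into the weightfile
    finally:
        os.remove(filename)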


def feature_importance(state):
    """
    Called after end_fit.
    Should return a list containing the feature importances.
    The feature importances are saved in the weightfile and can be read out by the user.
    If your method doesn't support feature importance estimation, return an empty list.
    """
    return []
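

# A hedged sketch for an estimator that does provide importances via a
# sklearn-style `feature_importances_` attribute (an assumption about the
# wrapped estimator, not something our FancyClassifier offers):
def feature_importance_sketch(state):
    return list(getattr(state, 'feature_importances_', []))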


def load(pickable_object_from_weightfile):
    """
    @param pickable_object_from_weightfile the return value of end_fit,
        which was loaded from the weightfile and unpickled
    This should return again a state object, which is passed to apply later.

    In our case we directly pickled the state, so there's nothing to do here.
    In a real world scenario you might have to create files on disk in a
    temporary directory and recreate your estimator from them. You can look at
    other methods (like tensorflow) to see how this is done.
    """
    state = pickable_object_from_weightfile
    return state
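

# The matching load-side sketch for end_fit_via_file above: write the pickled
# bytes back to a temporary file and rebuild the estimator from it.
# `rebuild_estimator_from_file` is hypothetical; a real implementation would
# call the framework-specific loading routine there:
def load_via_file(model_bytes, rebuild_estimator_from_file):
    import os
    import tempfile
    fd, filename = tempfile.mkstemp(suffix='.model')
    os.close(fd)
    try:
        with open(filename, 'wb') as f:
            f.write(model_bytes)
        return rebuild_estimator_from_file(filename)
    finally:
        os.remove(filename)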


def apply(state, X):
    """
    Is called once per inference.
    Should return a numpy array with the predicted values.
    You have to make sure that the numpy array has the correct format
    (32-bit float, C-style ordering)!
    The last line in this function takes care of this; I strongly recommend
    keeping it!
    @param state the return value of load
    @param X numpy array containing the features for which a prediction should be returned
    """
    p = state.predict(X)
    return np.require(p, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
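

# Hedged note on the np.require call above: it is a no-op when the array
# already complies and copies/casts otherwise, e.g.
#
#     a = np.ones(4, dtype=np.float64)
#     b = np.require(a, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
#     assert b.dtype == np.float32 and b.flags['C_CONTIGUOUS']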


if __name__ == "__main__":
    """
    We have written all the necessary hooks; now we can call the mva framework as usual.
    Other Python-based frameworks like sklearn, tensorflow, xgboost, ... have predefined hooks,
    but you can overwrite all of them.
    """
    from basf2 import conditions, find_file
    # use a local database payload for testing purposes
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_identifier = "MyFancyModel"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    python_options = basf2_mva.PythonOptions()
    # "custom" means: use the hooks defined in the steering file below
    python_options.m_framework = "custom"
    python_options.m_steering_file = "mva/examples/python/how_to_use_arbitrary_methods.py"

    # this json string is decoded and passed to get_model as `parameters`
    config_string = json.dumps({'A': 'Python', 'Dictionary': 'With Parameters', 'And': ['A List']})
    print("The json config string", config_string)
    python_options.m_config = config_string

    # use 70% of the data for the training, the remainder as validation sample
    python_options.m_training_fraction = 0.7
    python_options.m_normalize = False
    python_options.m_nIterations = 1
    python_options.m_mini_batch_size = 0
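
    # Hedged illustration: with the values above, partial_fit is called exactly
    # once with the complete training sample. A configuration like
    #
    #     python_options.m_nIterations = 10
    #     python_options.m_mini_batch_size = 100
    #
    # would instead stream the dataset ten times in chunks of 100 events
    # (see the partial_fit docstring above).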

    basf2_mva.teacher(general_options, python_options)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("Custom Method", auc)