22 """ Let's assume we have written our own classifier (or installed something from github) """
25 """ Just print the passed parameters """
26 print(my_fancy_parameters)
29 """ Our method is so good, it doesn't even have to look at the data! """
33 """ Always return 1, this will boost our signal efficiency to the max """
34 return np.ones(len(X))
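# Quick sanity check of the classifier in isolation (illustrative only):
#   clf = FancyClassifier('foo', 42)   # prints ('foo', 42)
#   clf.predict(np.zeros((5, 3)))      # -> array([1., 1., 1., 1., 1.])
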
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    This is the first function which is called.
    It must return a python object representing your method in memory; this object
    will be passed to all other hook functions.
    In this case we return our FancyClassifier object.
    @param number_of_features the total number of features
    @param number_of_spectators the total number of spectators
    @param number_of_events the total number of events
    @param training_fraction the signal fraction in the training
           (if you do a classification, otherwise the number is meaningless)
    @param parameters a python object which is created from the json string
           the user can pass via the m_config argument
    """
    return FancyClassifier(parameters)

def begin_fit(state, Xtest, Stest, ytest, wtest):
    """
    Is called once per training after get_model.
    You can initialize your training here.
    In addition a validation sample is passed, which you can use during the training
    (it is only filled if the user sets m_training_fraction != 1.0).
    @param state the return value of get_model
    @param Xtest numpy array containing the features of the validation sample
    @param Stest numpy array containing the spectators of the validation sample
    @param ytest numpy array containing the target values of the validation sample
    @param wtest numpy array containing the weights of the validation sample

    Since our method does not support out-of-core fitting, the usual approach is to add
    some arrays to the state which collect the data passed to partial_fit.
    Our method doesn't use the validation sample either, so we simply ignore it.
    """
    state.X = []
    state.y = []
    return state

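# If your method supported early stopping, begin_fit would be the place to keep
# the validation sample around, e.g. (a sketch, inside begin_fit):
#   state.X_valid, state.y_valid = Xtest, ytest
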
def partial_fit(state, X, S, y, w, epoch):
    """
    Can be called multiple times per training depending on the user configuration:
    If m_nIterations == 1 and m_mini_batch_size == 0 (these are the default values)
        partial_fit is called once with the complete training data.
    If m_nIterations == 1 and m_mini_batch_size != 0
        partial_fit is called multiple times with only a subset of the training data of the desired size,
        until the complete dataset was streamed via partial_fit.
    If m_nIterations > 1 and m_mini_batch_size == 0
        partial_fit is called multiple times, each time with the complete training data.
    If m_nIterations > 1 and m_mini_batch_size != 0
        partial_fit is called multiple times with only a subset of the training data of the desired size,
        until the complete dataset was streamed m_nIterations times.
    In all cases the streaming of data is stopped as soon as partial_fit returns False.
    @param state the return value of begin_fit
    @param X numpy array containing the features of the training sample
    @param S numpy array containing the spectators of the training sample
    @param y numpy array containing the target values of the training sample
    @param w numpy array containing the weights of the training sample
    @param epoch the total number of previous calls to partial_fit

    Since our method doesn't use the streaming capability,
    we just collect the data in our state object.
    """
    state.X.append(X)
    state.y.append(y)
    return True

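# For intuition, a small illustrative helper (a sketch, not part of the mva framework):
# how often partial_fit is invoked for a given configuration, assuming it never
# returns False to stop the streaming early.
def _expected_partial_fit_calls(n_events, n_iterations, mini_batch_size):
    import math
    if mini_batch_size == 0:
        # the complete training data is streamed once per iteration
        return n_iterations
    # otherwise each iteration is split into ceil(n_events / mini_batch_size) batches
    return n_iterations * math.ceil(n_events / mini_batch_size)
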
def end_fit(state):
    """
    Is called once per training.
    Here you can finish the training.
    You must return a pickable object, which is saved in the weightfile;
    later you must be able to recreate your estimator from this pickled object
    in the load hook (see below).
    @param state the return value of begin_fit

    We can fit our method here. And since our state object is pickable,
    we can just return it. You might want to use a better mechanism in a real-world
    example; you can look at the implementations of the other methods (like tensorflow)
    to see how to save models to files, read them back, and return them as a pickable object.
    """
    state.fit(state.X, state.y)
    pickable_object_for_weightfile = state
    return pickable_object_for_weightfile

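# A hedged sketch (not used by this example) of the file-based pattern mentioned in the
# docstring above, assuming a hypothetical estimator with a save(path) method:
def _end_fit_with_files_sketch(state):
    import os
    import tempfile
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, 'model.bin')
        state.save(path)  # hypothetical save() method of your estimator
        with open(path, 'rb') as f:
            return f.read()  # raw bytes are pickable and end up in the weightfile
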
def feature_importance(state):
    """
    Called after end_fit.
    Should return a list containing the feature importances.
    The feature importances are saved in the weightfile and can be read out by the user.
    If your method doesn't support feature importance estimation, return an empty list.
    """
    return []

def load(pickable_object_from_weightfile):
    """
    Is called once.
    @param pickable_object_from_weightfile the return value of end_fit,
           which was loaded from the weightfile and unpickled
    This should return again a state object, which is passed to apply later.

    In our case we directly pickled the state, so there's nothing to do here.
    In a real-world scenario you might have to create files on disk in a temporary directory
    and recreate your estimator from them. You can look at other methods (like tensorflow)
    to see how this is done.
    """
    state = pickable_object_from_weightfile
    return state

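# The mirror of the file-based end_fit sketch above (again hypothetical): write the
# stored bytes back to disk and recreate the estimator from the file.
def _load_from_files_sketch(model_bytes):
    import os
    import tempfile
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, 'model.bin')
        with open(path, 'wb') as f:
            f.write(model_bytes)
        return FancyClassifier.restore(path)  # hypothetical restore() classmethod
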
def apply(state, X):
    """
    Is called once per inference.
    Should return a numpy array with the predicted values.
    You have to make sure that the numpy array has the correct format
    (32bit float, C-style ordering)!
    The last line in this function takes care of this; I strongly recommend keeping it!
    @param state the return value of load
    @param X numpy array containing the features for which a prediction should be returned
    """
    p = state.predict(X)
    # np.require enforces: aligned ('A'), writeable ('W'), C-contiguous ('C'), owndata ('O')
    return np.require(p, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])

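# Illustration (not framework code): np.require only copies when needed and
# guarantees the requested dtype and layout, e.g.
#   a = np.ones(3, dtype=np.float64)
#   b = np.require(a, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
#   assert b.dtype == np.float32 and b.flags['C_CONTIGUOUS'] and b.flags['OWNDATA']
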
if __name__ == "__main__":
    """
    We have written all the necessary hooks, now we can call the mva framework as usual.
    Other Python-based frameworks like sklearn, tensorflow, xgboost, ... have predefined hooks,
    but you can overwrite all of them.
    """
    from basf2 import conditions
    conditions.testing_payloads = ['localdb/database.txt']

    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)', 'daughterInvariantMass(1, 2)']

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "MyFancyModel"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    python_options = basf2_mva.PythonOptions()

    # Tell the framework to use our custom hooks instead of a predefined framework
    python_options.m_framework = "custom"

    # The steering file must point to the file which defines the hooks, here this example itself
    python_options.m_steering_file = "mva/examples/python/how_to_use_arbitrary_methods.py"

    # Arbitrary parameters can be passed to get_model via a json-encoded string in m_config
    config_string = json.dumps({'A': 'Python', 'Dictionary': 'With Parameters', 'And': ['A List']})
    print("The json config string", config_string)
    python_options.m_config = config_string

    # Use 70% of the data for the training, the remainder is passed to begin_fit as validation sample
    python_options.m_training_fraction = 0.7

    # Our method does not require normalized features
    python_options.m_normalize = False

    # With m_nIterations = 1 and m_mini_batch_size = 0 (the defaults, see partial_fit above)
    # the complete training data is passed in a single call
    python_options.m_nIterations = 1
    python_options.m_mini_batch_size = 0

    # Train the method; this calls our hooks in the order described above
    basf2_mva.teacher(general_options, python_options)

    # Apply the trained method to an independent test sample and compute the area under the ROC curve
    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(basf2_mva.vector("test.root"), general_options.m_treename)
    auc = basf2_mva_util.calculate_roc_auc(p, t)
    print("Custom Method", auc)