24 def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
25 """ Must be implemented otherwise custom framework will raise an error """
29 def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
30 """ Must be implemented otherwise custom framework will raise an error """
34 def partial_fit(state, X, S, y, w, epoch, batch):
35 """ Must be implemented otherwise custom framework will raise an error """
39 def feature_importance(state):
40 """ Must be implemented otherwise custom framework will raise an error """
46 In end_fit you want to load your training and pass it as a picklable object to basf2.
47 You do not perform a real training, so your training data can be empty (or a small file).
48 Since you do not fit anything here, the training data is just ignored.
49 However, you want to ensure that the features you are using (and which you will need later)
50 are correctly set in the GeneralOptions.
53 with open(
'mytraining.pickle',
'rb')
as f:
54 my_pickled_classifier = pickle.load(f)
55 return my_pickled_classifier
58 def load(my_pickled_classifier):
60 The load function receives your pickled_classifier.
61 So maybe you want to initialize your actual classifier here
62 using this pickled classifier information.
63 In this example I just pass it along
65 state = my_pickled_classifier
71 In apply you will get the features you stated in the GeneralOptions.
72 In principle you can access the basf2 DataStore yourself in this function
73 and extract additional feature information for your classifier.
74 However, your method will only work inside basf2 and not with basf2_mva_expert in this case
75 (and also you cannot use basf2_mva_evaluate.py).
77 p = state.predict_proba(X)
78 return np.require(p, dtype=np.float32, requirements=[
'A',
'W',
'C',
'O'])
81 if __name__ ==
"__main__":
83 Here we create the fake training
84 We have written all the necessary hooks; now we can call the mva framework as usual.
85 Other Python-based frameworks like sklearn, tensorflow, xgboost, ... have predefined hooks,
86 but you can overwrite all of them.
89 from basf2
import conditions, find_file
91 conditions.testing_payloads = [
92 'localdb/database.txt'
96 from sklearn.ensemble
import GradientBoostingClassifier
97 clf = GradientBoostingClassifier()
98 X = np.random.uniform(size=(1000, 3))
99 y = (np.random.uniform(size=1000) > 0.5).astype(int)
102 pickle.dump(clf, open(
'mytraining.pickle',
'wb'))
104 variables = [
'M',
'p',
'pt']
108 root_file = ROOT.TFile(
"fake_train.root",
"recreate")
110 root_tree = ROOT.TTree(
'tree',
'title')
111 value = np.zeros(1, dtype=float)
112 for var
in variables:
113 root_tree.Branch(var, value, var +
'/F')
114 root_tree.Branch(
"isSignal", value,
'isSignal/F')
117 root_file.Write(
"tree")
121 general_options = basf2_mva.GeneralOptions()
122 general_options.m_datafiles = basf2_mva.vector(
"fake_train.root")
123 general_options.m_treename =
"tree"
124 general_options.m_identifier =
"MyModel"
125 general_options.m_variables = basf2_mva.vector(*variables)
126 general_options.m_target_variable =
"isSignal"
128 python_options = basf2_mva.PythonOptions()
129 python_options.m_framework =
"custom"
130 python_options.m_steering_file =
"mva/examples/python/how_to_wrap_your_existing_training_into_a_weightfile.py"
132 basf2_mva.teacher(general_options, python_options)
135 test_file = find_file(
"mva/test_D0toKpipi.root",
"examples")
136 testing_data = basf2_mva.vector(test_file)
139 p, t = method.apply_expert(testing_data, general_options.m_treename)
141 print(
"Custom Method", auc)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)