Belle II Software  release-08-01-10
how_to_wrap_your_existing_training_into_a_weightfile.py
1 #!/usr/bin/env python3
2 
3 
10 
# In some cases you create a training outside of basf2,
# but you still want to apply the training in basf2.
# In this case you can create a fake training which produces a weightfile
# that you can upload to the database, by overriding end_fit.
# In addition you have to override load and apply, so that the mva package
# knows how to apply your custom training.
17 
18 
19 import numpy as np
20 import basf2_mva
21 import basf2_mva_util
22 
23 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Placeholder for the model-creation hook.

    Must be implemented, otherwise the custom framework will raise an error.
    The classifier used by this example is trained outside of basf2, so no
    model is built here and all arguments are ignored.

    Returns:
        None, always.
    """
    return None
27 
28 
def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Placeholder for the begin-of-fit hook.

    Must be implemented, otherwise the custom framework will raise an error.
    Nothing is prepared here: the test data and the batch count are ignored.

    Returns:
        The ``state`` object that was passed in, unchanged.
    """
    return state
32 
33 
def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Placeholder for the per-batch fit hook.

    Must be implemented, otherwise the custom framework will raise an error.
    No fitting is performed; every argument is ignored.

    Returns:
        True, always.
    """
    return True
37 
38 
def feature_importance(state):
    """
    Placeholder for the feature-importance hook.

    Must be implemented, otherwise the custom framework will raise an error.
    This example does not report any feature importances.

    Returns:
        An empty list.
    """
    return []
42 
43 
def end_fit(state):
    """
    Load the externally created training and hand it to basf2.

    In end_fit you want to load your training and pass it as a picklable object to basf2.
    You do not perform a real training, so your training data can be empty (or a small file).
    Since you do not fit anything here the training data is just ignored.
    However, you want to ensure that the features you are using (and which you will need later)
    are correctly set in the GeneralOptions.

    The ``state`` argument is ignored.

    Returns:
        The object unpickled from 'mytraining.pickle' in the current working directory.

    Raises:
        FileNotFoundError: if 'mytraining.pickle' does not exist.
    """
    import pickle

    # NOTE: unpickling executes arbitrary code from the file; only load
    # pickle files that you created yourself or otherwise trust.
    with open('mytraining.pickle', 'rb') as f:
        my_pickled_classifier = pickle.load(f)
    return my_pickled_classifier
56 
57 
def load(my_pickled_classifier):
    """
    Turn the stored object back into the state used by apply.

    The load function receives your pickled classifier.
    So maybe you want to initialize your actual classifier here
    using this pickled classifier information.
    In this example the object is just passed along.

    Returns:
        The very object that was passed in.
    """
    return my_pickled_classifier
67 
68 
def apply(state, X):
    """
    Evaluate the wrapped classifier on the given features.

    In apply you will get the features you stated in the GeneralOptions.
    In principle you can access the basf2 DataStore yourself in this function
    and extract additional feature information for your classifier.
    However, in that case your method will only work inside basf2 and not with
    basf2_mva_expert (and you also cannot use basf2_mva_evaluate.py).

    Args:
        state: the object returned by load; it must provide predict_proba(X).
        X: the feature values, forwarded unchanged to state.predict_proba.

    Returns:
        The result of state.predict_proba(X) as a float32 numpy array.
    """
    p = state.predict_proba(X)
    # 'A', 'W', 'C', 'O' request an aligned, writeable, C-contiguous array
    # that owns its data; np.require copies only if that is not already true.
    return np.require(p, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
79 
80 
if __name__ == "__main__":
    # Here we create the fake training.
    # We have written all the necessary hooks, now we can call the mva framework as usual.
    # Other Python-based frameworks like sklearn, tensorflow, xgboost, ... have predefined hooks,
    # but you can overwrite all of them.
    import ROOT  # noqa
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # First I create an external sklearn weightfile
    from sklearn.ensemble import GradientBoostingClassifier
    clf = GradientBoostingClassifier()
    X = np.random.uniform(size=(1000, 3))
    y = (np.random.uniform(size=1000) > 0.5).astype(int)
    clf.fit(X, y)
    import pickle
    # Use a context manager so the file is flushed and closed before
    # end_fit reads it back during the fake training below.
    with open('mytraining.pickle', 'wb') as pickle_file:
        pickle.dump(clf, pickle_file)

    variables = ['M', 'p', 'pt']

    # Now we need also a fake input file, we just create one
    # the content doesn't matter, as long as the branches exist.
    root_file = ROOT.TFile("fake_train.root", "recreate")
    root_file.cd()
    root_tree = ROOT.TTree('tree', 'title')
    # The branches are declared with the '/F' (32-bit float) leaf type,
    # so the shared fill buffer is a float32 array to match.
    value = np.zeros(1, dtype=np.float32)
    for var in variables:
        root_tree.Branch(var, value, var + '/F')
    root_tree.Branch("isSignal", value, 'isSignal/F')
    for _ in range(10):
        root_tree.Fill()
    root_file.Write("tree")
    root_file.Close()

    # Now we write the configuration for our fake training
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("fake_train.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "MyModel"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "custom"
    python_options.m_steering_file = "mva/examples/python/how_to_wrap_your_existing_training_into_a_weightfile.py"

    basf2_mva.teacher(general_options, python_options)

    # Apply the training as usual
    test_file = find_file("mva/test_D0toKpipi.root", "examples")
    testing_data = basf2_mva.vector(test_file)

    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    # Compute the figure of merit that is printed below; without this
    # assignment `auc` would be undefined.
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("Custom Method", auc)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)