Belle II Software  release-05-01-25
howto_wrap_your_existing_training_into_a_weightfile.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Thomas Keck 2017

# In some cases you create a training outside of basf2,
# but you still want to apply the training in basf2.
# In this case you can create a fake training which produces a weightfile
# that you can upload to the database, by overriding end_fit.
# In addition you have to override load and apply, so that the mva package
# knows how to apply your custom training.


import numpy as np
import basf2_mva
import basf2_mva_util


def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """ Must be implemented, otherwise the custom framework will raise an error """
    return None


def begin_fit(state, Xtest, Stest, ytest, wtest):
    """ Must be implemented, otherwise the custom framework will raise an error """
    return state


def partial_fit(state, X, S, y, w, epoch):
    """ Must be implemented, otherwise the custom framework will raise an error """
    return True


def feature_importance(state):
    """ Must be implemented, otherwise the custom framework will raise an error """
    return []


def end_fit(state):
    """
    In end_fit you want to load your training and pass it as a picklable object to basf2.
    You do not perform a real training, so your training data can be empty (or a small file).
    Since you do not fit anything here, the training data is simply ignored.
    However, you want to ensure that the features you are using (and which you will need later)
    are correctly set in the GeneralOptions.
    """
    import pickle
    with open('mytraining.pickle', 'rb') as f:
        my_pickled_classifier = pickle.load(f)
    return my_pickled_classifier


def load(my_pickled_classifier):
    """
    The load function receives your pickled_classifier,
    so maybe you want to initialize your actual classifier here
    using this pickled classifier information.
    In this example I just pass it along.
    """
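    # A hedged illustration (not part of the original example): if you had pickled,
    # say, a dict such as {'clf': fitted_classifier, 'cut': 0.5} instead of the bare
    # classifier, this would be the place to unpack it, e.g.
    #   state = my_pickled_classifier['clf']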
    state = my_pickled_classifier
    return state


def apply(state, X):
    """
    In apply you will get the features you stated in the GeneralOptions.
    In principle you can access the basf2 DataStore yourself in this function
    and extract additional feature information for your classifier.
    However, in that case your method will only work inside basf2 and not with basf2_mva_expert
    (and you also cannot use basf2_mva_evaluate.py).
    """
    p = state.predict_proba(X)
    return np.require(p, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])


if __name__ == "__main__":
    """
    Here we create the fake training.
    We have written all the necessary hooks, so now we can call the mva framework as usual.
    Other Python-based frameworks like sklearn, tensorflow, xgboost, ... have predefined hooks,
    but you can overwrite all of them.
    """
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    # First we create an external sklearn training and pickle the fitted classifier
    from sklearn.ensemble import GradientBoostingClassifier
    clf = GradientBoostingClassifier()
    X = np.random.uniform(size=(1000, 3))
    y = (np.random.uniform(size=1000) > 0.5).astype(int)
    clf.fit(X, y)
    import pickle
    with open('mytraining.pickle', 'wb') as f:
        pickle.dump(clf, f)
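    # A small sanity check (not part of the original example): the pickle should
    # round-trip and the classifier should yield probabilities of the expected shape.
    with open('mytraining.pickle', 'rb') as f:
        assert pickle.load(f).predict_proba(X).shape == (1000, 2)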

    variables = ['M', 'p', 'pt']

    # We also need a fake input file; we just create one here.
    # The content doesn't matter, as long as the branches exist.
    import ROOT
    root_file = ROOT.TFile("fake_train.root", "recreate")
    root_file.cd()
    root_tree = ROOT.TTree('tree', 'title')
    value = np.zeros(1, dtype=np.float32)  # float32 buffer to match the '/F' leaf type
    for var in variables:
        root_tree.Branch(var, value, var + '/F')
    root_tree.Branch("isSignal", value, 'isSignal/F')
    for i in range(10):
        root_tree.Fill()
    root_file.Write("tree")
    root_file.Close()

    # Now we write the configuration for our fake training
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("fake_train.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "MyModel"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "custom"
    python_options.m_steering_file = "mva/examples/python/howto_wrap_your_existing_training_into_a_weightfile.py"

    basf2_mva.teacher(general_options, python_options)

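    # Hedged aside (not part of the original example): since "MyModel" has no file
    # extension, the teacher stores the weightfile in the configured (testing) database.
    # Assuming basf2_mva.download(identifier, filename), it could be exported again with:
    #
    #   basf2_mva.download('MyModel', 'MyModel.root')
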
    # Apply the training as usual
    # (here "test.root" is assumed to be an existing test sample, e.g. from the other mva examples)
    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(basf2_mva.vector("test.root"), general_options.m_treename)
    auc = basf2_mva_util.calculate_roc_auc(p, t)
    print("Custom Method", auc)
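
    # A hedged sketch (not part of the original example) of how the same weightfile
    # could be applied inside a basf2 path via the MVAExpert module; the input file,
    # particle list and extraInfo name below are placeholders only:
    #
    #   import basf2
    #   import modularAnalysis as ma
    #   path = basf2.create_path()
    #   ma.inputMdst('default', 'input.root', path=path)
    #   ma.fillParticleList('pi+:all', '', path=path)
    #   path.add_module('MVAExpert', listNames=['pi+:all'],
    #                   extraInfoName='MyModelOutput', identifier='MyModel')
    #   basf2.process(path)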