import json

import numpy as np

import basf2_mva
import basf2_mva_util


class MyFancyClassifier:
    """ Let's assume we have written our own classifier (or installed something from GitHub). """

    def __init__(self, *my_fancy_parameters):
        """ Just print the passed parameters. """
        print(my_fancy_parameters)
 
    def fit(self, X, y):
        """ Our method is so good, it doesn't even have to look at the data! """
        return self

    def predict(self, X):
        """ Always return 1; this will boost our signal efficiency to the max. """
        return np.ones(len(X))
 

def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    This is the first function which is called.
    It must return a Python object representing your method in memory; this object is passed to all other hook functions.
    In this case we return our MyFancyClassifier object.
    @param number_of_features the total number of features
    @param number_of_spectators the total number of spectators
    @param number_of_events the total number of events
    @param training_fraction the signal fraction in the training (if you do a classification, otherwise the number is meaningless)
    @param parameters a Python object created from the JSON string the user can pass via the m_config option
    """
    return MyFancyClassifier(parameters)
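

# A hedged illustration of how the parameters arrive here: if the user set
# m_config='{"depth": 3}' (a hypothetical key), the framework decodes the JSON
# string and calls get_model(..., parameters={'depth': 3}), so our classifier's
# __init__ prints ({'depth': 3},).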


def begin_fit(state, Xtest, Stest, ytest, wtest, nBatches):
    """
    Is called once per training, after get_model.
    You can initialize your training here.
    In addition a validation sample is passed, which you can use during the training (if the user set m_training_fraction != 1.0).
    @param state the return value of get_model
    @param Xtest numpy array containing the features of the validation sample
    @param Stest numpy array containing the spectators of the validation sample
    @param ytest numpy array containing the target values of the validation sample
    @param wtest numpy array containing the weights of the validation sample
    @param nBatches int containing the number of batches that will be passed to partial_fit in each epoch

    Since our method does not support out-of-core fitting, the usual approach is to add
    some arrays to the state which collect the data passed to partial_fit.
    Our method doesn't use the validation sample either, so we simply ignore it.
    """
    state.X = []
    state.y = []
    return state
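

# A minimal sketch (not used by this example) of what a method supporting early
# stopping could do with the validation sample instead of ignoring it. The
# framework only looks up the hook named begin_fit, so this function is inert:
def _sketch_begin_fit_with_validation(state, Xtest, Stest, ytest, wtest, nBatches):
    state.X = []
    state.y = []
    # keep the validation sample around, assuming the estimator can use it later
    state.validation = (Xtest, ytest)
    return state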


def partial_fit(state, X, S, y, w, epoch, batch):
    """
    Can be called multiple times per training, depending on the user configuration:
    If m_nIterations == 1 and m_mini_batch_size == 0 (these are the default values)
        partial_fit is called once with the complete training data.
    If m_nIterations == 1 and m_mini_batch_size != 0
        partial_fit is called multiple times with subsets of the training data of the desired size,
        until the complete dataset has been streamed via partial_fit.
    If m_nIterations > 1 and m_mini_batch_size == 0
        partial_fit is called multiple times, each time with the complete training data.
    If m_nIterations > 1 and m_mini_batch_size != 0
        partial_fit is called multiple times with subsets of the training data of the desired size,
        until the complete dataset has been streamed m_nIterations times.
    If m_nIterations == 0
        partial_fit is called multiple times until partial_fit returns False.
    As soon as partial_fit returns False the streaming of data is stopped.
    @param state the return value of begin_fit
    @param X numpy array containing the features of the training sample
    @param S numpy array containing the spectators of the training sample
    @param y numpy array containing the target values of the training sample
    @param w numpy array containing the weights of the training sample
    @param epoch the index of the current iteration through the total dataset
    @param batch the index of the current mini batch passed to partial_fit

    Since our method doesn't use the streaming capability,
    we just collect the data in our state object.
    """
    state.X.append(X)
    state.y.append(y)
    return True
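

# A hedged sketch of a genuine out-of-core update for comparison: an estimator
# with incremental learning support (e.g. sklearn's SGDClassifier) could train
# on each mini batch directly instead of collecting the data. The framework
# only looks up the hook named partial_fit, so this function is inert:
def _sketch_out_of_core_partial_fit(state, X, S, y, w, epoch, batch):
    # assumes state holds an sklearn SGDClassifier or a similar estimator
    state.partial_fit(X, y, classes=[0, 1], sample_weight=w)
    return True  # keep streaming; returning False would stop the data stream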


def end_fit(state):
    """
    Is called once per training.
    Here you can finish the training.
    You must return a picklable object, which is saved in the weightfile;
    later you must be able to recreate your estimator from this pickled object in the load hook (see below).
    @param state the return value of begin_fit

    We can fit our method here. And since our state object is picklable,
    we can just return it. You might want to use a better mechanism in a real-world example;
    have a look at how the other methods (like tensorflow) save models
    to files, read them back and return them as a picklable object.
    """
    state.fit(state.X, state.y)
    pickable_object_for_weightfile = state
    return pickable_object_for_weightfile
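

# A hedged sketch of the file-based alternative mentioned in the docstring:
# serialize the estimator to a temporary file and return its raw bytes, which
# are picklable and end up in the weightfile. pickle.dump stands in for your
# framework's own save routine:
def _sketch_end_fit_via_file(state):
    import os
    import pickle
    import tempfile
    state.fit(state.X, state.y)
    with tempfile.TemporaryDirectory() as path:
        filename = os.path.join(path, 'model.pkl')
        with open(filename, 'wb') as f:
            pickle.dump(state, f)
        with open(filename, 'rb') as f:
            return f.read()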
 

def feature_importance(state):
    """
    Called after end_fit.
    Should return a list containing the feature importances.
    The feature importances are saved in the weightfile and can be read out by the user.
    If your method doesn't support feature importance estimation, return an empty list.
    """
    return []


def load(pickable_object_from_weightfile):
    """
    @param pickable_object_from_weightfile the return value of end_fit, which was loaded from the weightfile and unpickled
    This should again return a state object, which is passed to apply later.

    In our case we directly pickled the state, so there's nothing to do here.
    In a real-world scenario you might have to create files on disk in a temporary directory
    and recreate your estimator from them. Have a look at other methods (like tensorflow) to see how this is done.
    """
    state = pickable_object_from_weightfile
    return state
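

# The counterpart to the file-based end_fit sketch above: write the bytes from
# the weightfile back to a temporary file and recreate the estimator from it.
# pickle.load stands in for your framework's own load routine:
def _sketch_load_via_file(raw_bytes):
    import os
    import pickle
    import tempfile
    with tempfile.TemporaryDirectory() as path:
        filename = os.path.join(path, 'model.pkl')
        with open(filename, 'wb') as f:
            f.write(raw_bytes)
        with open(filename, 'rb') as f:
            return pickle.load(f)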
 

def apply(state, X):
    """
    Is called once per inference.
    Should return a numpy array with the predicted values.
    You have to make sure that the numpy array has the correct format (32-bit float, C-style ordering)!
    The last line in this function takes care of this; I strongly recommend keeping it!
    @param state the return value of load
    @param X numpy array containing the features for which a prediction should be returned
    """
    p = state.predict(X)
    return np.require(p, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
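

# The whole hook chain can be smoke-tested standalone with toy data
# (hypothetical shapes, no basf2 run needed):
#
#   state = get_model(4, 0, 10, 0.5, None)
#   state = begin_fit(state, None, None, None, None, 1)
#   partial_fit(state, np.zeros((10, 4)), None, np.ones(10), np.ones(10), 0, 0)
#   obj = end_fit(state)
#   print(apply(load(obj), np.zeros((10, 4))))  # -> array of ones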
 

if __name__ == "__main__":
    """
    We have written all the necessary hooks; now we can call the mva framework as usual.
    Other Python-based frameworks like sklearn, tensorflow, xgboost, ... have predefined hooks,
    but you can overwrite all of them.
    """
    from basf2 import conditions
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']
 
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "MyFancyModel"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    python_options = basf2_mva.PythonOptions()
    # Tell the framework to use the custom hooks defined in this steering file
    python_options.m_framework = "custom"
    python_options.m_steering_file = "mva/examples/python/how_to_use_arbitrary_methods.py"

    # Arbitrary parameters can be passed to get_model as a JSON string
    config_string = json.dumps({'A': 'Python', 'Dictionary': 'With Parameters', 'And': ['A List']})
    print("The json config string", config_string)
    python_options.m_config = config_string

    # Use 70% of the data for the training; the rest is passed to begin_fit as a validation sample
    python_options.m_training_fraction = 0.7
    python_options.m_normalize = False
    # With these default values partial_fit is called once with the complete training data
    python_options.m_nIterations = 1
    python_options.m_mini_batch_size = 0
 
    basf2_mva.teacher(general_options, python_options)
 
    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(basf2_mva.vector("test.root"), general_options.m_treename)
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("Custom Method", auc)
 