Belle II Software  release-05-01-25
sklearn_mlpclassifier.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Thomas Keck 2016
5 
6 import basf2_mva
7 import basf2_mva_util
8 import time
9 
10 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Create an SKLearn MLPClassifier and store it in a State object.

    How the classifier is configured depends on the type of ``parameters``:
    a mapping is forwarded as keyword arguments, any other sequence is
    forwarded as positional arguments, and anything else (e.g. None)
    yields a classifier with default settings.

    The remaining arguments (number_of_features, number_of_spectators,
    number_of_events, training_fraction) are not used by this function.
    """
    # The abstract base classes must come from collections.abc: the aliases
    # collections.Mapping / collections.Sequence were removed in Python 3.10.
    # Importing here also avoids relying on a module-level `collections` name.
    from collections.abc import Mapping, Sequence

    from sklearn.neural_network import MLPClassifier
    from basf2_mva_python_interface.sklearn import State

    if isinstance(parameters, Mapping):
        clf = MLPClassifier(**parameters)
    elif isinstance(parameters, Sequence):
        clf = MLPClassifier(*parameters)
    else:
        clf = MLPClassifier()
    return State(clf)
24 
25 
def end_fit(state):
    """
    Merge received data together and fit the estimator.

    The chunks in ``state.X`` are stacked vertically and the chunks in
    ``state.y`` are concatenated horizontally; ``state.estimator`` is then
    fitted on the merged arrays, stored back on ``state`` and returned.

    Neural networks do not support weights at the moment (sklearn 0.18.1),
    so these are ignored here!
    """
    # Imported locally so the function does not depend on `np` being
    # provided by whatever namespace this file is executed in.
    import numpy as np

    features = np.vstack(state.X)
    targets = np.hstack(state.y)
    state.estimator = state.estimator.fit(features, targets)
    return state.estimator
34 
35 
if __name__ == "__main__":
    # Example script: train the sklearn MLP through basf2_mva, apply it to
    # test data, and print the training time, inference time and ROC AUC.
    from basf2 import conditions
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)', 'daughterInvariantMass(1, 2)']

    # Train a MVA method and directly upload it to the database
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "SKLearn-NN"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    # Point the Python backend at this steering file and pass the
    # MLPClassifier settings as a JSON string.
    sklearn_nn_options = basf2_mva.PythonOptions()
    sklearn_nn_options.m_framework = "sklearn"
    sklearn_nn_options.m_steering_file = 'mva/examples/python/sklearn_mlpclassifier.py'
    param = '{"hidden_layer_sizes": [29], "activation": "logistic", "max_iter": 100, "solver": "adam", "batch_size": 100}'
    sklearn_nn_options.m_config = param
    sklearn_nn_options.m_normalize = True

    # The same test file is listed ten times for the inference timing.
    test_data = ["test.root"] * 10

    # Time the training.
    training_start = time.time()
    basf2_mva.teacher(general_options, sklearn_nn_options)
    training_stop = time.time()
    training_time = training_stop - training_start

    # Time the application of the trained method to the test data.
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    p, t = method.apply_expert(basf2_mva.vector(*test_data), general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start

    # `auc` must be computed before it is printed; without this assignment
    # the print below raises NameError.
    auc = basf2_mva_util.calculate_roc_auc(p, t)
    print("SKLearn", training_time, inference_time, auc)
basf2_mva_util.calculate_roc_auc
def calculate_roc_auc(p, t)
Definition: basf2_mva_util.py:39
basf2_mva_python_interface.sklearn
Definition: sklearn.py:1
basf2_mva_util.Method
Definition: basf2_mva_util.py:81