Belle II Software development
sklearn_mlpclassifier.py
#!/usr/bin/env python3


import basf2_mva
import basf2_mva_util
import collections
import numpy as np
import time

def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Create an SKLearn classifier and store it in a State object
    """
    from sklearn.neural_network import MLPClassifier

    if isinstance(parameters, collections.abc.Mapping):
        clf = MLPClassifier(**parameters)
    elif isinstance(parameters, collections.abc.Sequence):
        clf = MLPClassifier(*parameters)
    else:
        clf = MLPClassifier()
    # State is provided by basf2's sklearn MVA interface (basf2_mva_python_interface.sklearn)
    # when the framework executes this steering file; it is not imported here.
    return State(clf)

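
# The remaining hooks (begin_fit, partial_fit, apply, ...) are not overridden in this
# example, so the defaults of basf2's sklearn MVA interface are used. As a rough,
# hedged sketch (the exact signatures live in basf2_mva_python_interface.sklearn and
# may differ between releases), partial_fit collects the streamed training batches
# into state.X and state.y, which end_fit below merges and fits in one go:
#
#     def partial_fit(state, X, S, y, w, epoch, batch):
#         state.X.append(X)
#         state.y.append(y.flatten())
#         return True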
def end_fit(state):
    """
    Merge the received data together and fit the estimator.
    Neural networks do not support weights at the moment (sklearn 0.18.1),
    so these are ignored here!
    """
    state.estimator = state.estimator.fit(np.vstack(state.X), np.hstack(state.y))
    return state.estimator

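# apply() is likewise taken from the default sklearn interface. Conceptually it
# evaluates the fitted estimator on the feature matrix, roughly (a sketch, not the
# exact implementation):
#
#     def apply(state, X):
#         return state.estimator.predict_proba(X)[:, 1].astype(np.float32)
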
if __name__ == "__main__":
    from basf2 import conditions, find_file
    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = [
        'localdb/database.txt'
    ]

    variables = ['M', 'p', 'pt', 'pz',
                 'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
                 'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
                 'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
                 'chiProb', 'dr', 'dz',
                 'daughter(0, dr)', 'daughter(1, dr)',
                 'daughter(0, dz)', 'daughter(1, dz)',
                 'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
                 'daughter(0, kaonID)', 'daughter(0, pionID)',
                 'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)']

    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    testing_data = basf2_mva.vector(test_file)

    # Train an MVA method and directly upload it to the database
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_identifier = "SKLearn-NN"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    sklearn_nn_options = basf2_mva.PythonOptions()
    sklearn_nn_options.m_framework = "sklearn"
    sklearn_nn_options.m_steering_file = 'mva/examples/python/sklearn_mlpclassifier.py'
    param = '{"hidden_layer_sizes": [29], "activation": "logistic", "max_iter": 100, "solver": "adam", "batch_size": 100}'
    sklearn_nn_options.m_config = param
    sklearn_nn_options.m_normalize = True
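    # m_config is handed to get_model() as `parameters`: the JSON string is parsed
    # into a dict, so every key above ends up as a keyword argument of MLPClassifier.
    # Hedged illustration (the parsing itself is done by the MVA framework):
    #
    #     import json
    #     json.loads(param)
    #     # -> {'hidden_layer_sizes': [29], 'activation': 'logistic', 'max_iter': 100, ...}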

    training_start = time.time()
    basf2_mva.teacher(general_options, sklearn_nn_options)
    training_stop = time.time()
    training_time = training_stop - training_start
    method = basf2_mva_util.Method(general_options.m_identifier)
    inference_start = time.time()
    p, t = method.apply_expert(testing_data, general_options.m_treename)
    inference_stop = time.time()
    inference_time = inference_stop - inference_start
    auc = basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
    print("SKLearn", training_time, inference_time, auc)
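
    # As a follow-up, the trained weightfile can also be applied offline to write the
    # classifier output into a ROOT file. Hedged example; 'expert.root' is only a
    # placeholder output file name:
    #
    #     basf2_mva.expert(basf2_mva.vector(general_options.m_identifier),
    #                      testing_data, general_options.m_treename, 'expert.root')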