Belle II Software development
bayesian_optimization_multicore.py
1#!/usr/bin/env python3
2
3
10
# A simple example of using Bayesian optimization to tune the hyperparameters of a FastBDT.
# The package used in this example is scikit-optimize (https://github.com/scikit-optimize),
# which can be installed with
#     pip3 install scikit-optimize
15
16from basf2 import find_file
17import basf2_mva
18import basf2_mva_util
19import skopt
20from skopt.space import Integer
21from sklearn.externals.joblib import Parallel, delayed
22import matplotlib.pyplot as plt
23
24
def f(x, general_options, process_number):
    """Return the figure of merit for the optimization.

    The function trains a FastBDT with the given hyperparameters on the
    training sample and calculates the AUC on the independent test sample.

    Args:
        x: sequence of hyperparameters; x[0] is used as the number of trees
            and x[1] as the number of levels (both are cast to int).
        general_options: general MVA options passed to the teacher. It is
            NOT copied: its m_identifier is overwritten in place.
        process_number: index used to build the weight file name
            ``test{process_number}.xml``.

    Returns:
        The negated AUC of the trained classifier on the test sample, so
        that an optimizer which minimizes its objective maximizes the AUC.
    """
    # Alias only (no copy): the identifier is set on the caller's object.
    g_options = general_options
    g_options.m_identifier = f"test{process_number}.xml"

    options = basf2_mva.FastBDTOptions()
    options.m_nTrees = int(x[0])
    options.m_nLevels = int(x[1])

    basf2_mva.teacher(g_options, options)
    m = basf2_mva_util.Method(g_options.m_identifier)

    # NOTE: test_data is a module-level global set in the __main__ section.
    p, t = m.apply_expert(test_data, general_options.m_treename)

    # The function previously fell off the end and returned None, which is
    # not a usable objective value for optimizer.tell().
    return -basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
39
40
if __name__ == "__main__":
    # Input samples: one file for the training, an independent one for testing.
    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")
    training_data = basf2_mva.vector(train_file)
    # f() reads test_data as a module-level global, so this name must be kept.
    test_data = basf2_mva.vector(test_file)

    # Options shared by every training performed during the optimization.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector('p', 'pz', 'daughter(0, kaonID)', 'chiProb', 'M')
    general_options.m_target_variable = "isSignal"

    # Search space: first dimension is used as m_nTrees, second as m_nLevels.
    search_space = [Integer(10, 1000), Integer(2, 6)]
    optimizer = skopt.Optimizer(dimensions=search_space, n_initial_points=3)

    # Seed the optimizer with one hand-picked point, evaluated serially.
    initial_guess = [10, 2]
    optimizer.tell(initial_guess, f(initial_guess, general_options, 0))

    # Ask for a batch of points, evaluate the batch in parallel, report back.
    n_rounds = 10
    points_per_round = 2
    for _ in range(n_rounds):
        candidates = optimizer.ask(n_points=points_per_round)
        jobs = (
            delayed(f)(point, general_options, job_index)
            for job_index, point in enumerate(candidates)
        )
        scores = Parallel(n_jobs=-1)(jobs)
        res = optimizer.tell(candidates, scores)

    # Print the final result and save the diagnostic plots.
    print(res)
    plot_outputs = (
        (skopt.plots.plot_convergence, 'convergence.png'),
        (skopt.plots.plot_evaluations, 'evaluations.png'),
        (skopt.plots.plot_objective, 'objective.png'),
    )
    for make_plot, file_name in plot_outputs:
        make_plot(res)
        plt.savefig(file_name)

    # Persist the optimization result.
    skopt.dump(res, 'opt-result.pkl')
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)