# Belle II Software  release-08-01-10
# bayesian_optimization_multicore.py
1 #!/usr/bin/env python3
2 
3 
10 
11 # A simple example to use bayesian optimization for the hyperparameters of a FastBDT.
12 # The package used in this example is https://github.com/scikit-optimize
13 # and can be installed with
14 # pip3 install scikit-optimize
15 
16 from basf2 import find_file
17 import basf2_mva
18 import basf2_mva_util
19 import skopt
20 from skopt.space import Integer
21 from sklearn.externals.joblib import Parallel, delayed
22 import matplotlib.pyplot as plt
23 
24 
def f(x, general_options, process_number):
    """Return the figure of merit for the optimization.

    Trains a FastBDT classifier with the given hyperparameters on the
    training sample and calculates the AUC on the independent test sample.

    :param x: sequence of hyperparameters: x[0] = number of trees,
        x[1] = number of tree levels (both coerced to int)
    :param general_options: basf2_mva.GeneralOptions preconfigured with the
        training data; its m_identifier is overwritten per process so that
        parallel workers do not clobber each other's weight files
    :param process_number: index used to build the unique weight-file name
    :return: negative AUC on the test sample (negated because skopt minimizes)
    """
    g_options = general_options
    # Unique identifier per worker process to avoid overwriting weight files.
    g_options.m_identifier = f"test{process_number}.xml"
    options = basf2_mva.FastBDTOptions()
    options.m_nTrees = int(x[0])
    options.m_nLevels = int(x[1])
    basf2_mva.teacher(g_options, options)
    m = basf2_mva_util.Method(g_options.m_identifier)
    # NOTE: test_data is a module-level global defined in the __main__ block.
    p, t = m.apply_expert(test_data, general_options.m_treename)
    # Bug fix: the return statement was missing, so f() yielded None and
    # optimizer.tell() would fail. Return the negated AUC so skopt's
    # minimization maximizes classification performance.
    return -basf2_mva_util.calculate_auc_efficiency_vs_background_retention(p, t)
40 
if __name__ == "__main__":
    # Locate the example training/test samples shipped with basf2.
    train_file = find_file("mva/train_D0toKpipi.root", "examples")
    test_file = find_file("mva/test_D0toKpipi.root", "examples")

    training_data = basf2_mva.vector(train_file)
    # test_data is read as a module-level global by f().
    test_data = basf2_mva.vector(test_file)

    # Common teacher configuration shared by every training in the scan.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector('p', 'pz', 'daughter(0, kaonID)', 'chiProb', 'M')
    general_options.m_target_variable = "isSignal"

    # Bayesian optimizer over (number of trees, tree depth).
    optimizer = skopt.Optimizer(dimensions=[Integer(10, 1000), Integer(2, 6)], n_initial_points=3)

    # Seed the optimizer with one evaluated starting point.
    initial_guess = [10, 2]
    initial_res = f(initial_guess, general_options, 0)
    optimizer.tell(initial_guess, initial_res)

    # Ask/tell loop: request two candidates per iteration and score them
    # concurrently, one worker process per candidate.
    for i in range(10):
        x = optimizer.ask(n_points=2)  # list of n_points candidate points
        y = Parallel(n_jobs=-1)(delayed(f)(v, general_options, index) for index, v in enumerate(x))
        res = optimizer.tell(x, y)

    # Report and visualize the outcome of the scan.
    print(res)
    skopt.plots.plot_convergence(res)
    plt.savefig('convergence.png')
    skopt.plots.plot_evaluations(res)
    plt.savefig('evaluations.png')
    skopt.plots.plot_objective(res)
    plt.savefig('objective.png')

    # Persist the optimization result for later inspection.
    skopt.dump(res, 'opt-result.pkl')
# Reference (signature only, defined in basf2_mva_util):
# calculate_auc_efficiency_vs_background_retention(p, t, w=None)