Belle II Software  release-06-02-00
bayesian_optimization_multicore.py
#!/usr/bin/env python3

# A simple example of using Bayesian optimization for the hyperparameters of a FastBDT.
# The package used in this example is https://github.com/scikit-optimize
# and can be installed with
# pip3 install scikit-optimize

# Training and test samples can be downloaded from:
# http://ekpwww.ekp.kit.edu/~tkeck/train.root
# http://ekpwww.ekp.kit.edu/~tkeck/test.root
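# The script below also imports joblib and matplotlib directly; if they are not
# already available they can be installed in the same way:
# pip3 install joblib matplotlib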
import basf2_mva
import basf2_mva_util
import skopt
import skopt.plots
from skopt.space import Integer
# sklearn.externals.joblib was removed in scikit-learn 0.23;
# use the standalone joblib package instead.
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
def f(x, general_options, process_number):
    """Returns the figure of merit for the optimization.
    The function trains the classifier with the given hyperparameters on the training sample and
    calculates the AUC on the independent test sample.
    """
    g_options = general_options
    g_options.m_identifier = "test{}.xml".format(process_number)
    options = basf2_mva.FastBDTOptions()
    options.m_nTrees = int(x[0])
    options.m_nLevels = int(x[1])
    basf2_mva.teacher(g_options, options)
    m = basf2_mva_util.Method(g_options.m_identifier)
    p, t = m.apply_expert(test_data, general_options.m_treename)
    # The optimizer minimizes the objective, so return the negative AUC
    return -basf2_mva_util.calculate_roc_auc(p, t)
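# Note: f() reads the module-level variable test_data that is assigned in the
# __main__ block below, so the worker processes spawned by joblib must be able
# to see it.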
if __name__ == "__main__":
    training_data = basf2_mva.vector("train.root")
    test_data = basf2_mva.vector("test.root")

    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector('p', 'pz', 'daughter(0, kaonID)', 'chiProb', 'M')
    general_options.m_target_variable = "isSignal"
    # init optimizer
    optimizer = skopt.Optimizer(dimensions=[Integer(10, 1000), Integer(2, 6)], n_initial_points=3)
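    # The two dimensions match the order in which f() unpacks x:
    # Integer(10, 1000) for m_nTrees and Integer(2, 6) for m_nLevels.
    # The first n_initial_points suggestions are sampled randomly before the
    # surrogate model takes over.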
    # evaluate an initial guess and feed it to the optimizer
    initial_guess = [10, 2]
    initial_res = f(initial_guess, general_options, 0)
    optimizer.tell(initial_guess, initial_res)
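    # tell() accepts any already-evaluated points, so results from earlier runs
    # could be fed in here as well to warm-start the optimization.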
    # optimize
    for i in range(10):
        x = optimizer.ask(n_points=2)  # x is a list of n_points points
        y = Parallel(n_jobs=-1)(delayed(f)(v, general_options, index) for index, v in enumerate(x))  # evaluate points in parallel
        res = optimizer.tell(x, y)
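    # ask(n_points=...) proposes a whole batch at once (by default skopt uses a
    # constant-liar strategy to keep the batch points diverse), and tell()
    # updates the surrogate model with all new evaluations in one step.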
    # Give some results
    print(res)
    skopt.plots.plot_convergence(res)  # best objective value found so far per iteration
    plt.savefig('convergence.png')
    skopt.plots.plot_evaluations(res)  # distribution of the sampled points in the search space
    plt.savefig('evaluations.png')
    skopt.plots.plot_objective(res)  # partial dependence of the surrogate model
    plt.savefig('objective.png')
    # Store result of optimization
    skopt.dump(res, 'opt-result.pkl')
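    # The stored result can be reloaded later with skopt.load('opt-result.pkl'),
    # e.g. to inspect the model or to continue the optimization.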