Belle II Software  release-05-02-19
bayesian_optimization_multicore.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Markus Prim 2017
5 # Thomas Keck
6 # Dennis Weyland
7 
8 # A simple example to use Bayesian optimization for the hyperparameters of a FastBDT.
9 # The package used in this example is https://github.com/scikit-optimize
10 # and can be installed with
11 # pip3 install scikit-optimize
12 
13 # Training and test sample can be downloaded:
14 # http://ekpwww.ekp.kit.edu/~tkeck/train.root
15 # http://ekpwww.ekp.kit.edu/~tkeck/test.root
16 
17 
18 import basf2_mva
19 import basf2_mva_util
20 import skopt
21 from skopt.space import Real, Integer
22 from sklearn.externals.joblib import Parallel, delayed
23 import matplotlib.pyplot as plt
24 
25 
def f(x, general_options, process_number):
    """Return the figure of merit (negative AUC) for the optimization.

    The function trains the classifier with the given hyperparameters on the
    training sample and calculates the AUC on the independent test sample.

    :param x: hyperparameter point proposed by the optimizer: [nTrees, nLevels]
    :param general_options: basf2_mva.GeneralOptions to train with; its
        m_identifier is overwritten per call so parallel workers do not clash
    :param process_number: index used to give each worker a unique weight file
    :return: negative AUC on the test sample (negated because skopt minimizes)
    """
    g_options = general_options
    # Unique identifier per parallel process so weight files do not collide.
    g_options.m_identifier = "test{}.xml".format(process_number)
    options = basf2_mva.FastBDTOptions()
    options.m_nTrees = int(x[0])
    options.m_nLevels = int(x[1])
    basf2_mva.teacher(g_options, options)
    m = basf2_mva_util.Method(g_options.m_identifier)
    # NOTE: test_data is a module-level global assigned in the __main__ block.
    p, t = m.apply_expert(test_data, general_options.m_treename)
    # Bug fix: the figure of merit was never returned. skopt minimizes, so
    # return the negative AUC computed on the independent test sample.
    return -basf2_mva_util.calculate_roc_auc(p, t)
41 
if __name__ == "__main__":
    training_data = basf2_mva.vector("train.root")
    test_data = basf2_mva.vector("test.root")

    # Configure the common training options shared by all evaluations.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector('p', 'pz', 'daughter(0, kaonID)', 'chiProb', 'M')
    general_options.m_target_variable = "isSignal"

    # Bayesian optimizer over (number of trees, tree depth).
    optimizer = skopt.Optimizer(dimensions=[Integer(10, 1000), Integer(2, 6)], n_initial_points=3)

    # Seed the optimizer with a single evaluated starting point.
    initial_guess = [10, 2]
    initial_res = f(initial_guess, general_options, 0)
    optimizer.tell(initial_guess, initial_res)

    # Repeatedly ask for a batch of candidate points, evaluate them in
    # parallel, and feed the scores back to the optimizer.
    for iteration in range(10):
        candidates = optimizer.ask(n_points=2)  # list of n_points hyperparameter points
        scores = Parallel(n_jobs=-1)(
            delayed(f)(point, general_options, worker)
            for worker, point in enumerate(candidates)
        )
        res = optimizer.tell(candidates, scores)

    # Report the final optimization state and save diagnostic plots.
    print(res)
    skopt.plots.plot_convergence(res)
    plt.savefig('convergence.png')
    skopt.plots.plot_evaluations(res)
    plt.savefig('evaluations.png')
    skopt.plots.plot_objective(res)
    plt.savefig('objective.png')

    # Persist the optimization result for later inspection.
    skopt.dump(res, 'opt-result.pkl')
basf2_mva_util.calculate_roc_auc
def calculate_roc_auc(p, t)
Definition: basf2_mva_util.py:39
basf2_mva_util.Method
Definition: basf2_mva_util.py:81