Belle II Software  release-05-01-25
hep_ml_ugboost.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Thomas Keck 2017
5 
6 import basf2_mva
7 import hep_ml
8 import hep_ml.losses
9 import hep_ml.gradientboosting
10 
11 
def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
    """
    Build the hep_ml gradient-boosting classifier and return it wrapped in a State object.

    The first number_of_features column indices (0, 1, ...) are handed to the classifier as
    training features. The number_of_spectators indices that follow directly after them are
    the columns passed to the flatness loss as uniform features.

    If parameters contains the key 'uniform_rate', a BinFlatnessLossFunction is used with
    that value as fl_coefficient and with uniform_label [0, 1]; otherwise the classifier is
    trained with a plain AdaLossFunction.

    number_of_events and training_fraction are accepted but not used here.

    NOTE(review): State is not defined or imported in this file; presumably it is supplied by
    the basf2 MVA python framework namespace in which this steering file is loaded - confirm.
    """
    feature_columns = list(range(number_of_features))
    spectator_columns = list(range(number_of_features, number_of_features + number_of_spectators))

    use_flatness_loss = parameters is not None and 'uniform_rate' in parameters
    if use_flatness_loss:
        loss = hep_ml.losses.BinFlatnessLossFunction(
            uniform_features=spectator_columns,
            uniform_label=[0, 1],
            fl_coefficient=parameters['uniform_rate'],
        )
    else:
        loss = hep_ml.losses.AdaLossFunction()

    # Fixed boosting configuration shared by both loss choices
    booster_settings = dict(n_estimators=100, subsample=0.5, max_depth=5)
    classifier = hep_ml.gradientboosting.UGradientBoostingClassifier(
        loss=loss, train_features=feature_columns, **booster_settings)
    return State(classifier)
26 
27 
if __name__ == "__main__":
    import json

    from basf2 import conditions

    # NOTE: testing payloads must never be used in production!
    # Any results obtained like this WILL NOT BE PUBLISHED
    conditions.testing_payloads = ['localdb/database.txt']

    # Both values are identical for every training configured below
    framework_name = 'hep_ml'
    steering_file = 'mva/examples/orthogonal_discriminators/hep_ml_ugboost.py'

    # Full feature set, used by the first two trainings
    full_feature_set = [
        'p', 'pt', 'pz', 'phi',
        'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
        'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
        'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
        'chiProb', 'dr', 'dz', 'dphi',
        'daughter(0, dr)', 'daughter(1, dr)', 'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, dphi)', 'daughter(1, dphi)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
        'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
        'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
        'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
        'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
        'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
        'daughter(2, daughter(0, minC2HDist))', 'daughter(2, daughter(1, minC2HDist))',
        'M',
    ]

    # Reduced feature set, used by the "hep_ml_feature_drop" training
    reduced_feature_set = [
        'p', 'pt', 'pz', 'phi',
        'chiProb', 'dr', 'dz', 'dphi',
        'daughter(2, chiProb)',
        'daughter(0, kaonID)', 'daughter(0, pionID)', 'daughter(1, kaonID)', 'daughter(1, pionID)',
        'daughter(2, daughter(0, E))', 'daughter(2, daughter(1, E))',
        'daughter(2, daughter(0, clusterTiming))', 'daughter(2, daughter(1, clusterTiming))',
        'daughter(2, daughter(0, clusterE9E25))', 'daughter(2, daughter(1, clusterE9E25))',
        'daughter(2, daughter(0, minC2HDist))', 'daughter(2, daughter(1, minC2HDist))',
    ]

    # Options common to all trainings; identifier and variables are adjusted per training
    general = basf2_mva.GeneralOptions()
    general.m_datafiles = basf2_mva.vector("train.root")
    general.m_treename = "tree"
    general.m_target_variable = "isSignal"
    # Spectators are the variables for which the selection should be uniform
    general.m_spectators = basf2_mva.vector('daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)')

    # Training 1: baseline on the full feature set, no configuration string set
    general.m_variables = basf2_mva.vector(*full_feature_set)
    general.m_identifier = "hep_ml_baseline"
    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = framework_name
    python_options.m_steering_file = steering_file
    basf2_mva.teacher(general, python_options)

    # Training 2: same options object, now carrying a 'uniform_rate' setting as JSON
    # (presumably handed to get_model as `parameters` - confirm against the framework)
    general.m_identifier = "hep_ml"
    python_options.m_config = json.dumps({'uniform_rate': 10.0})
    basf2_mva.teacher(general, python_options)

    # Training 3: fresh python options (so no 'uniform_rate') on the reduced feature set
    general.m_identifier = "hep_ml_feature_drop"
    general.m_variables = basf2_mva.vector(*reduced_feature_set)
    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = framework_name
    python_options.m_steering_file = steering_file
    basf2_mva.teacher(general, python_options)