Belle II Software  release-05-01-25
builtin_sideband_substraction.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # Thomas Keck 2016
5 
6 # The mva package has a built-in sideband subtraction mechanism.
7 # If your data and MC do not match, you can use the MetaOptions to do a meta-training.
8 # Using 'm_use_sideband_substraction = True' the mva package will train
9 # * signal
10 # - from a user-defined signal region with positive weight
11 # - from a user-defined negative signal region with negative weight
12 # * against background
13 # - from a user-defined background region with positive weight
14 #
15 # You have to define the sideband in one of the branches in the provided datafiles
16 # 1 = signal region
17 # 2 = background region
18 # 3 = negative signal region
19 # otherwise = not used in the training
20 #
21 # In addition you have to provide a MC file which contains the same variables,
22 # from which the number of signal events in the different regions can be estimated.
23 # This is the only information which is used from MC during the training.
24 
25 # For the sideband subtraction it is important that the features used
26 # are independent of the variable used to define the sidebands.
27 # Otherwise the classification quality will be poor.
28 
29 import basf2_mva
30 from basf2 import *
31 from modularAnalysis import *
32 
if __name__ == "__main__":
    # Feature list handed to the classifier, assembled group by group.
    # The order of the final list is significant and is kept exactly:
    # candidate kinematics, candidate vertex quantities, per-daughter vertex
    # quantities, daughter fit quality / mass, particle ID, and finally the
    # cluster quantities of the two grand-daughters of daughter 2.
    #
    # Feature groups that are currently switched off (kept for reference):
    #   'daughter(i, p)', 'daughter(i, pz)', 'daughter(i, pt)', 'daughter(i, phi)'   for i = 0, 1, 2
    #   'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)'
    #   'daughterInvariantMass(1, 2)', 'daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)'
    candidate_kinematics = ['p', 'pt', 'pz', 'phi']
    candidate_vertex = ['chiProb', 'dr', 'dz', 'dphi']
    daughter_vertex = [
        'daughter(0, dr)', 'daughter(1, dr)',
        'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, dphi)', 'daughter(1, dphi)',
    ]
    daughter_quality = [
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
        'daughter(2, M)',
    ]
    daughter_pid = ['daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))']

    # Every cluster quantity is read from both grand-daughters (index 0, then 1)
    # of daughter 2, quantity by quantity.
    cluster_quantities = ('E', 'clusterLAT', 'clusterHighestE',
                          'clusterNHits', 'clusterE9E25', 'minC2HDist')
    granddaughter_template = 'daughter(2, daughter({}, {}))'
    granddaughter_clusters = [
        granddaughter_template.format(index, quantity)
        for quantity in cluster_quantities
        for index in (0, 1)
    ]

    feature_names = (candidate_kinematics + candidate_vertex + daughter_vertex
                     + daughter_quality + daughter_pid + granddaughter_clusters)

    # General settings: where the training data lives, what to train on,
    # and under which identifier the result is stored.
    training_setup = basf2_mva.GeneralOptions()
    training_setup.m_identifier = "MVASideband"
    training_setup.m_datafiles = basf2_mva.vector("train_data.root")
    training_setup.m_treename = "tree"
    training_setup.m_target_variable = "isSignal"
    training_setup.m_variables = basf2_mva.vector(*feature_names)

    # FastBDT is used as the classifier; its options object is left untouched.
    classifier_setup = basf2_mva.FastBDTOptions()

    # Meta settings: switch on the sideband subtraction, name the branch that
    # holds the region code, and give the MC file(s) used alongside the data.
    sideband_setup = basf2_mva.MetaOptions()
    sideband_setup.m_use_sideband_substraction = True
    sideband_setup.m_sideband_mc_files = basf2_mva.vector("train_mc.root")
    sideband_setup.m_sideband_variable = 'sideband'

    # Run the training with the three option sets.
    basf2_mva.teacher(training_setup, classifier_setup, sideband_setup)