Belle II Software  release-08-02-04
builtin_sideband_subtraction.py
1 #!/usr/bin/env python3
2 
3 
10 
11 # The mva package has a builtin sideband subtraction mechanism.
# If your data and MC do not match, you can use the MetaOptions to do a meta-training.
13 # Using 'm_use_sideband_subtraction = True' the mva package will train
14 # * signal
15 # - from a user-defined signal region with positive weight
16 # - from a user-defined negative signal region with negative weight
17 # * against background
18 # - from a user-defined background region with positive weight
19 #
20 # You have to define the sideband in one of the branches in the provided datafiles
21 # 1 = signal region
22 # 2 = background region
23 # 3 = negative signal region
24 # otherwise = not used in the training
25 #
# In addition, you have to provide an MC file which contains the same variables,
27 # from which the number of signal events in the different regions can be estimated.
28 # This is the only information which is used from MC during the training.
29 
30 # For the sideband subtraction it is important that the used features
31 # are independent of the variable used to define the sidebands.
32 # Otherwise the classification quality will be poor.
33 
34 import basf2_mva
35 
if __name__ == "__main__":
    # The feature list is assembled from small groups so that each family of
    # variables can be read on its own. The groups are concatenated below in
    # the same order in which the features appear in a single flat list.

    # Quantities of the candidate itself.
    candidate_features = [
        'p', 'pt', 'pz', 'phi',
        'chiProb', 'dr', 'dz', 'dphi',
    ]

    # dr / dz / dphi of daughters 0 and 1.
    daughter_position_features = [
        'daughter(0, dr)', 'daughter(1, dr)',
        'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, dphi)', 'daughter(1, dphi)',
    ]

    # chiProb of all three daughters, M of daughter 2 and the
    # atcPIDBelle(3,2) value of daughters 0 and 1.
    daughter_quality_features = [
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
        'daughter(2, M)',
        'daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))',
    ]

    # Variables of the two daughters of daughter 2.
    granddaughter_features = [
        'daughter(2, daughter(0, E))',
        'daughter(2, daughter(1, E))',
        'daughter(2, daughter(0, clusterLAT))',
        'daughter(2, daughter(1, clusterLAT))',
        'daughter(2, daughter(0, clusterHighestE))',
        'daughter(2, daughter(1, clusterHighestE))',
        'daughter(2, daughter(0, clusterNHits))',
        'daughter(2, daughter(1, clusterNHits))',
        'daughter(2, daughter(0, clusterE9E25))',
        'daughter(2, daughter(1, clusterE9E25))',
        'daughter(2, daughter(0, minC2TDist))',
        'daughter(2, daughter(1, minC2TDist))',
    ]

    # Deliberately NOT part of the training in this example (disabled):
    #   'daughter(i, p)', 'daughter(i, pz)', 'daughter(i, pt)', 'daughter(i, phi)'  for i = 0, 1, 2
    #   'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)'
    #   'daughterInvM(1, 2)', 'daughterInvM(0, 1)', 'daughterInvM(0, 2)'

    training_features = (
        candidate_features
        + daughter_position_features
        + daughter_quality_features
        + granddaughter_features
    )

    # General training setup: input file and tree, the features, the target
    # variable and the identifier under which the result is stored.
    training_setup = basf2_mva.GeneralOptions()
    training_setup.m_identifier = "MVASideband"
    training_setup.m_datafiles = basf2_mva.vector("train_data.root")
    training_setup.m_treename = "tree"
    training_setup.m_target_variable = "isSignal"
    training_setup.m_variables = basf2_mva.vector(*training_features)

    # FastBDT is used with its default options.
    bdt_setup = basf2_mva.FastBDTOptions()

    # Switch on the builtin sideband subtraction: 'sideband' is the branch
    # holding the region label, and the MC file is passed in addition to the
    # data file configured above.
    sideband_setup = basf2_mva.MetaOptions()
    sideband_setup.m_use_sideband_subtraction = True
    sideband_setup.m_sideband_mc_files = basf2_mva.vector("train_mc.root")
    sideband_setup.m_sideband_variable = 'sideband'

    # Run the training with the three option objects.
    basf2_mva.teacher(training_setup, bdt_setup, sideband_setup)