Belle II Software light-2406-ragdoll
builtin_sideband_subtraction.py
1#!/usr/bin/env python3
2
3
10
# The mva package has a builtin sideband subtraction mechanism.
# If your data and MC do not match, you can use the MetaOptions to do a meta-training.
# Using 'm_use_sideband_subtraction = True' the mva package will train
#   * signal
#     - from a user-defined signal region with positive weight
#     - from a user-defined negative signal region with negative weight
#   * against background
#     - from a user-defined background region with positive weight
#
# You have to define the sideband in one of the branches in the provided datafiles:
#   1 = signal region
#   2 = background region
#   3 = negative signal region
#   otherwise = not used in the training
#
# In addition you have to provide an MC file which contains the same variables,
# from which the number of signal events in the different regions can be estimated.
# This is the only information which is used from MC during the training.

# For the sideband subtraction it is important that the used features
# are independent of the variable used to define the sidebands.
# Otherwise the classification quality will be poor.
33
34import basf2_mva
35
if __name__ == "__main__":
    # Features evaluated on the candidate itself.
    candidate_features = ['p', 'pt', 'pz', 'phi', 'chiProb', 'dr', 'dz', 'dphi']

    # dr, dz and dphi of daughters 0 and 1, grouped per quantity.
    daughter_position_features = [
        f'daughter({index}, {quantity})'
        for quantity in ('dr', 'dz', 'dphi')
        for index in (0, 1)
    ]

    # Fit-quality, mass and particle-identification features of the daughters.
    daughter_quality_features = [
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)', 'daughter(2, M)',
        'daughter(0, atcPIDBelle(3,2))', 'daughter(1, atcPIDBelle(3,2))',
    ]

    # Energy and cluster quantities of the two daughters of daughter 2,
    # grouped per quantity.
    granddaughter_features = [
        f'daughter(2, daughter({index}, {quantity}))'
        for quantity in ('E', 'clusterLAT', 'clusterHighestE',
                         'clusterNHits', 'clusterE9E25', 'minC2TDist')
        for index in (0, 1)
    ]

    # Features that are deliberately left out of this example's training:
    #   'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)', 'daughter(0, phi)',
    #   'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)', 'daughter(1, phi)',
    #   'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)', 'daughter(2, phi)',
    #   'daughterAngle(0, 1)', 'daughterAngle(0, 2)', 'daughterAngle(1, 2)',
    #   'daughterInvM(1, 2)', 'daughterInvM(0, 1)', 'daughterInvM(0, 2)'
    variables = (
        candidate_features
        + daughter_position_features
        + daughter_quality_features
        + granddaughter_features
    )

    # Sideband-subtraction settings: the 'sideband' branch marks the regions,
    # and the MC file is passed alongside the data file used for training.
    meta_options = basf2_mva.MetaOptions()
    meta_options.m_use_sideband_subtraction = True
    meta_options.m_sideband_variable = 'sideband'
    meta_options.m_sideband_mc_files = basf2_mva.vector("train_mc.root")

    # The classifier is a FastBDT with its default options.
    fastbdt_options = basf2_mva.FastBDTOptions()

    # General training setup: input file and tree, output identifier,
    # feature list and target.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("train_data.root")
    general_options.m_identifier = "MVASideband"
    general_options.m_treename = "tree"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    basf2_mva.teacher(general_options, fastbdt_options, meta_options)