import copy
import multiprocessing

# NOTE(review): ``basf2_mva`` is used throughout this script but its import is
# not visible in this section -- confirm it is imported further up in the file.


def leave_one_out_sets(names):
    """Return one list per entry of *names*, each with that entry left out.

    The i-th returned list contains every element of *names* that compares
    unequal to ``names[i]``, in the original order.
    """
    return [[kept for kept in names if kept != dropped] for dropped in names]


if __name__ == "__main__":
    training_data = basf2_mva.vector("train.root")
    test_data = basf2_mva.vector("test.root")

    # Feature names handed to the classifier.
    variables = [
        'M', 'p', 'pt', 'pz',
        'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
        'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
        'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
        'chiProb', 'dr', 'dz',
        'daughter(0, dr)', 'daughter(1, dr)',
        'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
        'daughter(0, kaonID)', 'daughter(0, pionID)',
        'daughterInvM(0, 1)', 'daughterInvM(0, 2)', 'daughterInvM(1, 2)',
    ]

    # Baseline run: all variables, default FastBDT options, written to "test.xml".
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_identifier = "test.xml"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    fastbdt_options = basf2_mva.FastBDTOptions()
    basf2_mva.teacher(general_options, fastbdt_options)

    def roc_for_variable_set(variables):
        """Retrain on the given variable subset and apply the result to the test data.

        A shallow copy of ``general_options`` gets its ``m_variables`` replaced by
        *variables*; that copy is passed to ``method.train_teacher`` and the object
        it returns is applied to ``test_data``.

        NOTE(review): ``method`` is not defined anywhere in the visible section,
        and no value is returned here although the callers below sort by, and
        subtract, the mapped results. Presumably an AUC computed from ``(p, t)``
        should be returned (a ``calculate_auc_efficiency_vs_background_retention(p, t, w=None)``
        signature appears right after this script) -- confirm against the full file.

        NOTE(review): this function lives inside the ``__main__`` guard, so pool
        workers can only resolve it when processes are started by forking.
        """
        options = copy.copy(general_options)
        options.m_variables = basf2_mva.vector(*variables)
        m = method.train_teacher(training_data, general_options.m_treename, general_options=options)
        p, t = m.apply_expert(test_data, general_options.m_treename)

    # NOTE(review): ``method`` must be created before this point; its definition
    # is not visible in this section.
    p, t = method.apply_expert(test_data, general_options.m_treename)

    # Importances as reported by the method object itself (0.0 when absent).
    print("Variable importances returned my method")
    for variable in method.variables:
        print(variable, method.importances.get(variable, 0.0))

    # The context manager tears the worker pool down once all mapping is done;
    # the original never closed it. Named ``pool`` so it no longer shadows ``p``.
    with multiprocessing.Pool(None, maxtasksperchild=1) as pool:
        # One retraining per variable, each with exactly that variable left out.
        results = pool.map(roc_for_variable_set, leave_one_out_sets(method.variables))
        sorted_variables_with_results = sorted(zip(method.variables, results), key=lambda x: x[1])

        print("Variable importances calculated using loss if variable is removed")
        # NOTE(review): ``global_auc`` is not defined in the visible section.
        for variable, auc in sorted_variables_with_results:
            print(variable, global_auc - auc)

        # Recursive removal: the first entry of each ascending ranking is moved to
        # the removed list and the ranking is recomputed on the remaining variables.
        removed_variables_with_results = sorted_variables_with_results[:1]
        remaining_variables = [v for v, r in sorted_variables_with_results[1:]]
        while len(remaining_variables) > 1:
            results = pool.map(roc_for_variable_set, leave_one_out_sets(remaining_variables))
            sorted_variables_with_results = sorted(zip(remaining_variables, results), key=lambda x: x[1])
            removed_variables_with_results += sorted_variables_with_results[:1]
            remaining_variables = [v for v, r in sorted_variables_with_results[1:]]

    # Whatever is left of the most recent ranking goes last. If the loop body
    # never ran, this is the tail of the first-round ranking.
    removed_variables_with_results += sorted_variables_with_results[1:]

    print("Variable importances calculated using loss if variables are recursively removed")

    # NOTE(review): ``last_auc`` is neither initialised nor updated in the
    # visible section -- confirm against the full file.
    for variable, auc in removed_variables_with_results:
        print(variable, last_auc - auc)
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)