import multiprocessing

import basf2_mva_util
if __name__ == "__main__":
    # Training and test input files, wrapped with basf2_mva.vector.
    training_data = basf2_mva.vector("train.root")
    test_data = basf2_mva.vector("test.root")

    # Names of the input variables handed to the classifier.
    variables = [
        'M', 'p', 'pt', 'pz',
        'daughter(0, p)', 'daughter(0, pz)', 'daughter(0, pt)',
        'daughter(1, p)', 'daughter(1, pz)', 'daughter(1, pt)',
        'daughter(2, p)', 'daughter(2, pz)', 'daughter(2, pt)',
        'chiProb', 'dr', 'dz',
        'daughter(0, dr)', 'daughter(1, dr)',
        'daughter(0, dz)', 'daughter(1, dz)',
        'daughter(0, chiProb)', 'daughter(1, chiProb)', 'daughter(2, chiProb)',
        'daughter(0, kaonID)', 'daughter(0, pionID)',
        'daughterInvariantMass(0, 1)', 'daughterInvariantMass(0, 2)', 'daughterInvariantMass(1, 2)',
    ]

    # Train the reference model on the full variable set with default FastBDT options.
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = training_data
    general_options.m_treename = "tree"
    general_options.m_identifier = "test.xml"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "isSignal"

    fastbdt_options = basf2_mva.FastBDTOptions()
    basf2_mva.teacher(general_options, fastbdt_options)

    def roc_for_variable_set(variables):
        """Retrain with only the given variables and return the resulting ROC AUC.

        Args:
            variables: names of the variables the retrained model may use.

        Returns:
            The result of ``basf2_mva_util.calculate_roc_auc`` for the retrained
            model applied to ``test_data``.
        """
        # Created per call so that every pool task works on its own Method object.
        method = basf2_mva_util.Method(general_options.m_identifier)
        # Work on a copy so the shared general_options keeps the full variable list.
        options = copy.copy(general_options)
        options.m_variables = basf2_mva.vector(*variables)
        m = method.train_teacher(training_data, general_options.m_treename, general_options=options)
        p, t = m.apply_expert(test_data, general_options.m_treename)
        return basf2_mva_util.calculate_roc_auc(p, t)

    def rank_by_leave_one_out(pool, candidates):
        """Return ``(variable, auc)`` pairs sorted by ascending AUC.

        For every candidate a model is trained on all *other* candidates; the
        AUC of that model is paired with the variable that was left out.

        Args:
            pool: a ``multiprocessing.Pool`` used to run the trainings in parallel.
            candidates: the variable names to rank.

        Returns:
            A list of ``(variable, auc)`` tuples, lowest AUC first.
        """
        candidates = list(candidates)
        subsets = [[v for v in candidates if v != left_out] for left_out in candidates]
        aucs = pool.map(roc_for_variable_set, subsets)
        return sorted(zip(candidates, aucs), key=lambda pair: pair[1])

    # Reference performance of the model trained on all variables.
    method = basf2_mva_util.Method(general_options.m_identifier)
    p, t = method.apply_expert(test_data, general_options.m_treename)
    global_auc = basf2_mva_util.calculate_roc_auc(p, t)

    # Approach 1: importances as reported by the method itself.
    # NOTE(review): "my" in the message below looks like a typo for "by"; it is
    # left untouched because it is runtime output.
    print("Variable importances returned my method")
    for variable in method.variables:
        print(variable, method.importances.get(variable, 0.0))

    # NOTE(review): roc_for_variable_set is defined inside the __main__ guard, so
    # pool workers can only resolve it when they are forked from this process;
    # with the "spawn" start method the lookup would fail - confirm the platform.
    # maxtasksperchild=1 gives every training a fresh worker process.
    with multiprocessing.Pool(None, maxtasksperchild=1) as pool:
        # Approach 2: loss in AUC when a single variable is removed.
        sorted_variables_with_results = rank_by_leave_one_out(pool, method.variables)
        print("Variable importances calculated using loss if variable is removed")
        for variable, auc in sorted_variables_with_results:
            print(variable, global_auc - auc)

        # Approach 3: repeatedly drop the variable whose removal gives the
        # lowest AUC, then re-rank the remaining ones.
        removed_variables_with_results = sorted_variables_with_results[:1]
        remaining_variables = [v for v, r in sorted_variables_with_results[1:]]
        while len(remaining_variables) > 1:
            sorted_variables_with_results = rank_by_leave_one_out(pool, remaining_variables)
            removed_variables_with_results += sorted_variables_with_results[:1]
            remaining_variables = [v for v, r in sorted_variables_with_results[1:]]
        # Whatever is left after the final ranking is appended in ranked order.
        removed_variables_with_results += sorted_variables_with_results[1:]

    print("Variable importances calculated using loss if variables are recursively removed")
    # Each step is compared with the AUC of the previous step, starting from
    # the model that uses all variables.
    last_auc = global_auc
    for variable, auc in removed_variables_with_results:
        print(variable, last_auc - auc)
        last_auc = auc
def calculate_roc_auc(p, t)