import multiprocessing

import basf2_mva_util
 
   31 if __name__ == 
"__main__":
 
   33     training_data = basf2_mva.vector(
"train.root")
 
   34     test_data = basf2_mva.vector(
"test.root")
 
   36     variables = [
'M', 
'p', 
'pt', 
'pz',
 
   37                  'daughter(0, p)', 
'daughter(0, pz)', 
'daughter(0, pt)',
 
   38                  'daughter(1, p)', 
'daughter(1, pz)', 
'daughter(1, pt)',
 
   39                  'daughter(2, p)', 
'daughter(2, pz)', 
'daughter(2, pt)',
 
   40                  'chiProb', 
'dr', 
'dz',
 
   41                  'daughter(0, dr)', 
'daughter(1, dr)',
 
   42                  'daughter(0, dz)', 
'daughter(1, dz)',
 
   43                  'daughter(0, chiProb)', 
'daughter(1, chiProb)', 
'daughter(2, chiProb)',
 
   44                  'daughter(0, kaonID)', 
'daughter(0, pionID)',
 
   45                  'daughterInvM(0, 1)', 
'daughterInvM(0, 2)', 
'daughterInvM(1, 2)']
 
   48     general_options = basf2_mva.GeneralOptions()
 
   49     general_options.m_datafiles = training_data
 
   50     general_options.m_treename = 
"tree" 
   51     general_options.m_identifier = 
"test.xml" 
   52     general_options.m_variables = basf2_mva.vector(*variables)
 
   53     general_options.m_target_variable = 
"isSignal" 
   55     fastbdt_options = basf2_mva.FastBDTOptions()
 
   56     basf2_mva.teacher(general_options, fastbdt_options)
 
   58     def roc_for_variable_set(variables):
 
   60         options = copy.copy(general_options)
 
   61         options.m_variables = basf2_mva.vector(*variables)
 
   62         m = method.train_teacher(training_data, general_options.m_treename, general_options=options)
 
   63         p, t = m.apply_expert(test_data, general_options.m_treename)
 
   67     p, t = method.apply_expert(test_data, general_options.m_treename)
 
   71     print(
"Variable importances returned my method")
 
   72     for variable 
in method.variables:
 
   73         print(variable, method.importances.get(variable, 0.0))
 
   76     p = multiprocessing.Pool(
None, maxtasksperchild=1)
 
   77     results = p.map(roc_for_variable_set, [[v 
for v 
in method.variables 
if v != variable] 
for variable 
in method.variables])
 
   78     sorted_variables_with_results = list(sorted(zip(method.variables, results), key=
lambda x: x[1]))
 
   79     print(
"Variable importances calculated using loss if variable is removed")
 
   80     for variable, auc 
in sorted_variables_with_results:
 
   81         print(variable, global_auc - auc)
 
   84     removed_variables_with_results = sorted_variables_with_results[:1]
 
   85     remaining_variables = [v 
for v, r 
in sorted_variables_with_results[1:]]
 
   86     while len(remaining_variables) > 1:
 
   87         results = p.map(roc_for_variable_set,
 
   88                         [[v 
for v 
in remaining_variables 
if v != variable] 
for variable 
in remaining_variables])
 
   89         sorted_variables_with_results = list(sorted(zip(remaining_variables, results), key=
lambda x: x[1]))
 
   90         removed_variables_with_results += sorted_variables_with_results[:1]
 
   91         remaining_variables = [v 
for v, r 
in sorted_variables_with_results[1:]]
 
   92     removed_variables_with_results += sorted_variables_with_results[1:]
 
   94     print(
"Variable importances calculated using loss if variables are recursively removed")
 
   96     for variable, auc 
in removed_variables_with_results:
 
   97         print(variable, last_auc - auc)
 
def calculate_auc_efficiency_vs_background_retention(p, t, w=None)