13 from ROOT
import Belle2
def tree2dict(tree, tree_columns, dict_columns=None):
    """
    Convert a ROOT.TTree into a dictionary of np.arrays
    @param tree the ROOT.TTree
    @param tree_columns the column (or branch) names in the tree
    @param dict_columns the corresponding column names in the dictionary
           (the tree column names are used if None)
    @return a container indexable by the dictionary column names: the record
            array produced by root_numpy if that package can be imported,
            otherwise a dict of float np.arrays with one entry per tree entry;
            an empty dict if no columns are requested
    @raise ValueError if dict_columns and tree_columns differ in length
    """
    if len(tree_columns) == 0:
        return dict()
    if dict_columns is None:
        dict_columns = tree_columns
    # zip() below would silently drop the surplus names, so reject a mismatch
    # up front and make both code paths fail the same way.
    if len(dict_columns) != len(tree_columns):
        raise ValueError("dict_columns and tree_columns must have the same length")

    # root_numpy is optional; only the import itself is guarded so that genuine
    # conversion errors are not mistaken for a missing package.
    try:
        import root_numpy
    except ImportError:
        root_numpy = None

    if root_numpy is not None:
        d = root_numpy.tree2array(tree, branches=tree_columns)
        d.dtype.names = dict_columns
        return d

    # Fallback: read the tree entry by entry through the Python bindings.
    d = {column: np.zeros((tree.GetEntries(),)) for column in dict_columns}
    column_pairs = list(zip(dict_columns, tree_columns))
    for iEvent, event in enumerate(tree):
        for dict_column, tree_column in column_pairs:
            d[dict_column][iEvent] = getattr(event, tree_column)
    return d
def calculate_roc_auc(p, t):
    """
    Calculates the area under the receiver operating characteristic curve (AUC ROC)

    The integral is evaluated with the trapezoidal rule over the
    (efficiency, purity) points obtained by cutting above each sorted
    probability value in turn.
    @param p np.array filled with the probability output of a classifier
    @param t np.array filled with the target (0 or 1)
    @return the absolute value of the integrated area (nan if t contains no signal)
    """
    p = np.asarray(p)
    t = np.asarray(t)
    N = len(t)
    T = np.sum(t)
    index = np.argsort(p)
    # Signal and total number of entries remaining above each successive cut.
    signal_above_cut = T - np.cumsum(t[index])
    total_above_cut = N - np.cumsum(np.ones(N))
    # The last cut removes every entry, so its purity is always 0/0; that
    # point is mapped to 0 below, hence the floating point warnings are muted.
    with np.errstate(divide='ignore', invalid='ignore'):
        efficiency = signal_above_cut / float(T)
        purity = signal_above_cut / total_above_cut
    purity = np.where(np.isnan(purity), 0, purity)
    # np.trapz was renamed to np.trapezoid in NumPy 2; use whichever exists.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    return np.abs(integrate(purity, efficiency))
def calculate_flatness(f, p, w=None):
    """
    Calculates the flatness of a feature under cuts on a signal probability
    @param f the feature values
    @param p the probability values
    @param w optional weights
    @return the mean standard deviation between the local and global cut selection efficiency
    """
    # Percentile based bin edges; np.unique drops repeated edges.
    quantiles = list(range(101))
    binning_feature = np.unique(np.percentile(f, q=quantiles))
    binning_probability = np.unique(np.percentile(p, q=quantiles))
    # A constant input yields a single edge: fall back to one bin around it.
    if len(binning_feature) < 2:
        binning_feature = np.array([np.min(f) - 1, np.max(f) + 1])
    if len(binning_probability) < 2:
        binning_probability = np.array([np.min(p) - 1, np.max(p) + 1])
    # Axis 0 runs over probability bins, axis 1 over feature bins.
    hist_n, _ = np.histogramdd(np.c_[p, f],
                               bins=[binning_probability, binning_feature],
                               weights=w)
    # Probability distribution integrated over all feature bins.
    hist_inc = hist_n.sum(axis=1)
    hist_inc /= hist_inc.sum(axis=0)
    # Normalise every feature bin separately. A feature bin without content
    # would give 0/0 and turn the whole result into nan, so such bins are
    # left untouched here and excluded from the sum below.
    feature_totals = hist_n.sum(axis=0)
    populated = feature_totals != 0
    hist_n[:, populated] /= feature_totals[populated]
    # Cumulative distributions along the probability axis.
    hist_n = hist_n.cumsum(axis=0)
    hist_inc = hist_inc.cumsum(axis=0)
    # Rows of diff correspond to feature bins after the transpose.
    diff = (hist_n.T - hist_inc)**2
    return np.sqrt(diff[populated].sum() / (100 * 99))
83 Wrapper class providing an interface to the method stored under the given identifier.
84 It loads the Options, can apply the expert and train new ones using the current as a prototype.
85 This class is used by the basf_mva_evaluation tools
90 Load a method stored under the given identifier
91 @param identifier identifying the method
137 importances = self.
weightfile.getFeatureImportance()
154 def train_teacher(self, datafiles, treename, general_options=None, specific_options=None):
156 Train a new method using this method as a prototype
157 @param datafiles the training datafiles
158 @param treename the name of the tree containing the training data
159 @param general_options general options given to basf2_mva.teacher (if None the options of this method are used)
160 @param specific_options specific options given to basf2_mva.teacher (if None the options of this method are used)
162 if isinstance(datafiles, str):
163 datafiles = [datafiles]
164 if general_options
is None:
166 if specific_options
is None:
169 with tempfile.TemporaryDirectory()
as tempdir:
170 identifier = tempdir +
"/weightfile.xml"
172 general_options.m_datafiles = basf2_mva.vector(*datafiles)
173 general_options.m_identifier = identifier
175 basf2_mva.teacher(general_options, specific_options)
177 method =
Method(identifier)
182 Apply the expert of the method to data and return the calculated probability and the target
183 @param datafiles the datafiles
184 @param treename the name of the tree containing the data
186 if isinstance(datafiles, str):
187 datafiles = [datafiles]
188 with tempfile.TemporaryDirectory()
as tempdir:
189 identifier = tempdir +
"/weightfile.xml"
190 basf2_mva.Weightfile.save(self.
weightfile, identifier)
192 rootfilename = tempdir +
'/expert.root'
193 basf2_mva.expert(basf2_mva.vector(identifier),
194 basf2_mva.vector(*datafiles),
197 rootfile = ROOT.TFile(rootfilename,
"UPDATE")
198 roottree = rootfile.Get(
"variables")
200 expert_target = identifier +
'_' + self.
general_options.m_target_variable
202 d = tree2dict(roottree,
205 return d[self.
identifier], d[stripped_expert_target]