Belle II Software development
ft_mva_evaluate.py
1#!/usr/bin/env python3
2
3
10
11import basf2_mva_util
12
13import ftPlotting as plotting
14import argparse
15import tempfile
16
17import numpy as np
18from B2Tools import b2latex, format
19
20import os
21import shutil
22import collections
23import ROOT
24from ROOT import Belle2
25from flavorTagger import KId, muId, eId
26import flavorTagger as ft
27
28
def getCommandLineOptions():
    """
    Define the command line interface of this evaluation script and parse the arguments.

    The options -id, -train and -data accept one or more values per occurrence and may be
    given several times, so each of them is returned as a list of lists.

    @return argparse.Namespace with the attributes identifiers, train_datafiles, datafiles,
            treename, outputfile, working_directory and BelleOrBelle2
    """
    parser = argparse.ArgumentParser()

    # Settings shared by the three multi-valued options.
    multi_value = dict(type=str, action='append', nargs='+')

    parser.add_argument('-id', '--identifiers', dest='identifiers', required=True,
                        help='DB Identifier or weightfile', **multi_value)
    parser.add_argument('-train', '--train_datafiles', dest='train_datafiles', required=False,
                        help='Data file containing ROOT TTree used during training', **multi_value)
    parser.add_argument('-data', '--datafiles', dest='datafiles', required=True,
                        help='Data file containing ROOT TTree with independent test data', **multi_value)

    # Single-valued options, all of them optional with a default.
    parser.add_argument('-tree', '--treename', dest='treename', type=str, default='tree',
                        help='Treename in data file')
    parser.add_argument('-out', '--outputfile', dest='outputfile', type=str, default='output.pdf',
                        help='Name of the outputted pdf file')
    # An empty working directory means "not given" (this is the default value).
    parser.add_argument('-w', '--working_directory', dest='working_directory', type=str, default='',
                        help="""Working directory where the created images and root files are stored,
 default is to create a temporary directory.""")
    parser.add_argument('-b2Orb', '--BelleOrBelle2', dest='BelleOrBelle2', type=str, default='Belle2',
                        help="""Tell me if this is Belle or Belle2 MC please.""")

    return parser.parse_args()
48
49
def unique(input):
    """
    Removes repeated elements while keeping the order of first appearance
    @param input iterable containing the elements, duplicates allowed
    @return list holding every distinct element exactly once
    """
    distinct = []
    for element in input:
        # Membership is tested on the list itself, so elements only need to
        # support equality comparison (they do not have to be hashable).
        if element in distinct:
            continue
        distinct.append(element)
    return distinct
60
61
def create_abbreviations(names, length=5):
    """
    Assigns a short label to every given name.

    A name found in the internal look-up table gets the first entry stored for it (a LaTeX label),
    any other name is cut down to its first `length` characters. If several names end up with the
    same label, a running number (1, 2, ...) is appended to each of them.

    Reads the module-level `args` (BelleOrBelle2 and identifiers) as well as the imported
    dictionaries eId, muId and KId.

    @param names names (e.g. identifiers, variables or spectators) that need a label; iterated twice
    @param length number of leading characters kept for names missing in the look-up table
    @return collections.OrderedDict mapping each name to its label, in the order of `names`
    """

    # name -> [label, second entry]; only the label (index 0) is read in this function.
    # NOTE(review): the second entry looks like a LaTeX unit string - confirm against the plotting code.
    label_table = {'useCMSFrame(p)': [r'$p^*$', r"{\rm GeV}/c\, "],
                   'useCMSFrame(pt)': [r'$p_{\rm t}^*$', r"{\rm GeV}/c\, "],
                   'p': [r'$p$', r"{\rm GeV}/c\, "],
                   'pt': [r'$p_{\rm t}$', r"{\rm GeV}/c\, "],
                   'pLambda': [r'$p_{\Lambda}$', r"{\rm GeV}/c\, "],
                   'useCMSFrame(p)Lambda': [r'$p^*_{\Lambda}$', r"{\rm GeV}/c\, "],
                   'useCMSFrame(p)FSC': [r'$p^*_{\rm Slow}$', r"{\rm GeV}/c\, "],
                   'cosTheta': [r'$\cos{\theta}$', ""],
                   eId[args.BelleOrBelle2]: [r'$\mathcal{L}_{e}$', ""],
                   'eid_dEdx': [r'$\mathcal{L}_{e}^{{\rm d}E/{\rm d}x}$', ""],
                   'eid_TOP': [r'$\mathcal{L}_{e}^{\rm TOP}$', ""],
                   'eid_ARICH': [r'$\mathcal{L}_{e}^{\rm ARICH}$', ""],
                   'eid_ECL': [r'$\mathcal{L}_{e}^{\rm ECL}$', ""],
                   'BtagToWBosonVariables(recoilMassSqrd)': [r'$M_{\rm rec}^2$', r"{\rm GeV}^2/c^4"],
                   'BtagToWBosonVariables(pMissCMS)': [r'$p^*_{\rm miss}$', r"{\rm GeV}/c\, "],
                   'BtagToWBosonVariables(cosThetaMissCMS)': [r'$\cos{\theta^*_{\rm miss}}$', ""],
                   'BtagToWBosonVariables(EW90)': [r'$E_{90}^{W}$', r"{\rm GeV}\, "],
                   'BtagToWBosonVariables(recoilMass)': [r'$M_{\rm rec}$', r"{\rm GeV}/c^2\, "],
                   'cosTPTO': [r'$\vert\cos{\theta^*_{\rm T}}\vert$', ""],
                   'cosTPTOFSC': [r'$\vert\cos{\theta^*_{\rm T,Slow}}\vert$', ""],
                   'ImpactXY': [r'$d_0$', r"{\rm mm}\, "],
                   'distance': [r'$\xi_0$', r"{\rm mm}\, "],
                   'chiProb': [r'$p$-${\rm value}$', ""],
                   muId[args.BelleOrBelle2]: [r'$\mathcal{L}_{\mu}$', ""],
                   'muid_dEdx': [r'$\mathcal{L}_{\mu}^{{\rm d}E/{\rm d}x}$', ""],
                   'muid_TOP': [r'$\mathcal{L}_{\mu}^{\rm TOP}$', ""],
                   'muid_ARICH': [r'$\mathcal{L}_{\mu}^{\rm ARICH}$', ""],
                   'muid_KLM': [r'$\mathcal{L}_{\mu}^{\rm KLM}$', ""],
                   KId[args.BelleOrBelle2]: [r'$\mathcal{L}_{K}$', ""],
                   'Kid_dEdx': [r'$\mathcal{L}_{K}^{{\rm d}E/{\rm d}x}$', ""],
                   'Kid_TOP': [r'$\mathcal{L}_{K}^{\rm TOP}$', ""],
                   'Kid_ARICH': [r'$\mathcal{L}_{K}^{\rm ARICH}$', ""],
                   'NumberOfKShortsInRoe': [r'$n_{K^0_S}$', ""],
                   'ptTracksRoe': [r'$\Sigma\, p_{\rm t}^2$', r"{\rm GeV^2}/c^2"],
                   'extraInfo(isRightCategory(Kaon))': [r"$y_{\rm Kaon}$", ""],
                   'HighestProbInCat(pi+:inRoe, isRightCategory(SlowPion))': [r"$y_{\rm SlowPion}$", ""],
                   'KaonPionVariables(cosKaonPion)': [r'$\cos{\theta^*_{K\pi}}$', ""],
                   'KaonPionVariables(HaveOpositeCharges)': [r'$\frac{1 - q_{K} \cdot q_\pi}{2}$', ""],
                   'pionID': [r'$\mathcal{L}_{\pi}$', ""],
                   'piid_dEdx': [r'$\mathcal{L}_{\pi}^{{\rm d}E/{\rm d}x}$', ""],
                   'piid_TOP': [r'$\mathcal{L}_{\pi}^{\rm TOP}$', ""],
                   'piid_ARICH': [r'$\mathcal{L}_{\pi}^{\rm ARICH}$', ""],
                   'pi_vs_edEdxid': [r'$\mathcal{L}_{\pi/e}^{{\rm d}E/{\rm d}x}$', ""],
                   'FSCVariables(pFastCMS)': [r'$p^*_{\rm Fast}$', r"{\rm GeV}/c\, "],
                   'FSCVariables(cosSlowFast)': [r'$\cos{\theta^*_{\rm SlowFast}}$', ''],
                   'FSCVariables(cosTPTOFast)': [r'$\vert\cos{\theta^*_{\rm T, Fast}}\vert$', ''],
                   'FSCVariables(SlowFastHaveOpositeCharges)': [r'$\frac{1 - q_{\rm Slow} \cdot q_{\rm Fast}}{2}$', ""],
                   'lambdaFlavor': [r'$q_{\Lambda}$', ""],
                   'M': [r'$M_{\Lambda}$', r"{\rm MeV}/c^2\, "],
                   'cosAngleBetweenMomentumAndVertexVector': [
                       r'$\cos{\theta_{\boldsymbol{x}_{\Lambda},\boldsymbol{p}_{\Lambda}}}$', ""],
                   'lambdaZError': [r'$\sigma_{\Lambda}^{zz}$', r"{\rm mm}\, "],
                   'daughter(0,p)': [r'$p_{\pi}$', r"{\rm GeV}/c\, "],
                   'daughter(0,useCMSFrame(p))': [r'$p^*_{\pi}$', r"{\rm GeV}/c\, "],
                   'daughter(1,p)': [r'$p_{p}$', r"{\rm GeV}/c"],
                   'daughter(1,useCMSFrame(p))': [r'$p^*_{p}$', r"{\rm GeV}/c\, "],
                   'daughter(1,protonID)': [r'$\mathcal{L}_{p}$', ""],
                   'daughter(0,pionID)': [r'$\mathcal{L}_{\pi}$', ""],
                   'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))': [
                       r'${\rm Int.\ Muon}$'],
                   'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))': [r'${\rm Muon}$'],
                   'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))': [r'${\rm FSC}$'],
                   'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))': [r'${\rm Electron}$'],
                   'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))': [
                       r'${\rm Int.\ El.}$'],
                   'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))': [r'${\rm Lambda}$'],
                   'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))': [r'${\rm Kaon}$' + '-' + r'${\rm Pion}$'],
                   'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))': [r'${\rm Fast\ Hadron}$'],
                   'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))': [
                       r'${\rm Int.\ Kin.\ Lep.}$'],
                   'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))': [r'${\rm Max.}\,p^*$'],
                   'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))': [r'${\rm Slow\ Pion}$'],
                   'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))': [r'${\rm Kin.\ Lep.}$'],
                   'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))': [r'${\rm Kaon}$']}

    # When the first identifier given on the command line is the Lambda-level classifier,
    # 'distance' is shown with a different label.
    first_identifier = sum(args.identifiers, [])[0]
    if 'LevelLambdaFBDT' in first_identifier:
        label_table['distance'] = [r'$\vert \boldsymbol{x}_{\Lambda}\vert$', r"{\rm mm}\, "]

    def label_for(name):
        """Return the table label of a name, or its first `length` characters if it is not listed."""
        if name in label_table:
            return label_table[name][0]
        return name[:length]

    # First pass: count how many names share each label.
    occurrences = collections.Counter(label_for(name) for name in names)

    # Second pass: hand out the labels, numbering those that are not unique.
    abbreviations = collections.OrderedDict()
    running_number = dict()
    for name in names:
        label = label_for(name)
        if occurrences[label] > 1:
            running_number[label] = running_number.get(label, 0) + 1
            abbreviations[name] = label + str(running_number[label])
        else:
            abbreviations[name] = label
    return abbreviations
171
172
173if __name__ == '__main__':
174
175 ROOT.gROOT.SetBatch(True)
176
177 old_cwd = os.getcwd()
178 args = getCommandLineOptions()
179
180 identifiers = sum(args.identifiers, [])
181 identifier_abbreviations = create_abbreviations(identifiers)
182
183 datafiles = sum(args.datafiles, [])
184
185 print("Load methods")
186 methods = [basf2_mva_util.Method(identifier) for identifier in identifiers]
187
188 print("Apply experts on independent data")
189 test_probability = {}
190 test_target = {}
191 for method in methods:
192 p, t = method.apply_expert(datafiles, args.treename)
193 test_probability[identifier_abbreviations[method.identifier]] = p
194 test_target[identifier_abbreviations[method.identifier]] = t
195
196 print("Apply experts on training data")
197 train_probability = {}
198 train_target = {}
199 if args.train_datafiles is not None:
200 train_datafiles = sum(args.train_datafiles, [])
201 for method in methods:
202 p, t = method.apply_expert(train_datafiles, args.treename)
203 train_probability[identifier_abbreviations[method.identifier]] = p
204 train_target[identifier_abbreviations[method.identifier]] = t
205
206 variables = unique(v for method in methods for v in method.variables)
207 root_variables = unique(v for method in methods for v in method.root_variables)
208
209 print("Here Variables")
210 print(variables)
211
212 bkgrOutput = 0
213 displayHeatMap = False
214 classOutputLabel = r'${\rm Classifier\ Output}$'
215 isNN = False
216
217 if identifiers[0].find('Combiner') != -1 or identifiers[0].find('KaonFBDT') != -1 or \
218 identifiers[0].find('Electron') != -1 or identifiers[0].find('Muon') != -1 or \
219 identifiers[0].find('Lepton') != -1 or \
220 identifiers[0].find('SlowPion') != -1 or identifiers[0].find('FastHadron') != -1 or \
221 identifiers[0].find('KaonPion') != -1 or identifiers[0].find('FSC') != -1 or \
222 identifiers[0].find('MaximumPstar') != -1 or identifiers[0].find('Lambda') != -1:
223
224 if identifiers[0].find('Combiner') != -1:
225 displayHeatMap = True
226 bkgrOutput = -1
227
228 variables = [
229 'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))',
230 'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))',
231 'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))',
232 'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))',
233 'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))',
234 'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))',
235 'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))',
236 'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))',
237 'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))',
238 'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))',
239 'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))',
240 'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))',
241 'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))'
242 ]
243 variables = list(reversed(variables))
244
245 if identifiers[0].find('Electron') != -1:
246 if identifiers[0].find('Intermediate') != -1:
247 variables = ft.getTrainingVariables('IntermediateElectron')
248 else:
249 variables = ft.getTrainingVariables('Electron')
250
251 if identifiers[0].find('Muon') != -1:
252 if identifiers[0].find('Intermediate') != -1:
253 variables = ft.getTrainingVariables('IntermediateMuon')
254 else:
255 variables = ft.getTrainingVariables('Muon')
256
257 if identifiers[0].find('Lepton') != -1:
258 if identifiers[0].find('Intermediate') != -1:
259 variables = ft.getTrainingVariables('IntermediateKinLepton')
260 else:
261 variables = ft.getTrainingVariables('KinLepton')
262
263 if identifiers[0].find('KaonFBDT') != -1:
264 displayHeatMap = True
265 variables = ft.getTrainingVariables('Kaon')
266
267 if identifiers[0].find('SlowPion') != -1:
268 variables = ft.getTrainingVariables('SlowPion')
269
270 if identifiers[0].find('FastHadron') != -1:
271 variables = ft.getTrainingVariables('FastHadron')
272
273 if identifiers[0].find('KaonPion') != -1:
274 variables = ft.getTrainingVariables('KaonPion')
275
276 if identifiers[0].find('FSC') != -1:
277 variables = ft.getTrainingVariables('FSC')
278
279 if identifiers[0].find('MaximumPstar') != -1:
280 variables = ft.getTrainingVariables('MaximumPstar')
281
282 if identifiers[0].find('Lambda') != -1:
283 displayHeatMap = True
284 variables = ft.getTrainingVariables('Lambda')
285
286 variables = list(reversed(variables))
287 for iVarPosition in range(len(variables)):
288 root_variables[iVarPosition] = Belle2.MakeROOTCompatible.makeROOTCompatible(variables[iVarPosition])
289
290 # Set special name
291 if identifiers[0].find('FSC') != -1:
292 variables = ['useCMSFrame(p)FSC' if v == 'useCMSFrame(p)' else v for v in variables]
293
294 if identifiers[0].find('Lambda') != -1:
295 displayHeatMap = True
296 variables = ['useCMSFrame(p)Lambda' if v == 'useCMSFrame(p)' else v for v in variables]
297
298 if identifiers[0].find('Combiner') != -1:
299 if identifiers[0].find('FANN') != -1:
300 classOutputLabel = r'$(q\cdot r)_{\rm MLP}$'
301 isNN = True
302 if identifiers[0].find('FBDT') != -1:
303 classOutputLabel = r'$(q\cdot r)_{\rm FBDT}$'
304 elif identifiers[0].find('LevelMaximumPstar') != -1:
305 classOutputLabel = r'$y_{{\rm Maximum}\, p^*}$'
306 elif identifiers[0].find('LevelFSCFBDT') != -1:
307 classOutputLabel = r'$y_{\rm FSC}$'
308 elif identifiers[0].find('LevelMuonFBDT') != -1:
309 classOutputLabel = r'$y_{\rm Muon}$'
310 elif identifiers[0].find('LevelElectronFBDT') != -1:
311 classOutputLabel = r'$y_{\rm Electron}$'
312 elif identifiers[0].find('LevelKaonFBDT') != -1:
313 classOutputLabel = r'$y_{\rm Kaon}$'
314 elif identifiers[0].find('LevelLambdaFBDT') != -1:
315 classOutputLabel = r'$y_{\rm Lambda}$'
316 elif identifiers[0].find('LevelIntermediateKinLeptonFBDT') != -1:
317 classOutputLabel = r'$y_{\rm Int.\, Kin.\, Lepton}$'
318 elif identifiers[0].find('LevelKinLeptonFBDT') != -1:
319 classOutputLabel = r'$y_{\rm Kin.\, Lepton}$'
320 elif identifiers[0].find('LevelIntermediateMuon') != -1:
321 classOutputLabel = r'$y_{\rm Int.\, Muon}$'
322 elif identifiers[0].find('LevelIntermediateElectron') != -1:
323 classOutputLabel = r'$y_{\rm Int.\, Electron}$'
324 elif identifiers[0].find('LevelKaonPionFBDT') != -1:
325 classOutputLabel = r'$y_{\rm Kaon-Pion}$'
326 elif identifiers[0].find('LevelFastHadron') != -1:
327 classOutputLabel = r'$y_{\rm Fast\, Hadron}$'
328 elif identifiers[0].find('LevelSlowPion') != -1:
329 classOutputLabel = r'$y_{\rm Slow\, Pion}$'
330
331 variable_abbreviations = create_abbreviations(variables)
332
333 spectators = unique(v for method in methods for v in method.spectators)
334 spectator_abbreviations = create_abbreviations(spectators)
335 root_spectators = unique(v for method in methods for v in method.root_spectators)
336
337 print("Load variables array")
338 rootchain = ROOT.TChain(args.treename)
339 for datafile in datafiles:
340 rootchain.Add(datafile)
341
342 variables_data = basf2_mva_util.tree2dict(rootchain, root_variables, list(variable_abbreviations.values()))
343 spectators_data = basf2_mva_util.tree2dict(rootchain, root_spectators, list(spectator_abbreviations.values()))
344
345 print("Create latex file")
346 # Change working directory after experts run, because they might want to access
347 # a localdb in the current working directory
348 with tempfile.TemporaryDirectory() as tempdir:
349 if args.working_directory == '':
350 os.chdir(tempdir)
351 else:
352 os.chdir(args.working_directory)
353
354 o = b2latex.LatexFile()
355 o += b2latex.TitlePage(title='Automatic MVA Evaluation',
356 authors=[r'Thomas Keck\\ Moritz Gelb\\ Nils Braun'],
357 abstract='Evaluation plots',
358 add_table_of_contents=True).finish()
359
360 o += b2latex.Section("Classifiers")
361 o += b2latex.String(r"""
362 This section contains the GeneralOptions and SpecificOptions of all classifiers represented by an XML tree.
363 The same information can be retrieved using the basf2\_mva\_info tool.
364 """)
365
366 table = b2latex.LongTable(r"ll", "Abbreviations of identifiers", "{name} & {abbr}", r"Identifier & Abbreviation")
367 for identifier in identifiers:
368 table.add(name=format.string(identifier), abbr=format.string(identifier_abbreviations[identifier]))
369 o += table.finish()
370
371# for method in methods:
372# o += b2latex.SubSection(format.string(method.identifier))
373# o += b2latex.Listing(language='XML').add(method.description).finish()
374
375 o += b2latex.Section("Variables")
376 o += b2latex.String("""
377 This section contains an overview of the importance and correlation of the variables used by the classifiers.
378 And distribution plots of the variables on the independent dataset. The distributions are normed for signal and
379 background separately, and only the region +- 3 sigma around the mean is shown.
380 """)
381
382 table = b2latex.LongTable(r"ll", "Abbreviations of variables", "{name} & {abbr}", r"Variable & Abbreviation")
383 for v in variables:
384 # table.add(name=format.string(v), abbr=format.string(variable_abbreviations[v]))
385 table.add(name=format.string(v), abbr=variable_abbreviations[v])
386 o += table.finish()
387
388 o += b2latex.SubSection("Importance")
389 graphics = b2latex.Graphics()
391 p.add({identifier_abbreviations[i.identifier]: np.array([i.importances.get(v, 0.0) for v in variables]) for i in methods},
392 identifier_abbreviations.values(), variable_abbreviations.values(), displayHeatMap)
393 p.finish()
394 p.save('importance.pdf')
395 graphics.add('importance.pdf', width=1.0)
396 o += graphics.finish()
397
398 o += b2latex.SubSection("Correlation")
399 first_identifier_abbr = list(identifier_abbreviations.values())[0]
400 graphics = b2latex.Graphics()
402 p.add(variables_data, variable_abbreviations.values(),
403 test_target[first_identifier_abbr] == 1,
404 test_target[first_identifier_abbr] == bkgrOutput, bkgrOutput)
405 p.finish()
406 p.save('correlation_plot.pdf')
407 graphics.add('correlation_plot.pdf', width=1.0)
408 o += graphics.finish()
409
410 if False:
411 graphics = b2latex.Graphics()
412 p = plotting.TSNE()
413 p.add(variables_data, variable_abbreviations.values(),
414 test_target[first_identifier_abbr] == 1,
415 test_target[first_identifier_abbr] == bkgrOutput)
416 p.finish()
417 p.save('tsne_plot.pdf')
418 graphics.add('tsne_plot.pdf', width=1.0)
419 o += graphics.finish()
420
421# for v in variables:
422# variable_abbr = variable_abbreviations[v]
423# o += b2latex.SubSection(format.string(v))
424# graphics = b2latex.Graphics()
425# p = plotting.VerboseDistribution(normed=True, range_in_std=3)
426# p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == 1, label=r"${\rm Signal}$")
427# p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == bkgrOutput, label=r"${\rm Background}$")
428# p.finish()
429# p.save('variable_{}.pdf'.format(hash(v)))
430# graphics.add('variable_{}.pdf'.format(hash(v)), width=1.0)
431# o += graphics.finish()
432
433 o += b2latex.Section("Classifier Plot")
434 o += b2latex.String("This section contains the receiver operating characteristics (ROC), purity projection, ..."
435 "of the classifiers on training and independent data."
436 "The legend of each plot contains the shortened identifier and the area under the ROC curve"
437 "in parenthesis.")
438
439 o += b2latex.Section("ROC Plot")
440 graphics = b2latex.Graphics()
442 for identifier in identifier_abbreviations.values():
443 auc = p.add(test_probability, identifier, test_target[identifier] == 1, test_target[identifier] == bkgrOutput)
444 o += b2latex.String("This is the Area under the ROC " + f" ({auc:.2f})" + ".")
445 f = open("AUCROCTest.txt", "w")
446 f.write(f"{auc:.6f}")
447 f.close()
448 p.finish()
449 # p.axis.set_title("ROC Rejection Plot on independent data")
450 p.save('roc_plot_test.pdf')
451 graphics.add('roc_plot_test.pdf', width=1.0)
452 o += graphics.finish()
453
454 if train_probability:
455 for i, identifier in enumerate(identifiers):
456 graphics = b2latex.Graphics()
458 identifier_abbr = identifier_abbreviations[identifier]
459 aucTrain = p.add(train_probability, identifier_abbr, train_target[identifier_abbr] == 1,
460 train_target[identifier_abbr] == bkgrOutput, label=r'{\rm Train}')
461 o += b2latex.String("This is the Area under the train ROC " + f" ({aucTrain:.2f})" + ". ")
462 f = open("AUCROCTrain.txt", "w")
463 f.write(f"{auc:.6f}")
464 f.close()
465 aucTest = p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
466 test_target[identifier_abbr] == bkgrOutput, label=r'{\rm Test}')
467 o += b2latex.String("This is the Area under the test ROC " + f" ({aucTest:.2f})" + ".")
468 p.finish()
469 # p.axis.set_title(identifier)
470 p.save('roc_test.pdf')
471 graphics.add('roc_test.pdf', width=1.0)
472 o += graphics.finish()
473
474 o += b2latex.Section("Classification Results")
475
476 for identifier in identifiers:
477 identifier_abbr = identifier_abbreviations[identifier]
478 o += b2latex.SubSection(format.string(identifier_abbr))
479 graphics = b2latex.Graphics()
481 p.add(0, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
482 test_target[identifier_abbr] == bkgrOutput, normed=True)
483 p.sub_plots[0].axis.set_title("Classification result in test data ")
484
485 p.add(1, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
486 test_target[identifier_abbr] == bkgrOutput, normed=False)
487 p.sub_plots[1].axis.set_title("Classification result in test data ")
488 p.finish()
489
490 p.save('classification_result.pdf')
491 graphics.add('classification_result.pdf', width=1)
492 o += graphics.finish()
493
494 o += b2latex.Section("Diagonal Plot")
495 graphics = b2latex.Graphics()
497 for identifier in identifiers:
498 o += b2latex.SubSection(format.string(identifier_abbr))
499 identifier_abbr = identifier_abbreviations[identifier]
500 p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1, test_target[identifier_abbr] == bkgrOutput)
501 p.finish()
502 p.axis.set_title("Diagonal plot on independent data")
503 p.save('diagonal_plot_test.pdf')
504 graphics.add('diagonal_plot_test.pdf', width=1.0)
505 o += graphics.finish()
506
507 if train_probability:
508 o += b2latex.SubSection("Overtraining Plot")
509 for identifier in identifiers:
510 identifier_abbr = identifier_abbreviations[identifier]
511 probability = {identifier_abbr: np.r_[train_probability[identifier_abbr], test_probability[identifier_abbr]]}
512 target = np.r_[train_target[identifier_abbr], test_target[identifier_abbr]]
513 train_mask = np.r_[np.ones(len(train_target[identifier_abbr])), np.zeros(len(test_target[identifier_abbr]))]
514 graphics = b2latex.Graphics()
516 p.add(probability, identifier_abbr,
517 train_mask == 1, train_mask == 0,
518 target == 1, target == bkgrOutput, None, bkgrOutput, isNN)
519 p.finish(xLabel=classOutputLabel)
520 # p.axis.set_title("Overtraining check for " + str(identifier))
521 p.save('overtraining_plot.pdf')
522 graphics.add('overtraining_plot.pdf', width=1.0)
523 o += graphics.finish()
524 print("Finished Overtraining plot")
525
526 o += b2latex.Section("Spectators")
527 o += b2latex.String("This section contains the distribution and dependence on the"
528 "classifier outputs of all spectator variables.")
529
530 table = b2latex.LongTable(r"ll", "Abbreviations of spectators", "{name} & {abbr}", r"Spectator & Abbreviation")
531 for s in spectators:
532 table.add(name=format.string(s), abbr=format.string(spectator_abbreviations[s]))
533 o += table.finish()
534
535 for spectator in spectators:
536 spectator_abbr = spectator_abbreviations[spectator]
537 o += b2latex.SubSection(format.string(spectator))
538 graphics = b2latex.Graphics()
540 p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == 1, label="Signal")
541 p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == bkgrOutput, label="Background")
542 p.finish()
543 p.save(f'spectator_{hash(spectator)}.pdf')
544 graphics.add(f'spectator_{hash(spectator)}.pdf', width=1.0)
545 o += graphics.finish()
546
547 for identifier in identifiers:
548 o += b2latex.SubSubSection(format.string(spectator) + " with classifier " + format.string(identifier))
549 identifier_abbr = identifier_abbreviations[identifier]
550 data = {identifier_abbr: test_probability[identifier_abbr], spectator_abbr: spectators_data[spectator_abbr]}
551 graphics = b2latex.Graphics()
553 p.add(data, spectator_abbr, identifier_abbr, list(range(10, 100, 10)),
554 test_target[identifier_abbr] == 1,
555 test_target[identifier_abbr] == bkgrOutput)
556 p.finish()
557 p.save(f'correlation_plot_{hash(spectator)}_{hash(identifier)}.pdf')
558 graphics.add(f'correlation_plot_{hash(spectator)}_{hash(identifier)}.pdf', width=1.0)
559 o += graphics.finish()
560
561 o.save('latex.tex', compile=True)
562 os.chdir(old_cwd)
563 if args.working_directory == '':
564 shutil.copy(tempdir + '/latex.pdf', args.outputfile)
565 else:
566 shutil.copy(args.working_directory + '/latex.pdf', args.outputfile)
static std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.