Belle II Software  release-05-01-25
ft_mva_evaluate.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 import basf2_mva_util
5 
6 import ftPlotting as plotting
7 import argparse
8 import tempfile
9 
10 import numpy as np
11 from B2Tools import b2latex, format
12 
13 import ROOT
14 
15 import os
16 import shutil
17 import collections
18 import ROOT
19 from ROOT import Belle2
20 from flavorTagger import KId, muId, eId
21 
22 
def getCommandLineOptions():
    """
    Declares the command line interface of the evaluation script and parses the command line.

    The options -id, -train and -data may be given several times and take one or more
    values each, so their parsed values are lists of lists (-train stays None if it is absent).
    @return argparse.Namespace with the attributes identifiers, train_datafiles, datafiles,
            treename, outputfile, working_directory and BelleOrBelle2
    """
    # Settings shared by all options which collect file names or identifiers.
    repeatable = {'type': str, 'action': 'append', 'nargs': '+'}

    cli = argparse.ArgumentParser()
    cli.add_argument('-id', '--identifiers', dest='identifiers', required=True,
                     help='DB Identifier or weightfile', **repeatable)
    cli.add_argument('-train', '--train_datafiles', dest='train_datafiles', required=False,
                     help='Data file containing ROOT TTree used during training', **repeatable)
    cli.add_argument('-data', '--datafiles', dest='datafiles', required=True,
                     help='Data file containing ROOT TTree with independent test data', **repeatable)
    cli.add_argument('-tree', '--treename', dest='treename', type=str, default='tree',
                     help='Treename in data file')
    cli.add_argument('-out', '--outputfile', dest='outputfile', type=str, default='output.pdf',
                     help='Name of the outputted pdf file')
    cli.add_argument('-w', '--working_directory', dest='working_directory', type=str, default='',
                     help="""Working directory where the created images and root files are stored,
                     default is to create a temporary directory.""")
    cli.add_argument('-b2Orb', '--BelleOrBelle2', dest='BelleOrBelle2', type=str, default='Belle2',
                     help="""Tell me if this is Belle or Belle2 MC please.""")
    return cli.parse_args()
42 
43 
def unique(input):
    """
    Removes repeated elements from an iterable while preserving the order of first occurrence.

    Elements are compared by equality, so unhashable elements (e.g. lists) are supported.
    @param input iterable containing the elements
    @return list with every distinct element exactly once
    """
    kept = []
    for candidate in input:
        if candidate in kept:
            # Already recorded at its first occurrence.
            continue
        kept.append(candidate)
    return kept
54 
55 
def create_abbreviations(names, length=5):
    """
    Maps every name to a short label which is used in the plots and tables.

    Names listed in the table below get the first entry of their table row (a LaTeX label);
    only that first entry is used by this function. All other names are cut to their first
    `length` characters. If several names end up with the same label, a running number
    (1, 2, ...) is appended to each of them in order of appearance.

    The function reads the module-level `args` (BelleOrBelle2 and identifiers), so the
    command line has to be parsed before it is called.
    @param names the names (identifiers, variables or spectators) to abbreviate; iterated twice
    @param length number of characters kept for names without a table entry
    @return collections.OrderedDict mapping name -> abbreviation, in the order of names
    """

    variablesPlotParamsDict = {'useCMSFrame(p)': [r'$p^*$', r"{\rm GeV}/c\, "],
                               'useCMSFrame(pt)': [r'$p_{\rm t}^*$', r"{\rm GeV}/c\, "],
                               'p': [r'$p$', r"{\rm GeV}/c\, "],
                               'pt': [r'$p_{\rm t}$', r"{\rm GeV}/c\, "],
                               'pLambda': [r'$p_{\Lambda}$', r"{\rm GeV}/c\, "],
                               'useCMSFrame(p)Lambda': [r'$p^*_{\Lambda}$', r"{\rm GeV}/c\, "],
                               'useCMSFrame(p)FSC': [r'$p^*_{\rm Slow}$', r"{\rm GeV}/c\, "],
                               'cosTheta': [r'$\cos{\theta}$', ""],
                               eId[args.BelleOrBelle2]: [r'$\mathcal{L}_{e}$', ""],
                               'eid_dEdx': [r'$\mathcal{L}_{e}^{{\rm d}E/{\rm d}x}$', ""],
                               'eid_TOP': [r'$\mathcal{L}_{e}^{\rm TOP}$', ""],
                               'eid_ARICH': [r'$\mathcal{L}_{e}^{\rm ARICH}$', ""],
                               'eid_ECL': [r'$\mathcal{L}_{e}^{\rm ECL}$', ""],
                               'BtagToWBosonVariables(recoilMassSqrd)': [r'$M_{\rm rec}^2$', r"{\rm GeV}^2/c^4"],
                               'BtagToWBosonVariables(pMissCMS)': [r'$p^*_{\rm miss}$', r"{\rm GeV}/c\, "],
                               'BtagToWBosonVariables(cosThetaMissCMS)': [r'$\cos{\theta^*_{\rm miss}}$', ""],
                               'BtagToWBosonVariables(EW90)': [r'$E_{90}^{W}$', r"{\rm GeV}\, "],
                               'BtagToWBosonVariables(recoilMass)': [r'$M_{\rm rec}$', r"{\rm GeV}/c^2\, "],
                               'cosTPTO': [r'$\vert\cos{\theta^*_{\rm T}}\vert$', ""],
                               'cosTPTOFSC': [r'$\vert\cos{\theta^*_{\rm T,Slow}}\vert$', ""],
                               'ImpactXY': [r'$d_0$', r"{\rm mm}\, "],
                               'distance': [r'$\xi_0$', r"{\rm mm}\, "],
                               'chiProb': [r'$p$-${\rm value}$', ""],
                               muId[args.BelleOrBelle2]: [r'$\mathcal{L}_{\mu}$', ""],
                               'muid_dEdx': [r'$\mathcal{L}_{\mu}^{{\rm d}E/{\rm d}x}$', ""],
                               'muid_TOP': [r'$\mathcal{L}_{\mu}^{\rm TOP}$', ""],
                               'muid_ARICH': [r'$\mathcal{L}_{\mu}^{\rm ARICH}$', ""],
                               'muid_KLM': [r'$\mathcal{L}_{\mu}^{\rm KLM}$', ""],
                               KId[args.BelleOrBelle2]: [r'$\mathcal{L}_{K}$', ""],
                               'Kid_dEdx': [r'$\mathcal{L}_{K}^{{\rm d}E/{\rm d}x}$', ""],
                               'Kid_TOP': [r'$\mathcal{L}_{K}^{\rm TOP}$', ""],
                               'Kid_ARICH': [r'$\mathcal{L}_{K}^{\rm ARICH}$', ""],
                               'NumberOfKShortsInRoe': [r'$n_{K^0_S}$', ""],
                               'ptTracksRoe': [r'$\Sigma\, p_{\rm t}^2$', r"{\rm GeV^2}/c^2"],
                               'extraInfo(isRightCategory(Kaon))': [r"$y_{\rm Kaon}$", ""],
                               'HighestProbInCat(pi+:inRoe, isRightCategory(SlowPion))': [r"$y_{\rm SlowPion}$", ""],
                               'KaonPionVariables(cosKaonPion)': [r'$\cos{\theta^*_{K\pi}}$', ""],
                               'KaonPionVariables(HaveOpositeCharges)': [r'$\frac{1 - q_{K} \cdot q_\pi}{2}$', ""],
                               'pionID': [r'$\mathcal{L}_{\pi}$', ""],
                               'piid_dEdx': [r'$\mathcal{L}_{\pi}^{{\rm d}E/{\rm d}x}$', ""],
                               'piid_TOP': [r'$\mathcal{L}_{\pi}^{\rm TOP}$', ""],
                               'piid_ARICH': [r'$\mathcal{L}_{\pi}^{\rm ARICH}$', ""],
                               'pi_vs_edEdxid': [r'$\mathcal{L}_{\pi/e}^{{\rm d}E/{\rm d}x}$', ""],
                               'FSCVariables(pFastCMS)': [r'$p^*_{\rm Fast}$', r"{\rm GeV}/c\, "],
                               'FSCVariables(cosSlowFast)': [r'$\cos{\theta^*_{\rm SlowFast}}$', ''],
                               'FSCVariables(cosTPTOFast)': [r'$\vert\cos{\theta^*_{\rm T, Fast}}\vert$', ''],
                               'FSCVariables(SlowFastHaveOpositeCharges)': [r'$\frac{1 - q_{\rm Slow} \cdot q_{\rm Fast}}{2}$', ""],
                               'lambdaFlavor': [r'$q_{\Lambda}$', ""],
                               'M': [r'$M_{\Lambda}$', r"{\rm MeV}/c^2\, "],
                               'cosAngleBetweenMomentumAndVertexVector': [
                                   r'$\cos{\theta_{\boldsymbol{x}_{\Lambda},\boldsymbol{p}_{\Lambda}}}$', ""],
                               'lambdaZError': [r'$\sigma_{\Lambda}^{zz}$', r"{\rm mm}\, "],
                               'daughter(0,p)': [r'$p_{\pi}$', r"{\rm GeV}/c\, "],
                               'daughter(0,useCMSFrame(p))': [r'$p^*_{\pi}$', r"{\rm GeV}/c\, "],
                               'daughter(1,p)': [r'$p_{p}$', r"{\rm GeV}/c"],
                               'daughter(1,useCMSFrame(p))': [r'$p^*_{p}$', r"{\rm GeV}/c\, "],
                               'daughter(1,protonID)': [r'$\mathcal{L}_{p}$', ""],
                               'daughter(0,pionID)': [r'$\mathcal{L}_{\pi}$', ""],
                               'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))': [
                                   r'${\rm Int.\ Muon}$'],
                               'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))': [r'${\rm Muon}$'],
                               'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))': [r'${\rm FSC}$'],
                               'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))': [r'${\rm Electron}$'],
                               'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))': [
                                   r'${\rm Int.\ El.}$'],
                               'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))': [r'${\rm Lambda}$'],
                               'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))': [r'${\rm Kaon}$' + '-' + r'${\rm Pion}$'],
                               'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))': [r'${\rm Fast\ Hadron}$'],
                               'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))': [
                                   r'${\rm Int.\ Kin.\ Lep.}$'],
                               'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))': [r'${\rm Max.}\,p^*$'],
                               'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))': [r'${\rm Slow\ Pion}$'],
                               'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))': [r'${\rm Kin.\ Lep.}$'],
                               'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))': [r'${\rm Kaon}$']}

    # For the Lambda level classifier the variable 'distance' gets a different label.
    if 'LevelLambdaFBDT' in sum(args.identifiers, [])[0]:
        variablesPlotParamsDict['distance'] = [r'$\vert \boldsymbol{x}_{\Lambda}\vert$', r"{\rm mm}\, "]

    def shorten(name):
        """Returns the table label of a name, or its first `length` characters as fallback."""
        entry = variablesPlotParamsDict.get(name)
        if entry is None:
            return name[:length]
        return entry[0]

    # First pass: how often does each label occur in total?
    total_uses = collections.Counter(shorten(name) for name in names)

    # Second pass: assign the labels and number those which are not unique.
    uses_so_far = collections.Counter()
    abbreviations = collections.OrderedDict()
    for name in names:
        label = shorten(name)
        if total_uses[label] > 1:
            uses_so_far[label] += 1
            abbreviations[name] = label + str(uses_so_far[label])
        else:
            abbreviations[name] = label
    return abbreviations
165 
166 
if __name__ == '__main__':
    # Evaluation of flavor tagger MVA methods: the given methods are applied to the
    # independent test data (and optionally to the training data), a set of plots is
    # produced, and everything is collected in a LaTeX document which is compiled and
    # copied to the requested output file.

    # Run ROOT in batch mode.
    ROOT.gROOT.SetBatch(True)

    old_cwd = os.getcwd()
    args = getCommandLineOptions()

    # The parsed options are lists of lists (action='append' with nargs='+'): flatten them.
    identifiers = sum(args.identifiers, [])
    identifier_abbreviations = create_abbreviations(identifiers)

    datafiles = sum(args.datafiles, [])

    print("Load methods")
    methods = [basf2_mva_util.Method(identifier) for identifier in identifiers]

    print("Apply experts on independent data")
    test_probability = {}
    test_target = {}
    for method in methods:
        p, t = method.apply_expert(datafiles, args.treename)
        test_probability[identifier_abbreviations[method.identifier]] = p
        test_target[identifier_abbreviations[method.identifier]] = t

    print("Apply experts on training data")
    train_probability = {}
    train_target = {}
    if args.train_datafiles is not None:
        train_datafiles = sum(args.train_datafiles, [])
        for method in methods:
            p, t = method.apply_expert(train_datafiles, args.treename)
            train_probability[identifier_abbreviations[method.identifier]] = p
            train_target[identifier_abbreviations[method.identifier]] = t

    variables = unique(v for method in methods for v in method.variables)
    root_variables = unique(v for method in methods for v in method.root_variables)

    print("Here Variables")
    print(variables)

    # Defaults for classifiers which are not recognised as flavor tagger categories.
    bkgrOutput = 0
    displayHeatMap = False
    classOutputLabel = r'${\rm Classifier\ Output}$'
    isNN = False

    # All category specific settings are chosen from the name of the first identifier only.
    first_identifier = identifiers[0]
    category_tags = ('Combiner', 'KaonFBDT', 'Electron', 'Muon', 'Lepton', 'SlowPion', 'FastHadron',
                     'KaonPion', 'FSC', 'MaximumPstar', 'Lambda')

    # NOTE(review): the listing lost its indentation. The statements up to the second
    # 'Lambda' block are placed inside this guard because the guard is exactly the union
    # of the conditions checked inside it - confirm against the repository.
    if any(tag in first_identifier for tag in category_tags):

        if 'Combiner' in first_identifier:
            displayHeatMap = True
            bkgrOutput = -1

            variables = [
                'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))',
                'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))',
                'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))',
                'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))',
                'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))',
                'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))',
                'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))',
                'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))',
                'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))',
                'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))'
            ]
            variables = list(reversed(variables))

        if 'Electron' in first_identifier:

            variables = [
                'useCMSFrame(p)',
                'useCMSFrame(pt)',
                'p',
                'pt',
                'cosTheta',
                'ImpactXY',
                'distance',
                'BtagToWBosonVariables(recoilMassSqrd)',
                'BtagToWBosonVariables(EW90)',
                'BtagToWBosonVariables(pMissCMS)',
                'BtagToWBosonVariables(cosThetaMissCMS)',
                'cosTPTO',
                'eid_dEdx',
                'eid_TOP',
                'eid_ARICH',
                'eid_ECL',
                eId[args.BelleOrBelle2],
                'chiProb']

        if 'Muon' in first_identifier:

            variables = [
                'useCMSFrame(p)',
                'useCMSFrame(pt)',
                'p',
                'pt',
                'cosTheta',
                'ImpactXY',
                'distance',
                'BtagToWBosonVariables(recoilMassSqrd)',
                'BtagToWBosonVariables(EW90)',
                'BtagToWBosonVariables(pMissCMS)',
                'BtagToWBosonVariables(cosThetaMissCMS)',
                'cosTPTO',
                'muid_dEdx',
                'muid_TOP',
                'muid_ARICH',
                'muid_KLM',
                muId[args.BelleOrBelle2],
                'chiProb']

        if 'Lepton' in first_identifier:

            variables = [
                'useCMSFrame(p)',
                'useCMSFrame(pt)',
                'p',
                'pt',
                'cosTheta',
                'ImpactXY',
                'distance',
                'BtagToWBosonVariables(recoilMassSqrd)',
                'BtagToWBosonVariables(EW90)',
                'BtagToWBosonVariables(pMissCMS)',
                'BtagToWBosonVariables(cosThetaMissCMS)',
                'cosTPTO',
                'eid_dEdx',
                'eid_TOP',
                'eid_ARICH',
                'eid_ECL',
                eId[args.BelleOrBelle2],
                'muid_dEdx',
                'muid_TOP',
                'muid_ARICH',
                'muid_KLM',
                muId[args.BelleOrBelle2],
                'chiProb']

        if 'KaonFBDT' in first_identifier:
            displayHeatMap = True
            variables = ['useCMSFrame(p)', 'useCMSFrame(pt)', 'pt', 'ptTracksRoe', 'cosTheta', 'ImpactXY', 'distance',
                         'BtagToWBosonVariables(recoilMassSqrd)', 'BtagToWBosonVariables(EW90)',
                         'BtagToWBosonVariables(pMissCMS)', 'BtagToWBosonVariables(cosThetaMissCMS)',
                         'cosTPTO', 'Kid_dEdx', 'Kid_TOP', 'Kid_ARICH', KId[args.BelleOrBelle2],
                         'NumberOfKShortsInRoe', 'chiProb']

        if 'SlowPion' in first_identifier or 'FastHadron' in first_identifier:

            variables = [
                'useCMSFrame(p)',
                'useCMSFrame(pt)',
                'p',
                'pt',
                'cosTheta',
                'ImpactXY',
                'distance',
                'BtagToWBosonVariables(recoilMassSqrd)',
                'BtagToWBosonVariables(EW90)',
                'BtagToWBosonVariables(pMissCMS)',
                'BtagToWBosonVariables(cosThetaMissCMS)',
                'cosTPTO',
                'Kid_dEdx', 'Kid_TOP', 'Kid_ARICH', KId[args.BelleOrBelle2],
                'piid_dEdx',
                'piid_TOP',
                'piid_ARICH',
                'pionID',
                'pi_vs_edEdxid',
                eId[args.BelleOrBelle2],
                'ptTracksRoe',
                'NumberOfKShortsInRoe',
                'chiProb']

        if 'KaonPion' in first_identifier:

            variables = [KId[args.BelleOrBelle2], 'extraInfo(isRightCategory(Kaon))',
                         'HighestProbInCat(pi+:inRoe, isRightCategory(SlowPion))',
                         'KaonPionVariables(cosKaonPion)',
                         'KaonPionVariables(HaveOpositeCharges)'
                         ]

        if 'FSC' in first_identifier:

            variables = [KId[args.BelleOrBelle2], 'useCMSFrame(p)',
                         'FSCVariables(pFastCMS)',
                         'FSCVariables(SlowFastHaveOpositeCharges)',
                         'cosTPTO',
                         'FSCVariables(cosTPTOFast)',
                         'FSCVariables(cosSlowFast)']

        if 'MaximumPstar' in first_identifier:

            variables = ['useCMSFrame(p)',
                         'useCMSFrame(pt)',
                         'p',
                         'pt',
                         'ImpactXY',
                         'distance', 'cosTPTO']

        if 'Lambda' in first_identifier:
            displayHeatMap = True
            if args.BelleOrBelle2 == 'Belle':
                variables = [
                    'useCMSFrame(p)',
                    'p',
                    'daughter(0,useCMSFrame(p))',
                    'daughter(0,p)',
                    'daughter(1,useCMSFrame(p))',
                    'daughter(1,p)',
                    'lambdaFlavor',
                    'M',
                    'NumberOfKShortsInRoe',
                    'cosAngleBetweenMomentumAndVertexVector',
                    'distance',
                    'lambdaZError',
                    'chiProb']
            else:
                variables = [
                    'daughter(0,pionID)',
                    'daughter(1,protonID)',
                    'useCMSFrame(p)',
                    'p',
                    'daughter(0,useCMSFrame(p))',
                    'daughter(0,p)',
                    'daughter(1,useCMSFrame(p))',
                    'daughter(1,p)',
                    'lambdaFlavor',
                    'M',
                    'NumberOfKShortsInRoe',
                    'cosAngleBetweenMomentumAndVertexVector',
                    'distance',
                    'lambdaZError',
                    'chiProb']

        # The ROOT branch names are derived from the real variable names, before some of
        # the variables are renamed below to select a category specific plot label.
        variables = list(reversed(variables))
        for iVarPosition, variable in enumerate(variables):
            root_variables[iVarPosition] = Belle2.makeROOTCompatible(variable)

        if 'FSC' in first_identifier:

            variables = [KId[args.BelleOrBelle2], 'useCMSFrame(p)FSC',
                         'FSCVariables(pFastCMS)',
                         'FSCVariables(SlowFastHaveOpositeCharges)',
                         'cosTPTOFSC',
                         'FSCVariables(cosTPTOFast)',
                         'FSCVariables(cosSlowFast)']

        if 'Lambda' in first_identifier:
            displayHeatMap = True
            if args.BelleOrBelle2 == 'Belle':
                variables = [
                    'useCMSFrame(p)Lambda',
                    'pLambda',
                    'daughter(0,useCMSFrame(p))',
                    'daughter(0,p)',
                    'daughter(1,useCMSFrame(p))',
                    'daughter(1,p)',
                    'lambdaFlavor',
                    'M',
                    'NumberOfKShortsInRoe',
                    'cosAngleBetweenMomentumAndVertexVector',
                    'distance',
                    'lambdaZError',
                    'chiProb']
            else:
                variables = [
                    'daughter(0,pionID)',
                    'daughter(1,protonID)',
                    'useCMSFrame(p)Lambda',
                    'pLambda',
                    'daughter(0,useCMSFrame(p))',
                    'daughter(0,p)',
                    'daughter(1,useCMSFrame(p))',
                    'daughter(1,p)',
                    'lambdaFlavor',
                    'M',
                    'NumberOfKShortsInRoe',
                    'cosAngleBetweenMomentumAndVertexVector',
                    'distance',
                    'lambdaZError',
                    'chiProb']

    # Axis label of the classifier output.
    if 'Combiner' in first_identifier:
        if 'FANN' in first_identifier:
            classOutputLabel = r'$(q\cdot r)_{\rm MLP}$'
            isNN = True
        if 'FBDT' in first_identifier:
            classOutputLabel = r'$(q\cdot r)_{\rm FBDT}$'
    else:
        # Checked in this order, the first matching tag wins.
        level_labels = [
            ('LevelMaximumPstar', r'$y_{{\rm Maximum}\, p^*}$'),
            ('LevelFSCFBDT', r'$y_{\rm FSC}$'),
            ('LevelMuonFBDT', r'$y_{\rm Muon}$'),
            ('LevelElectronFBDT', r'$y_{\rm Electron}$'),
            ('LevelKaonFBDT', r'$y_{\rm Kaon}$'),
            ('LevelLambdaFBDT', r'$y_{\rm Lambda}$'),
            ('LevelIntermediateKinLeptonFBDT', r'$y_{\rm Int.\, Kin.\, Lepton}$'),
            ('LevelKinLeptonFBDT', r'$y_{\rm Kin.\, Lepton}$'),
            ('LevelIntermediateMuon', r'$y_{\rm Int.\, Muon}$'),
            ('LevelIntermediateElectron', r'$y_{\rm Int.\, Electron}$'),
            ('LevelKaonPionFBDT', r'$y_{\rm Kaon-Pion}$'),
            ('LevelFastHadron', r'$y_{\rm Fast\, Hadron}$'),
            ('LevelSlowPion', r'$y_{\rm Slow\, Pion}$'),
        ]
        for level_tag, level_label in level_labels:
            if level_tag in first_identifier:
                classOutputLabel = level_label
                break

    variable_abbreviations = create_abbreviations(variables)

    spectators = unique(v for method in methods for v in method.spectators)
    spectator_abbreviations = create_abbreviations(spectators)
    root_spectators = unique(v for method in methods for v in method.root_spectators)

    print("Load variables array")
    rootchain = ROOT.TChain(args.treename)
    for datafile in datafiles:
        rootchain.Add(datafile)

    variables_data = basf2_mva_util.tree2dict(rootchain, root_variables, list(variable_abbreviations.values()))
    spectators_data = basf2_mva_util.tree2dict(rootchain, root_spectators, list(spectator_abbreviations.values()))

    print("Create latex file")
    # Change working directory after experts run, because they might want to access
    # a localdb in the current working directory
    with tempfile.TemporaryDirectory() as tempdir:
        # Images and the LaTeX document are written to the requested working directory,
        # or to the temporary directory if none was given.
        if args.working_directory == '':
            report_directory = tempdir
        else:
            report_directory = args.working_directory
        os.chdir(report_directory)

        o = b2latex.LatexFile()
        o += b2latex.TitlePage(title='Automatic MVA Evaluation',
                               authors=[r'Thomas Keck\\ Moritz Gelb\\ Nils Braun'],
                               abstract='Evaluation plots',
                               add_table_of_contents=True).finish()

        o += b2latex.Section("Classifiers")
        o += b2latex.String(r"""
        This section contains the GeneralOptions and SpecificOptions of all classifiers represented by an XML tree.
        The same information can be retreived using the basf2\_mva\_info tool.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of identifiers", "{name} & {abbr}", r"Identifier & Abbreviation")
        for identifier in identifiers:
            table.add(name=format.string(identifier), abbr=format.string(identifier_abbreviations[identifier]))
        o += table.finish()

        # for method in methods:
        #     o += b2latex.SubSection(format.string(method.identifier))
        #     o += b2latex.Listing(language='XML').add(method.description).finish()

        o += b2latex.Section("Variables")
        o += b2latex.String("""
        This section contains an overview of the importance and correlation of the variables used by the classifiers.
        And distribution plots of the variables on the independent dataset. The distributions are normed for signal and
        background separately, and only the region +- 3 sigma around the mean is shown.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of variables", "{name} & {abbr}", r"Variable & Abbreviation")
        for v in variables:
            # The abbreviations are LaTeX labels already, so they are not escaped here.
            # table.add(name=format.string(v), abbr=format.string(variable_abbreviations[v]))
            table.add(name=format.string(v), abbr=variable_abbreviations[v])
        o += table.finish()

        o += b2latex.SubSection("Importance")
        graphics = b2latex.Graphics()
        p = plotting.Importance()
        p.add({identifier_abbreviations[i.identifier]: np.array([i.importances.get(v, 0.0) for v in variables]) for i in methods},
              identifier_abbreviations.values(), variable_abbreviations.values(), displayHeatMap)
        p.finish()
        p.save('importance.pdf')
        graphics.add('importance.pdf', width=1.0)
        o += graphics.finish()

        o += b2latex.SubSection("Correlation")
        first_identifier_abbr = list(identifier_abbreviations.values())[0]
        graphics = b2latex.Graphics()
        # NOTE(review): this line is missing in the listing; restored from the usage below
        # and the cross references of the file - confirm against the repository.
        p = plotting.CorrelationMatrix()
        p.add(variables_data, variable_abbreviations.values(),
              test_target[first_identifier_abbr] == 1,
              test_target[first_identifier_abbr] == bkgrOutput, bkgrOutput)
        p.finish()
        p.save('correlation_plot.pdf')
        graphics.add('correlation_plot.pdf', width=1.0)
        o += graphics.finish()

        # Deliberately disabled in the original script.
        if False:
            graphics = b2latex.Graphics()
            p = plotting.TSNE()
            p.add(variables_data, variable_abbreviations.values(),
                  test_target[first_identifier_abbr] == 1,
                  test_target[first_identifier_abbr] == bkgrOutput)
            p.finish()
            p.save('tsne_plot.pdf')
            graphics.add('tsne_plot.pdf', width=1.0)
            o += graphics.finish()

        # for v in variables:
        #     variable_abbr = variable_abbreviations[v]
        #     o += b2latex.SubSection(format.string(v))
        #     graphics = b2latex.Graphics()
        #     p = plotting.VerboseDistribution(normed=True, range_in_std=3)
        #     p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == 1, label=r"${\rm Signal}$")
        #     p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == bkgrOutput, label=r"${\rm Background}$")
        #     p.finish()
        #     p.save('variable_{}.pdf'.format(hash(v)))
        #     graphics.add('variable_{}.pdf'.format(hash(v)), width=1.0)
        #     o += graphics.finish()

        o += b2latex.Section("Classifier Plot")
        o += b2latex.String("This section contains the receiver operating characteristics (ROC), purity projection, ..."
                            "of the classifiers on training and independent data."
                            "The legend of each plot contains the shortened identifier and the area under the ROC curve"
                            "in parenthesis.")

        o += b2latex.Section("ROC Plot")
        graphics = b2latex.Graphics()
        # NOTE(review): constructor line restored, it is missing in the listing.
        p = plotting.RejectionOverEfficiency()
        for identifier in identifier_abbreviations.values():
            auc = p.add(test_probability, identifier, test_target[identifier] == 1, test_target[identifier] == bkgrOutput)
            # NOTE(review): assumed to be inside the loop (indentation lost in the listing).
            # The file is rewritten for every identifier, so it ends up with the last value.
            o += b2latex.String("This is the Area under the ROC " + " ({:.2f})".format(auc) + ".")
            with open("AUCROCTest.txt", "w") as auc_file:
                auc_file.write("{:.6f}".format(auc))
        p.finish()
        # p.axis.set_title("ROC Rejection Plot on independent data")
        p.save('roc_plot_test.pdf')
        graphics.add('roc_plot_test.pdf', width=1.0)
        o += graphics.finish()

        if train_probability:
            # NOTE(review): 'roc_test.pdf' (like 'classification_result.pdf' and
            # 'overtraining_plot.pdf' below) is overwritten for every identifier.
            for identifier in identifiers:
                graphics = b2latex.Graphics()
                # NOTE(review): constructor line restored, it is missing in the listing.
                p = plotting.RejectionOverEfficiency()
                identifier_abbr = identifier_abbreviations[identifier]
                aucTrain = p.add(train_probability, identifier_abbr, train_target[identifier_abbr] == 1,
                                 train_target[identifier_abbr] == bkgrOutput, label=r'{\rm Train}')
                o += b2latex.String("This is the Area under the train ROC " + " ({:.2f})".format(aucTrain) + ". ")
                # Fixed: the value of the training sample is stored here (the original
                # wrote `auc`, the value of the test sample, into the train file).
                with open("AUCROCTrain.txt", "w") as auc_file:
                    auc_file.write("{:.6f}".format(aucTrain))
                aucTest = p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                                test_target[identifier_abbr] == bkgrOutput, label=r'{\rm Test}')
                o += b2latex.String("This is the Area under the test ROC " + " ({:.2f})".format(aucTest) + ".")
                p.finish()
                # p.axis.set_title(identifier)
                p.save('roc_test.pdf')
                graphics.add('roc_test.pdf', width=1.0)
                o += graphics.finish()

        o += b2latex.Section("Classification Results")

        for identifier in identifiers:
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            graphics = b2latex.Graphics()
            # NOTE(review): constructor line restored, it is missing in the listing. The
            # arguments are inferred from the two sub plots filled below - confirm.
            p = plotting.Multiplot(plotting.PurityAndEfficiencyOverCut, 2)
            p.add(0, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=True)
            p.sub_plots[0].axis.set_title("Classification result in test data ")

            p.add(1, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=False)
            p.sub_plots[1].axis.set_title("Classification result in test data ")
            p.finish()

            p.save('classification_result.pdf')
            graphics.add('classification_result.pdf', width=1)
            o += graphics.finish()

        o += b2latex.Section("Diagonal Plot")
        graphics = b2latex.Graphics()
        p = plotting.Diagonal()
        for identifier in identifiers:
            # Fixed: look up the abbreviation before it is used as title (the original
            # used the value left over from the previous iteration or loop).
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1, test_target[identifier_abbr] == bkgrOutput)
        p.finish()
        p.axis.set_title("Diagonal plot on independent data")
        p.save('diagonal_plot_test.pdf')
        graphics.add('diagonal_plot_test.pdf', width=1.0)
        o += graphics.finish()

        if train_probability:
            o += b2latex.SubSection("Overtraining Plot")
            for identifier in identifiers:
                identifier_abbr = identifier_abbreviations[identifier]
                # Training and test sample are concatenated; train_mask marks the training part.
                probability = {identifier_abbr: np.r_[train_probability[identifier_abbr], test_probability[identifier_abbr]]}
                target = np.r_[train_target[identifier_abbr], test_target[identifier_abbr]]
                train_mask = np.r_[np.ones(len(train_target[identifier_abbr])), np.zeros(len(test_target[identifier_abbr]))]
                graphics = b2latex.Graphics()
                # NOTE(review): constructor line restored, it is missing in the listing.
                p = plotting.Overtraining()
                p.add(probability, identifier_abbr,
                      train_mask == 1, train_mask == 0,
                      target == 1, target == bkgrOutput, None, bkgrOutput, isNN)
                p.finish(xLabel=classOutputLabel)
                # p.axis.set_title("Overtraining check for " + str(identifier))
                p.save('overtraining_plot.pdf')
                graphics.add('overtraining_plot.pdf', width=1.0)
                o += graphics.finish()
                # NOTE(review): assumed to be inside the loop (indentation lost in the listing).
                print("Finished Overtraining plot")

        o += b2latex.Section("Spectators")
        o += b2latex.String("This section contains the distribution and dependence on the"
                            "classifier outputs of all spectator variables.")

        table = b2latex.LongTable(r"ll", "Abbreviations of spectators", "{name} & {abbr}", r"Spectator & Abbreviation")
        for s in spectators:
            table.add(name=format.string(s), abbr=format.string(spectator_abbreviations[s]))
        o += table.finish()

        for spectator in spectators:
            spectator_abbr = spectator_abbreviations[spectator]
            o += b2latex.SubSection(format.string(spectator))
            graphics = b2latex.Graphics()
            # NOTE(review): constructor line restored, it is missing in the listing; the
            # default arguments are an assumption - confirm.
            p = plotting.VerboseDistribution()
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == 1, label="Signal")
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == bkgrOutput, label="Background")
            p.finish()
            p.save('spectator_{}.pdf'.format(hash(spectator)))
            graphics.add('spectator_{}.pdf'.format(hash(spectator)), width=1.0)
            o += graphics.finish()

            for identifier in identifiers:
                o += b2latex.SubSubSection(format.string(spectator) + " with classifier " + format.string(identifier))
                identifier_abbr = identifier_abbreviations[identifier]
                data = {identifier_abbr: test_probability[identifier_abbr], spectator_abbr: spectators_data[spectator_abbr]}
                graphics = b2latex.Graphics()
                # NOTE(review): constructor line restored, it is missing in the listing.
                p = plotting.Correlation()
                p.add(data, spectator_abbr, identifier_abbr, list(range(10, 100, 10)),
                      test_target[identifier_abbr] == 1,
                      test_target[identifier_abbr] == bkgrOutput)
                p.finish()
                p.save('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)))
                graphics.add('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)), width=1.0)
                o += graphics.finish()

        o.save('latex.tex', compile=True)
        # Go back first, so that a relative output file name refers to the original directory.
        os.chdir(old_cwd)
        shutil.copy(os.path.join(report_directory, 'latex.pdf'), args.outputfile)
plotting.VerboseDistribution
Definition: plotting.py:937
basf2_mva_util.tree2dict
def tree2dict(tree, tree_columns, dict_columns=None)
Definition: basf2_mva_util.py:16
plotting.Correlation
Definition: plotting.py:1012
plotting.PurityAndEfficiencyOverCut
Definition: plotting.py:241
plotting.RejectionOverEfficiency
Definition: plotting.py:396
plotting.Diagonal
Definition: plotting.py:495
plotting.CorrelationMatrix
Definition: plotting.py:1183
basf2_mva_util.Method
Definition: basf2_mva_util.py:81
plotting.Importance
Definition: plotting.py:1130
Belle2::makeROOTCompatible
std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.
Definition: MakeROOTCompatible.cc:74
plotting.Overtraining
Definition: plotting.py:813
plotting.Multiplot
Definition: plotting.py:445
plotting.TSNE
Definition: plotting.py:1097