Belle II Software  light-2212-foldex
ft_mva_evaluate.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import basf2_mva_util
13 
14 import ftPlotting as plotting
15 import argparse
16 import tempfile
17 
18 import numpy as np
19 from B2Tools import b2latex, format
20 
21 import os
22 import shutil
23 import collections
24 import ROOT
25 from ROOT import Belle2
26 from flavorTagger import KId, muId, eId
27 import flavorTagger as ft
28 
29 
def getCommandLineOptions():
    """
    Parse the command line of the flavor tagger MVA evaluation script.

    The options ``-id``, ``-train`` and ``-data`` use ``action='append'`` together with
    ``nargs='+'``, so each of them is returned as a list of lists (one inner list per
    occurrence of the option on the command line).

    @return the argparse namespace with the attributes identifiers, train_datafiles,
            datafiles, treename, outputfile, working_directory and BelleOrBelle2
    """
    # Settings shared by every option that may be repeated and takes several values.
    multi_value = dict(type=str, action='append', nargs='+')

    # (flags, keyword arguments) in the order in which they are registered.
    option_table = (
        (('-id', '--identifiers'),
         dict(multi_value, dest='identifiers', required=True,
              help='DB Identifier or weightfile')),
        (('-train', '--train_datafiles'),
         dict(multi_value, dest='train_datafiles', required=False,
              help='Data file containing ROOT TTree used during training')),
        (('-data', '--datafiles'),
         dict(multi_value, dest='datafiles', required=True,
              help='Data file containing ROOT TTree with independent test data')),
        (('-tree', '--treename'),
         dict(dest='treename', type=str, default='tree',
              help='Treename in data file')),
        (('-out', '--outputfile'),
         dict(dest='outputfile', type=str, default='output.pdf',
              help='Name of the outputted pdf file')),
        (('-w', '--working_directory'),
         dict(dest='working_directory', type=str, default='',
              help="""Working directory where the created images and root files are stored,
                              default is to create a temporary directory.""")),
        (('-b2Orb', '--BelleOrBelle2'),
         dict(dest='BelleOrBelle2', type=str, default='Belle2',
              help="""Tell me if this is Belle or Belle2 MC please.""")),
    )

    parser = argparse.ArgumentParser()
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
49 
50 
def unique(input):
    """
    Remove repeated elements from an iterable while preserving first-occurrence order.

    Membership is checked against the result list itself, so the elements only need
    to support equality comparison (they do not have to be hashable).

    @param input iterable containing the elements
    @return a new list with every distinct element exactly once
    """
    kept = []
    for element in input:
        if element in kept:
            continue
        kept.append(element)
    return kept
61 
62 
def create_abbreviations(names, length=5):
    """
    Map every name to a short label that is used in tables and plots.

    A name found in the internal lookup table gets the first entry stored there (a LaTeX
    label); any other name is truncated to its first ``length`` characters. Labels that
    would be assigned to more than one name get a running number (1, 2, ...) appended.

    The function reads the module-level ``args`` (``args.BelleOrBelle2`` selects the
    keys taken from eId, muId and KId, and the first entry of ``args.identifiers``
    decides how 'distance' is labelled), so ``args`` has to exist before it is called.

    @param names list of names (identifiers, variables or spectators)
    @param length number of characters kept when a name has no entry in the lookup table
    @return collections.OrderedDict mapping name -> label, in the order of ``names``
    """
    # Unit strings that occur many times in the table below.
    momentum_unit = r"{\rm GeV}/c\, "
    length_unit = r"{\rm mm}\, "

    # name -> [LaTeX label, unit]; the QpOf entries only carry a label.
    plot_params = {
        'useCMSFrame(p)': [r'$p^*$', momentum_unit],
        'useCMSFrame(pt)': [r'$p_{\rm t}^*$', momentum_unit],
        'p': [r'$p$', momentum_unit],
        'pt': [r'$p_{\rm t}$', momentum_unit],
        'pLambda': [r'$p_{\Lambda}$', momentum_unit],
        'useCMSFrame(p)Lambda': [r'$p^*_{\Lambda}$', momentum_unit],
        'useCMSFrame(p)FSC': [r'$p^*_{\rm Slow}$', momentum_unit],
        'cosTheta': [r'$\cos{\theta}$', ""],
        eId[args.BelleOrBelle2]: [r'$\mathcal{L}_{e}$', ""],
        'eid_dEdx': [r'$\mathcal{L}_{e}^{{\rm d}E/{\rm d}x}$', ""],
        'eid_TOP': [r'$\mathcal{L}_{e}^{\rm TOP}$', ""],
        'eid_ARICH': [r'$\mathcal{L}_{e}^{\rm ARICH}$', ""],
        'eid_ECL': [r'$\mathcal{L}_{e}^{\rm ECL}$', ""],
        'BtagToWBosonVariables(recoilMassSqrd)': [r'$M_{\rm rec}^2$', r"{\rm GeV}^2/c^4"],
        'BtagToWBosonVariables(pMissCMS)': [r'$p^*_{\rm miss}$', momentum_unit],
        'BtagToWBosonVariables(cosThetaMissCMS)': [r'$\cos{\theta^*_{\rm miss}}$', ""],
        'BtagToWBosonVariables(EW90)': [r'$E_{90}^{W}$', r"{\rm GeV}\, "],
        'BtagToWBosonVariables(recoilMass)': [r'$M_{\rm rec}$', r"{\rm GeV}/c^2\, "],
        'cosTPTO': [r'$\vert\cos{\theta^*_{\rm T}}\vert$', ""],
        'cosTPTOFSC': [r'$\vert\cos{\theta^*_{\rm T,Slow}}\vert$', ""],
        'ImpactXY': [r'$d_0$', length_unit],
        'distance': [r'$\xi_0$', length_unit],
        'chiProb': [r'$p$-${\rm value}$', ""],
        muId[args.BelleOrBelle2]: [r'$\mathcal{L}_{\mu}$', ""],
        'muid_dEdx': [r'$\mathcal{L}_{\mu}^{{\rm d}E/{\rm d}x}$', ""],
        'muid_TOP': [r'$\mathcal{L}_{\mu}^{\rm TOP}$', ""],
        'muid_ARICH': [r'$\mathcal{L}_{\mu}^{\rm ARICH}$', ""],
        'muid_KLM': [r'$\mathcal{L}_{\mu}^{\rm KLM}$', ""],
        KId[args.BelleOrBelle2]: [r'$\mathcal{L}_{K}$', ""],
        'Kid_dEdx': [r'$\mathcal{L}_{K}^{{\rm d}E/{\rm d}x}$', ""],
        'Kid_TOP': [r'$\mathcal{L}_{K}^{\rm TOP}$', ""],
        'Kid_ARICH': [r'$\mathcal{L}_{K}^{\rm ARICH}$', ""],
        'NumberOfKShortsInRoe': [r'$n_{K^0_S}$', ""],
        'ptTracksRoe': [r'$\Sigma\, p_{\rm t}^2$', r"{\rm GeV^2}/c^2"],
        'extraInfo(isRightCategory(Kaon))': [r"$y_{\rm Kaon}$", ""],
        'HighestProbInCat(pi+:inRoe, isRightCategory(SlowPion))': [r"$y_{\rm SlowPion}$", ""],
        'KaonPionVariables(cosKaonPion)': [r'$\cos{\theta^*_{K\pi}}$', ""],
        'KaonPionVariables(HaveOpositeCharges)': [r'$\frac{1 - q_{K} \cdot q_\pi}{2}$', ""],
        'pionID': [r'$\mathcal{L}_{\pi}$', ""],
        'piid_dEdx': [r'$\mathcal{L}_{\pi}^{{\rm d}E/{\rm d}x}$', ""],
        'piid_TOP': [r'$\mathcal{L}_{\pi}^{\rm TOP}$', ""],
        'piid_ARICH': [r'$\mathcal{L}_{\pi}^{\rm ARICH}$', ""],
        'pi_vs_edEdxid': [r'$\mathcal{L}_{\pi/e}^{{\rm d}E/{\rm d}x}$', ""],
        'FSCVariables(pFastCMS)': [r'$p^*_{\rm Fast}$', momentum_unit],
        'FSCVariables(cosSlowFast)': [r'$\cos{\theta^*_{\rm SlowFast}}$', ''],
        'FSCVariables(cosTPTOFast)': [r'$\vert\cos{\theta^*_{\rm T, Fast}}\vert$', ''],
        'FSCVariables(SlowFastHaveOpositeCharges)': [r'$\frac{1 - q_{\rm Slow} \cdot q_{\rm Fast}}{2}$', ""],
        'lambdaFlavor': [r'$q_{\Lambda}$', ""],
        'M': [r'$M_{\Lambda}$', r"{\rm MeV}/c^2\, "],
        'cosAngleBetweenMomentumAndVertexVector': [
            r'$\cos{\theta_{\boldsymbol{x}_{\Lambda},\boldsymbol{p}_{\Lambda}}}$', ""],
        'lambdaZError': [r'$\sigma_{\Lambda}^{zz}$', length_unit],
        'daughter(0,p)': [r'$p_{\pi}$', momentum_unit],
        'daughter(0,useCMSFrame(p))': [r'$p^*_{\pi}$', momentum_unit],
        'daughter(1,p)': [r'$p_{p}$', r"{\rm GeV}/c"],
        'daughter(1,useCMSFrame(p))': [r'$p^*_{p}$', momentum_unit],
        'daughter(1,protonID)': [r'$\mathcal{L}_{p}$', ""],
        'daughter(0,pionID)': [r'$\mathcal{L}_{\pi}$', ""],
        'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))': [
            r'${\rm Int.\ Muon}$'],
        'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))': [r'${\rm Muon}$'],
        'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))': [r'${\rm FSC}$'],
        'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))': [r'${\rm Electron}$'],
        'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))': [
            r'${\rm Int.\ El.}$'],
        'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))': [r'${\rm Lambda}$'],
        'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))': [r'${\rm Kaon}$' + '-' + r'${\rm Pion}$'],
        'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))': [r'${\rm Fast\ Hadron}$'],
        'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))': [
            r'${\rm Int.\ Kin.\ Lep.}$'],
        'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))': [r'${\rm Max.}\,p^*$'],
        'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))': [r'${\rm Slow\ Pion}$'],
        'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))': [r'${\rm Kin.\ Lep.}$'],
        'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))': [r'${\rm Kaon}$'],
    }

    # 'distance' gets a different label when the first identifier is the Lambda level.
    first_identifier = sum(args.identifiers, [])[0]
    if 'LevelLambdaFBDT' in first_identifier:
        plot_params['distance'] = [r'$\vert \boldsymbol{x}_{\Lambda}\vert$', length_unit]

    def shorten(name):
        """Return the table label of name, or its first ``length`` characters."""
        if name in plot_params:
            return plot_params[name][0]
        return name[:length]

    # First pass: how often does every label occur?
    occurrences = collections.Counter(shorten(name) for name in names)

    # Second pass: assign the labels and number the ones that are not unique.
    abbreviations = collections.OrderedDict()
    running_number = collections.Counter()
    for name in names:
        label = shorten(name)
        if occurrences[label] > 1:
            running_number[label] += 1
            abbreviations[name] = label + str(running_number[label])
        else:
            abbreviations[name] = label
    return abbreviations
172 
173 
if __name__ == '__main__':
    # Script flow:
    #   1. parse the command line and load one MVA method per identifier,
    #   2. apply the methods to the test (and optionally the training) data,
    #   3. choose the variables, labels and plot options from the first identifier,
    #   4. write all plots and a LaTeX report, and copy the compiled pdf to args.outputfile.
    #
    # NOTE(review): the listing this block was taken from has no line where the plot
    # objects of several sections are created (the stale object of the previous section
    # would be reused). The constructors marked "restored" below were filled in to
    # mirror the surrounding code and have to be confirmed against the upstream script
    # and ftPlotting.

    ROOT.gROOT.SetBatch(True)

    old_cwd = os.getcwd()
    args = getCommandLineOptions()

    # The repeatable options are lists of lists, flatten them.
    identifiers = sum(args.identifiers, [])
    identifier_abbreviations = create_abbreviations(identifiers)
    # Every decision about variables, labels and plot options uses the first identifier only.
    first_identifier = identifiers[0]

    datafiles = sum(args.datafiles, [])

    print("Load methods")
    methods = [basf2_mva_util.Method(identifier) for identifier in identifiers]

    print("Apply experts on independent data")
    test_probability = {}
    test_target = {}
    for method in methods:
        p, t = method.apply_expert(datafiles, args.treename)
        test_probability[identifier_abbreviations[method.identifier]] = p
        test_target[identifier_abbreviations[method.identifier]] = t

    print("Apply experts on training data")
    train_probability = {}
    train_target = {}
    if args.train_datafiles is not None:
        train_datafiles = sum(args.train_datafiles, [])
        for method in methods:
            p, t = method.apply_expert(train_datafiles, args.treename)
            train_probability[identifier_abbreviations[method.identifier]] = p
            train_target[identifier_abbreviations[method.identifier]] = t

    variables = unique(v for method in methods for v in method.variables)
    root_variables = unique(v for method in methods for v in method.root_variables)

    print("Here Variables")
    print(variables)

    # Defaults, overridden below depending on the first identifier.
    bkgrOutput = 0  # target value that marks background (-1 for the combiner)
    displayHeatMap = False
    classOutputLabel = r'${\rm Classifier\ Output}$'
    isNN = False

    flavor_tagger_tags = ('Combiner', 'KaonFBDT', 'Electron', 'Muon', 'Lepton', 'SlowPion',
                          'FastHadron', 'KaonPion', 'FSC', 'MaximumPstar', 'Lambda')

    if any(tag in first_identifier for tag in flavor_tagger_tags):

        if 'Combiner' in first_identifier:
            displayHeatMap = True
            bkgrOutput = -1

            variables = [
                'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))',
                'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))',
                'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))',
                'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))',
                'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))',
                'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))',
                'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))',
                'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))',
                'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))',
                'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))'
            ]
            # Reversed here and once more below, as in the original script.
            variables = list(reversed(variables))

        # The checks are independent and run in this order: a later match replaces
        # the variables chosen by an earlier one.
        prefix = 'Intermediate' if 'Intermediate' in first_identifier else ''

        if 'Electron' in first_identifier:
            variables = ft.getTrainingVariables(prefix + 'Electron')

        if 'Muon' in first_identifier:
            variables = ft.getTrainingVariables(prefix + 'Muon')

        if 'Lepton' in first_identifier:
            variables = ft.getTrainingVariables(prefix + 'KinLepton')

        if 'KaonFBDT' in first_identifier:
            displayHeatMap = True
            variables = ft.getTrainingVariables('Kaon')

        for category in ('SlowPion', 'FastHadron', 'KaonPion', 'FSC', 'MaximumPstar'):
            if category in first_identifier:
                variables = ft.getTrainingVariables(category)

        if 'Lambda' in first_identifier:
            displayHeatMap = True
            variables = ft.getTrainingVariables('Lambda')

        variables = list(reversed(variables))
        # Overwrite the branch names position by position with the ROOT compatible
        # version of the selected variables.
        for iVarPosition, variable in enumerate(variables):
            root_variables[iVarPosition] = Belle2.MakeROOTCompatible.makeROOTCompatible(variable)

        # Set special name (only after the branch names were derived from the real names)
        if 'FSC' in first_identifier:
            variables = ['useCMSFrame(p)FSC' if v == 'useCMSFrame(p)' else v for v in variables]

        if 'Lambda' in first_identifier:
            variables = ['useCMSFrame(p)Lambda' if v == 'useCMSFrame(p)' else v for v in variables]

        if 'Combiner' in first_identifier:
            if 'FANN' in first_identifier:
                classOutputLabel = r'$(q\cdot r)_{\rm MLP}$'
                isNN = True
            if 'FBDT' in first_identifier:
                classOutputLabel = r'$(q\cdot r)_{\rm FBDT}$'
        else:
            # The first matching tag wins, so the order of this table matters.
            level_labels = (
                ('LevelMaximumPstar', r'$y_{{\rm Maximum}\, p^*}$'),
                ('LevelFSCFBDT', r'$y_{\rm FSC}$'),
                ('LevelMuonFBDT', r'$y_{\rm Muon}$'),
                ('LevelElectronFBDT', r'$y_{\rm Electron}$'),
                ('LevelKaonFBDT', r'$y_{\rm Kaon}$'),
                ('LevelLambdaFBDT', r'$y_{\rm Lambda}$'),
                ('LevelIntermediateKinLeptonFBDT', r'$y_{\rm Int.\, Kin.\, Lepton}$'),
                ('LevelKinLeptonFBDT', r'$y_{\rm Kin.\, Lepton}$'),
                ('LevelIntermediateMuon', r'$y_{\rm Int.\, Muon}$'),
                ('LevelIntermediateElectron', r'$y_{\rm Int.\, Electron}$'),
                ('LevelKaonPionFBDT', r'$y_{\rm Kaon-Pion}$'),
                ('LevelFastHadron', r'$y_{\rm Fast\, Hadron}$'),
                ('LevelSlowPion', r'$y_{\rm Slow\, Pion}$'),
            )
            for tag, label in level_labels:
                if tag in first_identifier:
                    classOutputLabel = label
                    break

    variable_abbreviations = create_abbreviations(variables)

    spectators = unique(v for method in methods for v in method.spectators)
    spectator_abbreviations = create_abbreviations(spectators)
    root_spectators = unique(v for method in methods for v in method.root_spectators)

    print("Load variables array")
    rootchain = ROOT.TChain(args.treename)
    for datafile in datafiles:
        rootchain.Add(datafile)

    variables_data = basf2_mva_util.tree2dict(rootchain, root_variables, list(variable_abbreviations.values()))
    spectators_data = basf2_mva_util.tree2dict(rootchain, root_spectators, list(spectator_abbreviations.values()))

    print("Create latex file")
    # Change working directory after experts run, because they might want to access
    # a localdb in the current working directory
    with tempfile.TemporaryDirectory() as tempdir:
        # All images and the LaTeX file are written relative to this directory.
        work_dir = tempdir if args.working_directory == '' else args.working_directory
        os.chdir(work_dir)

        o = b2latex.LatexFile()
        o += b2latex.TitlePage(title='Automatic MVA Evaluation',
                               authors=[r'Thomas Keck\\ Moritz Gelb\\ Nils Braun'],
                               abstract='Evaluation plots',
                               add_table_of_contents=True).finish()

        o += b2latex.Section("Classifiers")
        o += b2latex.String(r"""
        This section contains the GeneralOptions and SpecificOptions of all classifiers represented by an XML tree.
        The same information can be retrieved using the basf2\_mva\_info tool.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of identifiers", "{name} & {abbr}", r"Identifier & Abbreviation")
        for identifier in identifiers:
            table.add(name=format.string(identifier), abbr=format.string(identifier_abbreviations[identifier]))
        o += table.finish()

        # Disabled in the original script: XML listing of every method.
        # for method in methods:
        #     o += b2latex.SubSection(format.string(method.identifier))
        #     o += b2latex.Listing(language='XML').add(method.description).finish()

        o += b2latex.Section("Variables")
        o += b2latex.String("""
        This section contains an overview of the importance and correlation of the variables used by the classifiers.
        And distribution plots of the variables on the independent dataset. The distributions are normed for signal and
        background separately, and only the region +- 3 sigma around the mean is shown.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of variables", "{name} & {abbr}", r"Variable & Abbreviation")
        for v in variables:
            # The abbreviation is passed without format.string, unlike the other tables
            # (presumably because it already is LaTeX markup - confirm).
            table.add(name=format.string(v), abbr=variable_abbreviations[v])
        o += table.finish()

        o += b2latex.SubSection("Importance")
        graphics = b2latex.Graphics()
        p = plotting.Importance()
        # NOTE(review): for FSC/Lambda 'useCMSFrame(p)' was renamed above, so the
        # importance lookup uses the renamed key here - confirm this is intended.
        p.add({identifier_abbreviations[i.identifier]: np.array([i.importances.get(v, 0.0) for v in variables])
               for i in methods},
              identifier_abbreviations.values(), variable_abbreviations.values(), displayHeatMap)
        p.finish()
        p.save('importance.pdf')
        graphics.add('importance.pdf', width=1.0)
        o += graphics.finish()

        o += b2latex.SubSection("Correlation")
        first_identifier_abbr = list(identifier_abbreviations.values())[0]
        graphics = b2latex.Graphics()
        p = plotting.CorrelationMatrix()  # NOTE(review): restored constructor, confirm
        p.add(variables_data, variable_abbreviations.values(),
              test_target[first_identifier_abbr] == 1,
              test_target[first_identifier_abbr] == bkgrOutput, bkgrOutput)
        p.finish()
        p.save('correlation_plot.pdf')
        graphics.add('correlation_plot.pdf', width=1.0)
        o += graphics.finish()

        # The t-SNE plot is switched off on purpose in the original script.
        if False:
            graphics = b2latex.Graphics()
            p = plotting.TSNE()
            p.add(variables_data, variable_abbreviations.values(),
                  test_target[first_identifier_abbr] == 1,
                  test_target[first_identifier_abbr] == bkgrOutput)
            p.finish()
            p.save('tsne_plot.pdf')
            graphics.add('tsne_plot.pdf', width=1.0)
            o += graphics.finish()

        # Disabled in the original script: one distribution plot per variable.
        # for v in variables:
        #     variable_abbr = variable_abbreviations[v]
        #     o += b2latex.SubSection(format.string(v))
        #     graphics = b2latex.Graphics()
        #     p = plotting.VerboseDistribution(normed=True, range_in_std=3)
        #     p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == 1, label=r"${\rm Signal}$")
        #     p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == bkgrOutput, label=r"${\rm Background}$")
        #     p.finish()
        #     p.save('variable_{}.pdf'.format(hash(v)))
        #     graphics.add('variable_{}.pdf'.format(hash(v)), width=1.0)
        #     o += graphics.finish()

        o += b2latex.Section("Classifier Plot")
        # The pieces are joined with explicit spaces; the original concatenation ran
        # the sentences together ("data.The", "curvein").
        o += b2latex.String("This section contains the receiver operating characteristics (ROC), purity projection, ... "
                            "of the classifiers on training and independent data. "
                            "The legend of each plot contains the shortened identifier and the area under the ROC curve "
                            "in parenthesis.")

        o += b2latex.Section("ROC Plot")
        graphics = b2latex.Graphics()
        p = plotting.RejectionOverEfficiency()  # NOTE(review): restored constructor, confirm
        for identifier_abbr in identifier_abbreviations.values():
            auc = p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                        test_target[identifier_abbr] == bkgrOutput)
            o += b2latex.String("This is the Area under the ROC " + " ({:.2f})".format(auc) + ".")
            # The file is rewritten for every identifier, so it ends up holding the last AUC.
            with open("AUCROCTest.txt", "w") as auc_file:
                auc_file.write("{:.6f}".format(auc))
        p.finish()
        # p.axis.set_title("ROC Rejection Plot on independent data")
        p.save('roc_plot_test.pdf')
        graphics.add('roc_plot_test.pdf', width=1.0)
        o += graphics.finish()

        if train_probability:
            # NOTE(review): every identifier saves to the same 'roc_test.pdf'; with more
            # than one identifier the file is overwritten - confirm this is acceptable.
            for identifier in identifiers:
                graphics = b2latex.Graphics()
                p = plotting.RejectionOverEfficiency()  # NOTE(review): restored constructor, confirm
                identifier_abbr = identifier_abbreviations[identifier]
                aucTrain = p.add(train_probability, identifier_abbr, train_target[identifier_abbr] == 1,
                                 train_target[identifier_abbr] == bkgrOutput, label=r'{\rm Train}')
                o += b2latex.String("This is the Area under the train ROC " + " ({:.2f})".format(aucTrain) + ". ")
                # Fix: write the training AUC; the original wrote the test AUC of the
                # previous section into this file.
                with open("AUCROCTrain.txt", "w") as auc_file:
                    auc_file.write("{:.6f}".format(aucTrain))
                aucTest = p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                                test_target[identifier_abbr] == bkgrOutput, label=r'{\rm Test}')
                o += b2latex.String("This is the Area under the test ROC " + " ({:.2f})".format(aucTest) + ".")
                p.finish()
                # p.axis.set_title(identifier)
                p.save('roc_test.pdf')
                graphics.add('roc_test.pdf', width=1.0)
                o += graphics.finish()

        o += b2latex.Section("Classification Results")

        for identifier in identifiers:
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            graphics = b2latex.Graphics()
            # NOTE(review): restored constructor, confirm the plot class and the number of sub plots
            p = plotting.Multiplot(plotting.PurityAndEfficiencyOverCut, 2)
            p.add(0, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=True)
            p.sub_plots[0].axis.set_title("Classification result in test data ")

            p.add(1, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=False)
            p.sub_plots[1].axis.set_title("Classification result in test data ")
            p.finish()

            p.save('classification_result.pdf')
            graphics.add('classification_result.pdf', width=1)
            o += graphics.finish()

        o += b2latex.Section("Diagonal Plot")
        graphics = b2latex.Graphics()
        p = plotting.Diagonal()
        for identifier in identifiers:
            # Fix: look up the abbreviation before it is used in the sub-section title;
            # the original used the value left over from the previous loop.
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput)
        p.finish()
        p.axis.set_title("Diagonal plot on independent data")
        p.save('diagonal_plot_test.pdf')
        graphics.add('diagonal_plot_test.pdf', width=1.0)
        o += graphics.finish()

        if train_probability:
            o += b2latex.SubSection("Overtraining Plot")
            for identifier in identifiers:
                identifier_abbr = identifier_abbreviations[identifier]
                # Training and test sample are concatenated; train_mask marks the training part.
                probability = {identifier_abbr: np.r_[train_probability[identifier_abbr], test_probability[identifier_abbr]]}
                target = np.r_[train_target[identifier_abbr], test_target[identifier_abbr]]
                train_mask = np.r_[np.ones(len(train_target[identifier_abbr])), np.zeros(len(test_target[identifier_abbr]))]
                graphics = b2latex.Graphics()
                p = plotting.Overtraining()  # NOTE(review): restored constructor, confirm
                p.add(probability, identifier_abbr,
                      train_mask == 1, train_mask == 0,
                      target == 1, target == bkgrOutput, None, bkgrOutput, isNN)
                p.finish(xLabel=classOutputLabel)
                # p.axis.set_title("Overtraining check for " + str(identifier))
                p.save('overtraining_plot.pdf')
                graphics.add('overtraining_plot.pdf', width=1.0)
                o += graphics.finish()
                print("Finished Overtraining plot")

        o += b2latex.Section("Spectators")
        # A space was added between the two pieces ("theclassifier" in the original).
        o += b2latex.String("This section contains the distribution and dependence on the "
                            "classifier outputs of all spectator variables.")

        table = b2latex.LongTable(r"ll", "Abbreviations of spectators", "{name} & {abbr}", r"Spectator & Abbreviation")
        for s in spectators:
            table.add(name=format.string(s), abbr=format.string(spectator_abbreviations[s]))
        o += table.finish()

        for spectator in spectators:
            spectator_abbr = spectator_abbreviations[spectator]
            o += b2latex.SubSection(format.string(spectator))
            graphics = b2latex.Graphics()
            p = plotting.VerboseDistribution()  # NOTE(review): restored constructor, confirm
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == 1, label="Signal")
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == bkgrOutput, label="Background")
            p.finish()
            p.save('spectator_{}.pdf'.format(hash(spectator)))
            graphics.add('spectator_{}.pdf'.format(hash(spectator)), width=1.0)
            o += graphics.finish()

            for identifier in identifiers:
                o += b2latex.SubSubSection(format.string(spectator) + " with classifier " + format.string(identifier))
                identifier_abbr = identifier_abbreviations[identifier]
                data = {identifier_abbr: test_probability[identifier_abbr], spectator_abbr: spectators_data[spectator_abbr]}
                graphics = b2latex.Graphics()
                p = plotting.Correlation()  # NOTE(review): restored constructor, confirm
                p.add(data, spectator_abbr, identifier_abbr, list(range(10, 100, 10)),
                      test_target[identifier_abbr] == 1,
                      test_target[identifier_abbr] == bkgrOutput)
                p.finish()
                p.save('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)))
                graphics.add('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)), width=1.0)
                o += graphics.finish()

        o.save('latex.tex', compile=True)
        # Go back first, so that relative paths (working directory, output file)
        # refer to the directory the script was started from.
        os.chdir(old_cwd)
        shutil.copy(os.path.join(work_dir, 'latex.pdf'), args.outputfile)
def tree2dict(tree, tree_columns, dict_columns=None)
static std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.