Belle II Software  release-06-01-15
ft_mva_evaluate.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import basf2_mva_util
13 
14 import ftPlotting as plotting
15 import argparse
16 import tempfile
17 
18 import numpy as np
19 from B2Tools import b2latex, format
20 
21 import os
22 import shutil
23 import collections
24 import ROOT
25 from ROOT import Belle2
26 from flavorTagger import KId, muId, eId
27 import flavorTagger as ft
28 
29 
def getCommandLineOptions():
    """
    Builds the argument parser of this evaluation script and parses the command line.

    The options -id, -train and -data can be repeated and each occurrence accepts one or more
    values, so the corresponding attributes are lists of lists.

    @return namespace containing the parsed command line arguments
    """
    cli = argparse.ArgumentParser()

    # Keyword arguments shared by all options which can be given several times.
    repeatable = dict(type=str, action='append', nargs='+')

    cli.add_argument('-id', '--identifiers', dest='identifiers', required=True,
                     help='DB Identifier or weightfile', **repeatable)
    cli.add_argument('-train', '--train_datafiles', dest='train_datafiles', required=False,
                     help='Data file containing ROOT TTree used during training', **repeatable)
    cli.add_argument('-data', '--datafiles', dest='datafiles', required=True,
                     help='Data file containing ROOT TTree with independent test data', **repeatable)

    # Plain string options with a default value.
    cli.add_argument('-tree', '--treename', dest='treename', type=str, default='tree',
                     help='Treename in data file')
    cli.add_argument('-out', '--outputfile', dest='outputfile', type=str, default='output.pdf',
                     help='Name of the outputted pdf file')
    cli.add_argument('-w', '--working_directory', dest='working_directory', type=str, default='',
                     help="""Working directory where the created images and root files are stored,
 default is to create a temporary directory.""")
    cli.add_argument('-b2Orb', '--BelleOrBelle2', dest='BelleOrBelle2', type=str, default='Belle2',
                     help="""Tell me if this is Belle or Belle2 MC please.""")

    return cli.parse_args()
49 
50 
def unique(input):
    """
    Removes repeated elements while keeping the order of first appearance.
    Elements are compared by equality, so they do not have to be hashable.
    @param input iterable containing the elements
    @return new list in which every element occurs exactly once
    """
    kept = []
    keep = kept.append
    for element in input:
        if element in kept:
            continue
        keep(element)
    return kept
61 
62 
def create_abbreviations(names, length=5):
    """
    Assigns a short label to every given name.

    Names listed in the internal table get the first entry stored for them there (a LaTeX string),
    every other name is cut to its first characters. If several names end up with the same label,
    a running number (1, 2, ...) is appended to each of them.
    The function reads the module-level args (BelleOrBelle2 and identifiers).

    @param names list of the names which should be abbreviated
    @param length number of characters kept for names without an entry in the table
    @return OrderedDict mapping each name to its label
    """

    # First entry of each value is the label used below. The second entry, where present,
    # looks like a unit string and is not used in this function.
    plot_params = {'useCMSFrame(p)': [r'$p^*$', r"{\rm GeV}/c\, "],
                   'useCMSFrame(pt)': [r'$p_{\rm t}^*$', r"{\rm GeV}/c\, "],
                   'p': [r'$p$', r"{\rm GeV}/c\, "],
                   'pt': [r'$p_{\rm t}$', r"{\rm GeV}/c\, "],
                   'pLambda': [r'$p_{\Lambda}$', r"{\rm GeV}/c\, "],
                   'useCMSFrame(p)Lambda': [r'$p^*_{\Lambda}$', r"{\rm GeV}/c\, "],
                   'useCMSFrame(p)FSC': [r'$p^*_{\rm Slow}$', r"{\rm GeV}/c\, "],
                   'cosTheta': [r'$\cos{\theta}$', ""],
                   eId[args.BelleOrBelle2]: [r'$\mathcal{L}_{e}$', ""],
                   'eid_dEdx': [r'$\mathcal{L}_{e}^{{\rm d}E/{\rm d}x}$', ""],
                   'eid_TOP': [r'$\mathcal{L}_{e}^{\rm TOP}$', ""],
                   'eid_ARICH': [r'$\mathcal{L}_{e}^{\rm ARICH}$', ""],
                   'eid_ECL': [r'$\mathcal{L}_{e}^{\rm ECL}$', ""],
                   'BtagToWBosonVariables(recoilMassSqrd)': [r'$M_{\rm rec}^2$', r"{\rm GeV}^2/c^4"],
                   'BtagToWBosonVariables(pMissCMS)': [r'$p^*_{\rm miss}$', r"{\rm GeV}/c\, "],
                   'BtagToWBosonVariables(cosThetaMissCMS)': [r'$\cos{\theta^*_{\rm miss}}$', ""],
                   'BtagToWBosonVariables(EW90)': [r'$E_{90}^{W}$', r"{\rm GeV}\, "],
                   'BtagToWBosonVariables(recoilMass)': [r'$M_{\rm rec}$', r"{\rm GeV}/c^2\, "],
                   'cosTPTO': [r'$\vert\cos{\theta^*_{\rm T}}\vert$', ""],
                   'cosTPTOFSC': [r'$\vert\cos{\theta^*_{\rm T,Slow}}\vert$', ""],
                   'ImpactXY': [r'$d_0$', r"{\rm mm}\, "],
                   'distance': [r'$\xi_0$', r"{\rm mm}\, "],
                   'chiProb': [r'$p$-${\rm value}$', ""],
                   muId[args.BelleOrBelle2]: [r'$\mathcal{L}_{\mu}$', ""],
                   'muid_dEdx': [r'$\mathcal{L}_{\mu}^{{\rm d}E/{\rm d}x}$', ""],
                   'muid_TOP': [r'$\mathcal{L}_{\mu}^{\rm TOP}$', ""],
                   'muid_ARICH': [r'$\mathcal{L}_{\mu}^{\rm ARICH}$', ""],
                   'muid_KLM': [r'$\mathcal{L}_{\mu}^{\rm KLM}$', ""],
                   KId[args.BelleOrBelle2]: [r'$\mathcal{L}_{K}$', ""],
                   'Kid_dEdx': [r'$\mathcal{L}_{K}^{{\rm d}E/{\rm d}x}$', ""],
                   'Kid_TOP': [r'$\mathcal{L}_{K}^{\rm TOP}$', ""],
                   'Kid_ARICH': [r'$\mathcal{L}_{K}^{\rm ARICH}$', ""],
                   'NumberOfKShortsInRoe': [r'$n_{K^0_S}$', ""],
                   'ptTracksRoe': [r'$\Sigma\, p_{\rm t}^2$', r"{\rm GeV^2}/c^2"],
                   'extraInfo(isRightCategory(Kaon))': [r"$y_{\rm Kaon}$", ""],
                   'HighestProbInCat(pi+:inRoe, isRightCategory(SlowPion))': [r"$y_{\rm SlowPion}$", ""],
                   'KaonPionVariables(cosKaonPion)': [r'$\cos{\theta^*_{K\pi}}$', ""],
                   'KaonPionVariables(HaveOpositeCharges)': [r'$\frac{1 - q_{K} \cdot q_\pi}{2}$', ""],
                   'pionID': [r'$\mathcal{L}_{\pi}$', ""],
                   'piid_dEdx': [r'$\mathcal{L}_{\pi}^{{\rm d}E/{\rm d}x}$', ""],
                   'piid_TOP': [r'$\mathcal{L}_{\pi}^{\rm TOP}$', ""],
                   'piid_ARICH': [r'$\mathcal{L}_{\pi}^{\rm ARICH}$', ""],
                   'pi_vs_edEdxid': [r'$\mathcal{L}_{\pi/e}^{{\rm d}E/{\rm d}x}$', ""],
                   'FSCVariables(pFastCMS)': [r'$p^*_{\rm Fast}$', r"{\rm GeV}/c\, "],
                   'FSCVariables(cosSlowFast)': [r'$\cos{\theta^*_{\rm SlowFast}}$', ''],
                   'FSCVariables(cosTPTOFast)': [r'$\vert\cos{\theta^*_{\rm T, Fast}}\vert$', ''],
                   'FSCVariables(SlowFastHaveOpositeCharges)': [r'$\frac{1 - q_{\rm Slow} \cdot q_{\rm Fast}}{2}$', ""],
                   'lambdaFlavor': [r'$q_{\Lambda}$', ""],
                   'M': [r'$M_{\Lambda}$', r"{\rm MeV}/c^2\, "],
                   'cosAngleBetweenMomentumAndVertexVector': [
                       r'$\cos{\theta_{\boldsymbol{x}_{\Lambda},\boldsymbol{p}_{\Lambda}}}$', ""],
                   'lambdaZError': [r'$\sigma_{\Lambda}^{zz}$', r"{\rm mm}\, "],
                   'daughter(0,p)': [r'$p_{\pi}$', r"{\rm GeV}/c\, "],
                   'daughter(0,useCMSFrame(p))': [r'$p^*_{\pi}$', r"{\rm GeV}/c\, "],
                   'daughter(1,p)': [r'$p_{p}$', r"{\rm GeV}/c"],
                   'daughter(1,useCMSFrame(p))': [r'$p^*_{p}$', r"{\rm GeV}/c\, "],
                   'daughter(1,protonID)': [r'$\mathcal{L}_{p}$', ""],
                   'daughter(0,pionID)': [r'$\mathcal{L}_{\pi}$', ""],
                   'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))': [
                       r'${\rm Int.\ Muon}$'],
                   'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))': [r'${\rm Muon}$'],
                   'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))': [r'${\rm FSC}$'],
                   'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))': [r'${\rm Electron}$'],
                   'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))': [
                       r'${\rm Int.\ El.}$'],
                   'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))': [r'${\rm Lambda}$'],
                   'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))': [r'${\rm Kaon}$' + '-' + r'${\rm Pion}$'],
                   'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))': [r'${\rm Fast\ Hadron}$'],
                   'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))': [
                       r'${\rm Int.\ Kin.\ Lep.}$'],
                   'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))': [r'${\rm Max.}\,p^*$'],
                   'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))': [r'${\rm Slow\ Pion}$'],
                   'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))': [r'${\rm Kin.\ Lep.}$'],
                   'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))': [r'${\rm Kaon}$']}

    # The Lambda level classifier gets a different label for 'distance'.
    first_identifier = sum(args.identifiers, [])[0]
    if 'LevelLambdaFBDT' in first_identifier:
        plot_params['distance'] = [r'$\vert \boldsymbol{x}_{\Lambda}\vert$', r"{\rm mm}\, "]

    # Label of every name before duplicates are resolved, in the order of names.
    raw_labels = [plot_params[name][0] if name in plot_params else name[:length] for name in names]

    # How often each label occurs in total, and how often it has been handed out so far.
    occurrences = collections.Counter(raw_labels)
    handed_out = collections.Counter()

    abbreviations = collections.OrderedDict()
    for name, label in zip(names, raw_labels):
        if occurrences[label] > 1:
            handed_out[label] += 1
            abbreviations[name] = label + str(handed_out[label])
        else:
            abbreviations[name] = label
    return abbreviations
172 
173 
if __name__ == '__main__':
    # Script entry point. Applies the given MVA methods to the independent data (and optionally to the
    # training data), creates the evaluation plots, collects them in a LaTeX document and copies the
    # compiled pdf to args.outputfile.

    ROOT.gROOT.SetBatch(True)

    old_cwd = os.getcwd()
    # args has to stay a module-level name, create_abbreviations reads it.
    args = getCommandLineOptions()

    # The repeatable options arrive as lists of lists, flatten them.
    identifiers = sum(args.identifiers, [])
    identifier_abbreviations = create_abbreviations(identifiers)

    datafiles = sum(args.datafiles, [])

    print("Load methods")
    methods = [basf2_mva_util.Method(identifier) for identifier in identifiers]

    print("Apply experts on independent data")
    test_probability = {}
    test_target = {}
    for method in methods:
        p, t = method.apply_expert(datafiles, args.treename)
        test_probability[identifier_abbreviations[method.identifier]] = p
        test_target[identifier_abbreviations[method.identifier]] = t

    print("Apply experts on training data")
    train_probability = {}
    train_target = {}
    if args.train_datafiles is not None:
        train_datafiles = sum(args.train_datafiles, [])
        for method in methods:
            p, t = method.apply_expert(train_datafiles, args.treename)
            train_probability[identifier_abbreviations[method.identifier]] = p
            train_target[identifier_abbreviations[method.identifier]] = t

    variables = unique(v for method in methods for v in method.variables)
    root_variables = unique(v for method in methods for v in method.root_variables)

    print("Here Variables")
    print(variables)

    bkgrOutput = 0
    displayHeatMap = False
    classOutputLabel = r'${\rm Classifier\ Output}$'
    isNN = False

    # All decisions below are taken from the name of the first identifier only.
    first_identifier = identifiers[0]
    known_tags = ('Combiner', 'KaonFBDT', 'Electron', 'Muon', 'Lepton', 'SlowPion', 'FastHadron',
                  'KaonPion', 'FSC', 'MaximumPstar', 'Lambda')

    if any(tag in first_identifier for tag in known_tags):

        if 'Combiner' in first_identifier:
            displayHeatMap = True
            bkgrOutput = -1

            variables = [
                'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))',
                'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))',
                'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))',
                'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))',
                'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))',
                'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))',
                'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))',
                'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))',
                'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))',
                'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))'
            ]
            variables = list(reversed(variables))

        # The checks are deliberately independent (no elif): if several tags match,
        # the last matching category determines the variables.
        if 'Electron' in first_identifier:
            ft.setVariables()
            if 'Intermediate' in first_identifier:
                variables = ft.variables['IntermediateElectron']
            else:
                variables = ft.variables['Electron']

        if 'Muon' in first_identifier:
            ft.setVariables()
            if 'Intermediate' in first_identifier:
                variables = ft.variables['IntermediateMuon']
            else:
                variables = ft.variables['Muon']

        if 'Lepton' in first_identifier:
            ft.setVariables()
            if 'Intermediate' in first_identifier:
                variables = ft.variables['IntermediateKinLepton']
            else:
                variables = ft.variables['KinLepton']

        if 'KaonFBDT' in first_identifier:
            displayHeatMap = True
            ft.setVariables()
            variables = ft.variables['Kaon']

        if 'SlowPion' in first_identifier:
            ft.setVariables()
            variables = ft.variables['SlowPion']

        if 'FastHadron' in first_identifier:
            ft.setVariables()
            variables = ft.variables['FastHadron']

        if 'KaonPion' in first_identifier:
            ft.setVariables()
            variables = ft.variables['KaonPion']

        if 'FSC' in first_identifier:
            ft.setVariables()
            variables = ft.variables['FSC']

        if 'MaximumPstar' in first_identifier:
            ft.setVariables()
            variables = ft.variables['MaximumPstar']

        if 'Lambda' in first_identifier:
            displayHeatMap = True
            ft.setVariables()
            variables = ft.variables['Lambda']

        variables = list(reversed(variables))
        # Overwrite the ROOT branch names position by position with the names of the selected variables.
        for iVarPosition, variable in enumerate(variables):
            root_variables[iVarPosition] = Belle2.makeROOTCompatible(variable)

        # Set special name
        if 'FSC' in first_identifier:
            variables = ['useCMSFrame(p)FSC' if v == 'useCMSFrame(p)' else v for v in variables]

        if 'Lambda' in first_identifier:
            displayHeatMap = True
            variables = ['useCMSFrame(p)Lambda' if v == 'useCMSFrame(p)' else v for v in variables]

    # Axis label of the classifier output. For the category levels the first matching tag wins.
    level_labels = (
        ('LevelMaximumPstar', r'$y_{{\rm Maximum}\, p^*}$'),
        ('LevelFSCFBDT', r'$y_{\rm FSC}$'),
        ('LevelMuonFBDT', r'$y_{\rm Muon}$'),
        ('LevelElectronFBDT', r'$y_{\rm Electron}$'),
        ('LevelKaonFBDT', r'$y_{\rm Kaon}$'),
        ('LevelLambdaFBDT', r'$y_{\rm Lambda}$'),
        ('LevelIntermediateKinLeptonFBDT', r'$y_{\rm Int.\, Kin.\, Lepton}$'),
        ('LevelKinLeptonFBDT', r'$y_{\rm Kin.\, Lepton}$'),
        ('LevelIntermediateMuon', r'$y_{\rm Int.\, Muon}$'),
        ('LevelIntermediateElectron', r'$y_{\rm Int.\, Electron}$'),
        ('LevelKaonPionFBDT', r'$y_{\rm Kaon-Pion}$'),
        ('LevelFastHadron', r'$y_{\rm Fast\, Hadron}$'),
        ('LevelSlowPion', r'$y_{\rm Slow\, Pion}$'),
    )
    if 'Combiner' in first_identifier:
        if 'FANN' in first_identifier:
            classOutputLabel = r'$(q\cdot r)_{\rm MLP}$'
            isNN = True
        if 'FBDT' in first_identifier:
            classOutputLabel = r'$(q\cdot r)_{\rm FBDT}$'
    else:
        for level_tag, level_label in level_labels:
            if level_tag in first_identifier:
                classOutputLabel = level_label
                break

    variable_abbreviations = create_abbreviations(variables)

    spectators = unique(v for method in methods for v in method.spectators)
    spectator_abbreviations = create_abbreviations(spectators)
    root_spectators = unique(v for method in methods for v in method.root_spectators)

    print("Load variables array")
    rootchain = ROOT.TChain(args.treename)
    for datafile in datafiles:
        rootchain.Add(datafile)

    variables_data = basf2_mva_util.tree2dict(rootchain, root_variables, list(variable_abbreviations.values()))
    spectators_data = basf2_mva_util.tree2dict(rootchain, root_spectators, list(spectator_abbreviations.values()))

    print("Create latex file")
    # Change working directory after experts run, because they might want to access
    # a localdb in the current working directory
    with tempfile.TemporaryDirectory() as tempdir:
        # Directory in which the plots and the latex file are created.
        report_directory = tempdir if args.working_directory == '' else args.working_directory
        os.chdir(report_directory)

        o = b2latex.LatexFile()
        o += b2latex.TitlePage(title='Automatic MVA Evaluation',
                               authors=[r'Thomas Keck\\ Moritz Gelb\\ Nils Braun'],
                               abstract='Evaluation plots',
                               add_table_of_contents=True).finish()

        o += b2latex.Section("Classifiers")
        o += b2latex.String(r"""
        This section contains the GeneralOptions and SpecificOptions of all classifiers represented by an XML tree.
        The same information can be retrieved using the basf2\_mva\_info tool.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of identifiers", "{name} & {abbr}", r"Identifier & Abbreviation")
        for identifier in identifiers:
            table.add(name=format.string(identifier), abbr=format.string(identifier_abbreviations[identifier]))
        o += table.finish()

        # for method in methods:
        #     o += b2latex.SubSection(format.string(method.identifier))
        #     o += b2latex.Listing(language='XML').add(method.description).finish()

        o += b2latex.Section("Variables")
        o += b2latex.String("""
        This section contains an overview of the importance and correlation of the variables used by the classifiers.
        And distribution plots of the variables on the independent dataset. The distributions are normed for signal and
        background separately, and only the region +- 3 sigma around the mean is shown.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of variables", "{name} & {abbr}", r"Variable & Abbreviation")
        for v in variables:
            # The abbreviation is added without format.string, it is already a LaTeX expression.
            # table.add(name=format.string(v), abbr=format.string(variable_abbreviations[v]))
            table.add(name=format.string(v), abbr=variable_abbreviations[v])
        o += table.finish()

        o += b2latex.SubSection("Importance")
        graphics = b2latex.Graphics()
        p = plotting.Importance()
        p.add({identifier_abbreviations[i.identifier]: np.array([i.importances.get(v, 0.0) for v in variables]) for i in methods},
              identifier_abbreviations.values(), variable_abbreviations.values(), displayHeatMap)
        p.finish()
        p.save('importance.pdf')
        graphics.add('importance.pdf', width=1.0)
        o += graphics.finish()

        o += b2latex.SubSection("Correlation")
        first_identifier_abbr = list(identifier_abbreviations.values())[0]
        graphics = b2latex.Graphics()
        # NOTE(review): the listing skips its own line 412 here. Presumably the plot object of this
        # subsection is created on that line; as listed, p is still the plot created above.
        # Restore the line from the upstream file.
        p.add(variables_data, variable_abbreviations.values(),
              test_target[first_identifier_abbr] == 1,
              test_target[first_identifier_abbr] == bkgrOutput, bkgrOutput)
        p.finish()
        p.save('correlation_plot.pdf')
        graphics.add('correlation_plot.pdf', width=1.0)
        o += graphics.finish()

        # The TSNE plot is switched off.
        if False:
            graphics = b2latex.Graphics()
            p = plotting.TSNE()
            p.add(variables_data, variable_abbreviations.values(),
                  test_target[first_identifier_abbr] == 1,
                  test_target[first_identifier_abbr] == bkgrOutput)
            p.finish()
            p.save('tsne_plot.pdf')
            graphics.add('tsne_plot.pdf', width=1.0)
            o += graphics.finish()

        # for v in variables:
        #     variable_abbr = variable_abbreviations[v]
        #     o += b2latex.SubSection(format.string(v))
        #     graphics = b2latex.Graphics()
        #     p = plotting.VerboseDistribution(normed=True, range_in_std=3)
        #     p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == 1, label=r"${\rm Signal}$")
        #     p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == bkgrOutput, label=r"${\rm Background}$")
        #     p.finish()
        #     p.save('variable_{}.pdf'.format(hash(v)))
        #     graphics.add('variable_{}.pdf'.format(hash(v)), width=1.0)
        #     o += graphics.finish()

        o += b2latex.Section("Classifier Plot")
        # The pieces are joined without a separator, so every piece ends with a space.
        o += b2latex.String("This section contains the receiver operating characteristics (ROC), purity projection, ... "
                            "of the classifiers on training and independent data. "
                            "The legend of each plot contains the shortened identifier and the area under the ROC curve "
                            "in parenthesis.")

        o += b2latex.Section("ROC Plot")
        graphics = b2latex.Graphics()
        # NOTE(review): the listing skips its own line 452 here. Presumably the ROC plot object is
        # created on that line; as listed, p is reused from the previous section. Restore from upstream.
        for identifier in identifier_abbreviations.values():
            auc = p.add(test_probability, identifier, test_target[identifier] == 1, test_target[identifier] == bkgrOutput)
            o += b2latex.String("This is the Area under the ROC " + " ({:.2f})".format(auc) + ".")
            # The file is rewritten for every identifier, it ends up holding the value of the last one.
            with open("AUCROCTest.txt", "w") as f:
                f.write("{:.6f}".format(auc))
        p.finish()
        # p.axis.set_title("ROC Rejection Plot on independent data")
        p.save('roc_plot_test.pdf')
        graphics.add('roc_plot_test.pdf', width=1.0)
        o += graphics.finish()

        if train_probability:
            for identifier in identifiers:
                graphics = b2latex.Graphics()
                # NOTE(review): the listing skips its own line 468 here. Presumably a new plot object
                # is created per identifier on that line. Restore from upstream.
                identifier_abbr = identifier_abbreviations[identifier]
                aucTrain = p.add(train_probability, identifier_abbr, train_target[identifier_abbr] == 1,
                                 train_target[identifier_abbr] == bkgrOutput, label=r'{\rm Train}')
                o += b2latex.String("This is the Area under the train ROC " + " ({:.2f})".format(aucTrain) + ". ")
                # Store the AUC obtained on the training data (not the test AUC left over from the loop above).
                with open("AUCROCTrain.txt", "w") as f:
                    f.write("{:.6f}".format(aucTrain))
                aucTest = p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                                test_target[identifier_abbr] == bkgrOutput, label=r'{\rm Test}')
                o += b2latex.String("This is the Area under the test ROC " + " ({:.2f})".format(aucTest) + ".")
                p.finish()
                # p.axis.set_title(identifier)
                p.save('roc_test.pdf')
                graphics.add('roc_test.pdf', width=1.0)
                o += graphics.finish()

        o += b2latex.Section("Classification Results")

        for identifier in identifiers:
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            graphics = b2latex.Graphics()
            # NOTE(review): the listing skips its own line 491 here. Presumably the plot object with
            # the two sub plots used below is created on that line. Restore from upstream.
            p.add(0, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=True)
            p.sub_plots[0].axis.set_title("Classification result in test data ")

            p.add(1, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=False)
            p.sub_plots[1].axis.set_title("Classification result in test data ")
            p.finish()

            p.save('classification_result.pdf')
            graphics.add('classification_result.pdf', width=1)
            o += graphics.finish()

        o += b2latex.Section("Diagonal Plot")
        graphics = b2latex.Graphics()
        p = plotting.Diagonal()
        for identifier in identifiers:
            # Look up the abbreviation first, so that the subsection title belongs to this identifier.
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1, test_target[identifier_abbr] == bkgrOutput)
        p.finish()
        p.axis.set_title("Diagonal plot on independent data")
        p.save('diagonal_plot_test.pdf')
        graphics.add('diagonal_plot_test.pdf', width=1.0)
        o += graphics.finish()

        if train_probability:
            o += b2latex.SubSection("Overtraining Plot")
            for identifier in identifiers:
                identifier_abbr = identifier_abbreviations[identifier]
                # Training and test sample are concatenated, train_mask marks the training part.
                probability = {identifier_abbr: np.r_[train_probability[identifier_abbr], test_probability[identifier_abbr]]}
                target = np.r_[train_target[identifier_abbr], test_target[identifier_abbr]]
                train_mask = np.r_[np.ones(len(train_target[identifier_abbr])), np.zeros(len(test_target[identifier_abbr]))]
                graphics = b2latex.Graphics()
                # NOTE(review): the listing skips its own line 526 here. Presumably the overtraining
                # plot object is created on that line. Restore from upstream.
                p.add(probability, identifier_abbr,
                      train_mask == 1, train_mask == 0,
                      target == 1, target == bkgrOutput, None, bkgrOutput, isNN)
                p.finish(xLabel=classOutputLabel)
                # p.axis.set_title("Overtraining check for " + str(identifier))
                p.save('overtraining_plot.pdf')
                graphics.add('overtraining_plot.pdf', width=1.0)
                o += graphics.finish()
                print("Finished Overtraining plot")

        o += b2latex.Section("Spectators")
        o += b2latex.String("This section contains the distribution and dependence on the "
                            "classifier outputs of all spectator variables.")

        table = b2latex.LongTable(r"ll", "Abbreviations of spectators", "{name} & {abbr}", r"Spectator & Abbreviation")
        for s in spectators:
            table.add(name=format.string(s), abbr=format.string(spectator_abbreviations[s]))
        o += table.finish()

        for spectator in spectators:
            spectator_abbr = spectator_abbreviations[spectator]
            o += b2latex.SubSection(format.string(spectator))
            graphics = b2latex.Graphics()
            # NOTE(review): the listing skips its own line 550 here. Presumably the distribution
            # plot object is created on that line. Restore from upstream.
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == 1, label="Signal")
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == bkgrOutput, label="Background")
            p.finish()
            p.save('spectator_{}.pdf'.format(hash(spectator)))
            graphics.add('spectator_{}.pdf'.format(hash(spectator)), width=1.0)
            o += graphics.finish()

            for identifier in identifiers:
                o += b2latex.SubSubSection(format.string(spectator) + " with classifier " + format.string(identifier))
                identifier_abbr = identifier_abbreviations[identifier]
                data = {identifier_abbr: test_probability[identifier_abbr], spectator_abbr: spectators_data[spectator_abbr]}
                graphics = b2latex.Graphics()
                # NOTE(review): the listing skips its own line 563 here. Presumably the correlation
                # plot object is created on that line. Restore from upstream.
                p.add(data, spectator_abbr, identifier_abbr, list(range(10, 100, 10)),
                      test_target[identifier_abbr] == 1,
                      test_target[identifier_abbr] == bkgrOutput)
                p.finish()
                p.save('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)))
                graphics.add('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)), width=1.0)
                o += graphics.finish()

        o.save('latex.tex', compile=True)
        # Go back before copying, so that a relative output file name refers to the original directory.
        os.chdir(old_cwd)
        shutil.copy(report_directory + '/latex.pdf', args.outputfile)
def tree2dict(tree, tree_columns, dict_columns=None)
std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.