Belle II Software  release-08-01-10
ft_mva_evaluate.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2_mva_util
12 
13 import ftPlotting as plotting
14 import argparse
15 import tempfile
16 
17 import numpy as np
18 from B2Tools import b2latex, format
19 
20 import os
21 import shutil
22 import collections
23 import ROOT
24 from ROOT import Belle2
25 from flavorTagger import KId, muId, eId
26 import flavorTagger as ft
27 
28 
def getCommandLineOptions():
    """
    Parses the command line options of the flavor tagger MVA evaluation script
    and returns the corresponding arguments.

    The options -id, -train and -data combine action='append' with nargs='+',
    so each of them is returned as a list of lists (one inner list per
    occurrence of the flag). -train is optional and stays None when not given.

    @return argparse.Namespace with the attributes identifiers, train_datafiles,
            datafiles, treename, outputfile, working_directory and BelleOrBelle2
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-id', '--identifiers', dest='identifiers', type=str, required=True, action='append', nargs='+',
                        help='DB Identifier or weightfile')
    parser.add_argument('-train', '--train_datafiles', dest='train_datafiles', type=str, required=False,
                        action='append', nargs='+',
                        help='Data file containing ROOT TTree used during training')
    parser.add_argument('-data', '--datafiles', dest='datafiles', type=str, required=True, action='append', nargs='+',
                        help='Data file containing ROOT TTree with independent test data')
    parser.add_argument('-tree', '--treename', dest='treename', type=str, default='tree', help='Treename in data file')
    parser.add_argument('-out', '--outputfile', dest='outputfile', type=str, default='output.pdf',
                        help='Name of the outputted pdf file')
    # An empty working directory means "use a temporary directory" (see the main script).
    parser.add_argument('-w', '--working_directory', dest='working_directory', type=str, default='',
                        help="""Working directory where the created images and root files are stored,
                        default is to create a temporary directory.""")
    # The value is used as a key into the eId/muId/KId tables of flavorTagger.
    parser.add_argument('-b2Orb', '--BelleOrBelle2', dest='BelleOrBelle2', type=str, default='Belle2',
                        help="""Tell me if this is Belle or Belle2 MC please.""")
    args = parser.parse_args()
    return args
48 
49 
def unique(input):
    """
    Returns a list containing only unique elements, keeps the original order of the list
    @param input iterable containing the elements (may be a one-shot generator)
    @return list with the first occurrence of every element, in input order
    """
    # Materialise first: callers pass generator expressions, which can only be
    # consumed once, and the fallback below needs a second pass.
    items = list(input)
    try:
        # dicts preserve insertion order and compare keys by equality, so this
        # keeps the first occurrence of each element in linear time.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable elements (e.g. lists): fall back to the quadratic scan.
        output = []
        for x in items:
            if x not in output:
                output.append(x)
        return output
60 
61 
def create_abbreviations(names, length=5):
    """
    Creates a label for every given name and returns them as an ordered mapping.

    Names listed in the table below are mapped to their LaTeX label (the first
    element of the table entry; the second element, where present, is not used
    here). All other names are truncated to their first `length` characters.
    Labels which occur more than once get a running number (1, 2, ...) appended
    so that every name ends up with a distinct label.

    The function reads the module-level `args` object, which is only set in the
    `__main__` section of this file, so it can only be used once the command
    line has been parsed.

    @param names iterable of identifier, variable or spectator names
    @param length number of characters kept for names without a table entry
    @return collections.OrderedDict mapping name -> label, in the order of names
    """

    variablesPlotParamsDict = {'useCMSFrame(p)': [r'$p^*$', r"{\rm GeV}/c\, "],
                               'useCMSFrame(pt)': [r'$p_{\rm t}^*$', r"{\rm GeV}/c\, "],
                               'p': [r'$p$', r"{\rm GeV}/c\, "],
                               'pt': [r'$p_{\rm t}$', r"{\rm GeV}/c\, "],
                               'pLambda': [r'$p_{\Lambda}$', r"{\rm GeV}/c\, "],
                               'useCMSFrame(p)Lambda': [r'$p^*_{\Lambda}$', r"{\rm GeV}/c\, "],
                               'useCMSFrame(p)FSC': [r'$p^*_{\rm Slow}$', r"{\rm GeV}/c\, "],
                               'cosTheta': [r'$\cos{\theta}$', ""],
                               eId[args.BelleOrBelle2]: [r'$\mathcal{L}_{e}$', ""],
                               'eid_dEdx': [r'$\mathcal{L}_{e}^{{\rm d}E/{\rm d}x}$', ""],
                               'eid_TOP': [r'$\mathcal{L}_{e}^{\rm TOP}$', ""],
                               'eid_ARICH': [r'$\mathcal{L}_{e}^{\rm ARICH}$', ""],
                               'eid_ECL': [r'$\mathcal{L}_{e}^{\rm ECL}$', ""],
                               'BtagToWBosonVariables(recoilMassSqrd)': [r'$M_{\rm rec}^2$', r"{\rm GeV}^2/c^4"],
                               'BtagToWBosonVariables(pMissCMS)': [r'$p^*_{\rm miss}$', r"{\rm GeV}/c\, "],
                               'BtagToWBosonVariables(cosThetaMissCMS)': [r'$\cos{\theta^*_{\rm miss}}$', ""],
                               'BtagToWBosonVariables(EW90)': [r'$E_{90}^{W}$', r"{\rm GeV}\, "],
                               'BtagToWBosonVariables(recoilMass)': [r'$M_{\rm rec}$', r"{\rm GeV}/c^2\, "],
                               'cosTPTO': [r'$\vert\cos{\theta^*_{\rm T}}\vert$', ""],
                               'cosTPTOFSC': [r'$\vert\cos{\theta^*_{\rm T,Slow}}\vert$', ""],
                               'ImpactXY': [r'$d_0$', r"{\rm mm}\, "],
                               'distance': [r'$\xi_0$', r"{\rm mm}\, "],
                               'chiProb': [r'$p$-${\rm value}$', ""],
                               muId[args.BelleOrBelle2]: [r'$\mathcal{L}_{\mu}$', ""],
                               'muid_dEdx': [r'$\mathcal{L}_{\mu}^{{\rm d}E/{\rm d}x}$', ""],
                               'muid_TOP': [r'$\mathcal{L}_{\mu}^{\rm TOP}$', ""],
                               'muid_ARICH': [r'$\mathcal{L}_{\mu}^{\rm ARICH}$', ""],
                               'muid_KLM': [r'$\mathcal{L}_{\mu}^{\rm KLM}$', ""],
                               KId[args.BelleOrBelle2]: [r'$\mathcal{L}_{K}$', ""],
                               'Kid_dEdx': [r'$\mathcal{L}_{K}^{{\rm d}E/{\rm d}x}$', ""],
                               'Kid_TOP': [r'$\mathcal{L}_{K}^{\rm TOP}$', ""],
                               'Kid_ARICH': [r'$\mathcal{L}_{K}^{\rm ARICH}$', ""],
                               'NumberOfKShortsInRoe': [r'$n_{K^0_S}$', ""],
                               'ptTracksRoe': [r'$\Sigma\, p_{\rm t}^2$', r"{\rm GeV^2}/c^2"],
                               'extraInfo(isRightCategory(Kaon))': [r"$y_{\rm Kaon}$", ""],
                               'HighestProbInCat(pi+:inRoe, isRightCategory(SlowPion))': [r"$y_{\rm SlowPion}$", ""],
                               'KaonPionVariables(cosKaonPion)': [r'$\cos{\theta^*_{K\pi}}$', ""],
                               'KaonPionVariables(HaveOpositeCharges)': [r'$\frac{1 - q_{K} \cdot q_\pi}{2}$', ""],
                               'pionID': [r'$\mathcal{L}_{\pi}$', ""],
                               'piid_dEdx': [r'$\mathcal{L}_{\pi}^{{\rm d}E/{\rm d}x}$', ""],
                               'piid_TOP': [r'$\mathcal{L}_{\pi}^{\rm TOP}$', ""],
                               'piid_ARICH': [r'$\mathcal{L}_{\pi}^{\rm ARICH}$', ""],
                               'pi_vs_edEdxid': [r'$\mathcal{L}_{\pi/e}^{{\rm d}E/{\rm d}x}$', ""],
                               'FSCVariables(pFastCMS)': [r'$p^*_{\rm Fast}$', r"{\rm GeV}/c\, "],
                               'FSCVariables(cosSlowFast)': [r'$\cos{\theta^*_{\rm SlowFast}}$', ''],
                               'FSCVariables(cosTPTOFast)': [r'$\vert\cos{\theta^*_{\rm T, Fast}}\vert$', ''],
                               'FSCVariables(SlowFastHaveOpositeCharges)': [r'$\frac{1 - q_{\rm Slow} \cdot q_{\rm Fast}}{2}$', ""],
                               'lambdaFlavor': [r'$q_{\Lambda}$', ""],
                               'M': [r'$M_{\Lambda}$', r"{\rm MeV}/c^2\, "],
                               'cosAngleBetweenMomentumAndVertexVector': [
                                   r'$\cos{\theta_{\boldsymbol{x}_{\Lambda},\boldsymbol{p}_{\Lambda}}}$', ""],
                               'lambdaZError': [r'$\sigma_{\Lambda}^{zz}$', r"{\rm mm}\, "],
                               'daughter(0,p)': [r'$p_{\pi}$', r"{\rm GeV}/c\, "],
                               'daughter(0,useCMSFrame(p))': [r'$p^*_{\pi}$', r"{\rm GeV}/c\, "],
                               'daughter(1,p)': [r'$p_{p}$', r"{\rm GeV}/c"],
                               'daughter(1,useCMSFrame(p))': [r'$p^*_{p}$', r"{\rm GeV}/c\, "],
                               'daughter(1,protonID)': [r'$\mathcal{L}_{p}$', ""],
                               'daughter(0,pionID)': [r'$\mathcal{L}_{\pi}$', ""],
                               'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))': [
                                   r'${\rm Int.\ Muon}$'],
                               'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))': [r'${\rm Muon}$'],
                               'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))': [r'${\rm FSC}$'],
                               'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))': [r'${\rm Electron}$'],
                               'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))': [
                                   r'${\rm Int.\ El.}$'],
                               'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))': [r'${\rm Lambda}$'],
                               'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))': [r'${\rm Kaon}$' + '-' + r'${\rm Pion}$'],
                               'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))': [r'${\rm Fast\ Hadron}$'],
                               'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))': [
                                   r'${\rm Int.\ Kin.\ Lep.}$'],
                               'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))': [r'${\rm Max.}\,p^*$'],
                               'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))': [r'${\rm Slow\ Pion}$'],
                               'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))': [r'${\rm Kin.\ Lep.}$'],
                               'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))': [r'${\rm Kaon}$']}

    # For the Lambda category the variable 'distance' gets a different label.
    if 'LevelLambdaFBDT' in sum(args.identifiers, [])[0]:
        variablesPlotParamsDict['distance'] = [r'$\vert \boldsymbol{x}_{\Lambda}\vert$', r"{\rm mm}\, "]

    # Materialise once so that one-shot iterables work and the label of each
    # name is looked up only once.
    names = list(names)
    labels = [variablesPlotParamsDict[name][0] if name in variablesPlotParamsDict else name[:length]
              for name in names]

    # How often each label occurs in total; only repeated labels get numbered.
    occurrences = collections.Counter(labels)
    # Running number of the current occurrence of a repeated label.
    running = collections.Counter()

    abbreviations = collections.OrderedDict()
    for name, label in zip(names, labels):
        if occurrences[label] > 1:
            running[label] += 1
            abbreviations[name] = label + str(running[label])
        else:
            abbreviations[name] = label
    return abbreviations
171 
172 
if __name__ == '__main__':
    # Script entry point: applies the given MVA experts to test (and optionally
    # training) data, produces the evaluation plots and compiles them into one
    # PDF via b2latex.
    #
    # NOTE(review): the listing this block was reviewed from has gaps in its
    # line numbering exactly where the plot object `p` has to be created before
    # the first `p.add(...)` (original lines 401, 441, 457, 480, 515, 539, 552).
    # Those constructor lines are reconstructed below by analogy with
    # basf2_mva_evaluate and are tagged "reconstructed"; confirm every class
    # name against ftPlotting before relying on them.

    # Run ROOT in batch mode.
    ROOT.gROOT.SetBatch(True)

    old_cwd = os.getcwd()
    args = getCommandLineOptions()

    # The appended command line options are lists of lists; flatten them.
    identifiers = sum(args.identifiers, [])
    identifier_abbreviations = create_abbreviations(identifiers)

    datafiles = sum(args.datafiles, [])

    print("Load methods")
    methods = [basf2_mva_util.Method(identifier) for identifier in identifiers]

    print("Apply experts on independent data")
    test_probability = {}
    test_target = {}
    for method in methods:
        p, t = method.apply_expert(datafiles, args.treename)
        test_probability[identifier_abbreviations[method.identifier]] = p
        test_target[identifier_abbreviations[method.identifier]] = t

    print("Apply experts on training data")
    train_probability = {}
    train_target = {}
    if args.train_datafiles is not None:
        train_datafiles = sum(args.train_datafiles, [])
        for method in methods:
            p, t = method.apply_expert(train_datafiles, args.treename)
            train_probability[identifier_abbreviations[method.identifier]] = p
            train_target[identifier_abbreviations[method.identifier]] = t

    variables = unique(v for method in methods for v in method.variables)
    root_variables = unique(v for method in methods for v in method.root_variables)

    print("Here Variables")
    print(variables)

    # Target value that marks background; the combiner uses -1 instead of 0.
    bkgrOutput = 0
    displayHeatMap = False
    classOutputLabel = r'${\rm Classifier\ Output}$'
    isNN = False

    # All category-specific settings are derived from the FIRST identifier only.
    first_identifier = identifiers[0]
    flavor_tagger_tags = ('Combiner', 'KaonFBDT', 'Electron', 'Muon', 'Lepton', 'SlowPion',
                          'FastHadron', 'KaonPion', 'FSC', 'MaximumPstar', 'Lambda')

    if any(tag in first_identifier for tag in flavor_tagger_tags):

        if 'Combiner' in first_identifier:
            displayHeatMap = True
            bkgrOutput = -1

            variables = [
                'weightedQpOf(Lambda0:inRoe, isRightCategory(Lambda), isRightCategory(Lambda))',
                'QpOf(pi+:inRoe, isRightCategory(FastHadron), isRightCategory(FastHadron))',
                'QpOf(pi+:inRoe, isRightCategory(MaximumPstar), isRightCategory(MaximumPstar))',
                'QpOf(pi+:inRoe, isRightCategory(FSC), isRightCategory(SlowPion))',
                'QpOf(pi+:inRoe, isRightCategory(SlowPion), isRightCategory(SlowPion))',
                'QpOf(K+:inRoe, isRightCategory(KaonPion), isRightCategory(Kaon))',
                'weightedQpOf(K+:inRoe, isRightCategory(Kaon), isRightCategory(Kaon))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateKinLepton), isRightCategory(IntermediateKinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(KinLepton), isRightCategory(KinLepton))',
                'QpOf(mu+:inRoe, isRightCategory(IntermediateMuon), isRightCategory(IntermediateMuon))',
                'QpOf(mu+:inRoe, isRightCategory(Muon), isRightCategory(Muon))',
                'QpOf(e+:inRoe, isRightCategory(IntermediateElectron), isRightCategory(IntermediateElectron))',
                'QpOf(e+:inRoe, isRightCategory(Electron), isRightCategory(Electron))'
            ]
            variables = list(reversed(variables))

        # The checks below are deliberately independent and evaluated in this
        # order: a later match overrides the variables chosen by an earlier one.
        intermediate = 'Intermediate' if 'Intermediate' in first_identifier else ''

        if 'Electron' in first_identifier:
            variables = ft.getTrainingVariables(intermediate + 'Electron')

        if 'Muon' in first_identifier:
            variables = ft.getTrainingVariables(intermediate + 'Muon')

        if 'Lepton' in first_identifier:
            variables = ft.getTrainingVariables(intermediate + 'KinLepton')

        if 'KaonFBDT' in first_identifier:
            displayHeatMap = True
            variables = ft.getTrainingVariables('Kaon')

        for category in ('SlowPion', 'FastHadron', 'KaonPion', 'FSC', 'MaximumPstar'):
            if category in first_identifier:
                variables = ft.getTrainingVariables(category)

        if 'Lambda' in first_identifier:
            displayHeatMap = True
            variables = ft.getTrainingVariables('Lambda')

        # NOTE(review): for the combiner this is the second reversal of the
        # list built above -- confirm that the resulting order is intended.
        variables = list(reversed(variables))
        # Overwrite the ROOT branch names position by position. This assumes
        # root_variables has at least len(variables) entries; entries beyond
        # that are left untouched.
        for position, variable in enumerate(variables):
            root_variables[position] = Belle2.MakeROOTCompatible.makeROOTCompatible(variable)

        # Set special name (done after the ROOT names are fixed, so it only
        # changes the key used for the plot label lookup).
        # NOTE(review): the importance lookup further down uses these renamed
        # entries as keys of method.importances -- confirm it still finds them.
        if 'FSC' in first_identifier:
            variables = ['useCMSFrame(p)FSC' if v == 'useCMSFrame(p)' else v for v in variables]

        if 'Lambda' in first_identifier:
            variables = ['useCMSFrame(p)Lambda' if v == 'useCMSFrame(p)' else v for v in variables]

    # Axis label of the classifier output; for the levels the first matching
    # tag wins, so the order of this table matters.
    level_labels = (
        ('LevelMaximumPstar', r'$y_{{\rm Maximum}\, p^*}$'),
        ('LevelFSCFBDT', r'$y_{\rm FSC}$'),
        ('LevelMuonFBDT', r'$y_{\rm Muon}$'),
        ('LevelElectronFBDT', r'$y_{\rm Electron}$'),
        ('LevelKaonFBDT', r'$y_{\rm Kaon}$'),
        ('LevelLambdaFBDT', r'$y_{\rm Lambda}$'),
        ('LevelIntermediateKinLeptonFBDT', r'$y_{\rm Int.\, Kin.\, Lepton}$'),
        ('LevelKinLeptonFBDT', r'$y_{\rm Kin.\, Lepton}$'),
        ('LevelIntermediateMuon', r'$y_{\rm Int.\, Muon}$'),
        ('LevelIntermediateElectron', r'$y_{\rm Int.\, Electron}$'),
        ('LevelKaonPionFBDT', r'$y_{\rm Kaon-Pion}$'),
        ('LevelFastHadron', r'$y_{\rm Fast\, Hadron}$'),
        ('LevelSlowPion', r'$y_{\rm Slow\, Pion}$'),
    )
    if 'Combiner' in first_identifier:
        if 'FANN' in first_identifier:
            classOutputLabel = r'$(q\cdot r)_{\rm MLP}$'
            isNN = True
        if 'FBDT' in first_identifier:
            classOutputLabel = r'$(q\cdot r)_{\rm FBDT}$'
    else:
        for tag, label in level_labels:
            if tag in first_identifier:
                classOutputLabel = label
                break

    variable_abbreviations = create_abbreviations(variables)

    spectators = unique(v for method in methods for v in method.spectators)
    spectator_abbreviations = create_abbreviations(spectators)
    root_spectators = unique(v for method in methods for v in method.root_spectators)

    print("Load variables array")
    rootchain = ROOT.TChain(args.treename)
    for datafile in datafiles:
        rootchain.Add(datafile)

    variables_data = basf2_mva_util.tree2dict(rootchain, root_variables, list(variable_abbreviations.values()))
    spectators_data = basf2_mva_util.tree2dict(rootchain, root_spectators, list(spectator_abbreviations.values()))

    print("Create latex file")
    # Change working directory after experts run, because they might want to access
    # a localdb in the current working directory
    with tempfile.TemporaryDirectory() as tempdir:
        if args.working_directory == '':
            os.chdir(tempdir)
        else:
            os.chdir(args.working_directory)

        o = b2latex.LatexFile()
        o += b2latex.TitlePage(title='Automatic MVA Evaluation',
                               authors=[r'Thomas Keck\\ Moritz Gelb\\ Nils Braun'],
                               abstract='Evaluation plots',
                               add_table_of_contents=True).finish()

        o += b2latex.Section("Classifiers")
        o += b2latex.String(r"""
        This section contains the GeneralOptions and SpecificOptions of all classifiers represented by an XML tree.
        The same information can be retrieved using the basf2\_mva\_info tool.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of identifiers", "{name} & {abbr}", r"Identifier & Abbreviation")
        for identifier in identifiers:
            table.add(name=format.string(identifier), abbr=format.string(identifier_abbreviations[identifier]))
        o += table.finish()

#        for method in methods:
#            o += b2latex.SubSection(format.string(method.identifier))
#            o += b2latex.Listing(language='XML').add(method.description).finish()

        o += b2latex.Section("Variables")
        o += b2latex.String("""
        This section contains an overview of the importance and correlation of the variables used by the classifiers.
        And distribution plots of the variables on the independent dataset. The distributions are normed for signal and
        background separately, and only the region +- 3 sigma around the mean is shown.
        """)

        table = b2latex.LongTable(r"ll", "Abbreviations of variables", "{name} & {abbr}", r"Variable & Abbreviation")
        for v in variables:
            # The variable labels are already LaTeX, so they are not escaped with format.string.
            table.add(name=format.string(v), abbr=variable_abbreviations[v])
        o += table.finish()

        o += b2latex.SubSection("Importance")
        graphics = b2latex.Graphics()
        p = plotting.Importance()
        p.add({identifier_abbreviations[i.identifier]: np.array([i.importances.get(v, 0.0) for v in variables]) for i in methods},
              identifier_abbreviations.values(), variable_abbreviations.values(), displayHeatMap)
        p.finish()
        p.save('importance.pdf')
        graphics.add('importance.pdf', width=1.0)
        o += graphics.finish()

        o += b2latex.SubSection("Correlation")
        # Signal/background masks of the variable and spectator plots are taken
        # from the first classifier.
        first_identifier_abbr = list(identifier_abbreviations.values())[0]
        graphics = b2latex.Graphics()
        p = plotting.CorrelationMatrix()  # reconstructed (original line 401) -- confirm
        p.add(variables_data, variable_abbreviations.values(),
              test_target[first_identifier_abbr] == 1,
              test_target[first_identifier_abbr] == bkgrOutput, bkgrOutput)
        p.finish()
        p.save('correlation_plot.pdf')
        graphics.add('correlation_plot.pdf', width=1.0)
        o += graphics.finish()

        # Disabled on purpose in the original script; kept for reference.
        if False:
            graphics = b2latex.Graphics()
            p = plotting.TSNE()
            p.add(variables_data, variable_abbreviations.values(),
                  test_target[first_identifier_abbr] == 1,
                  test_target[first_identifier_abbr] == bkgrOutput)
            p.finish()
            p.save('tsne_plot.pdf')
            graphics.add('tsne_plot.pdf', width=1.0)
            o += graphics.finish()

#        for v in variables:
#            variable_abbr = variable_abbreviations[v]
#            o += b2latex.SubSection(format.string(v))
#            graphics = b2latex.Graphics()
#            p = plotting.VerboseDistribution(normed=True, range_in_std=3)
#            p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == 1, label=r"${\rm Signal}$")
#            p.add(variables_data, variable_abbr, test_target[first_identifier_abbr] == bkgrOutput, label=r"${\rm Background}$")
#            p.finish()
#            p.save('variable_{}.pdf'.format(hash(v)))
#            graphics.add('variable_{}.pdf'.format(hash(v)), width=1.0)
#            o += graphics.finish()

        o += b2latex.Section("Classifier Plot")
        # Adjacent literals are joined without a separator, hence the explicit
        # leading spaces.
        o += b2latex.String("This section contains the receiver operating characteristics (ROC), purity projection, ..."
                            " of the classifiers on training and independent data."
                            " The legend of each plot contains the shortened identifier and the area under the ROC curve"
                            " in parenthesis.")

        o += b2latex.Section("ROC Plot")
        graphics = b2latex.Graphics()
        p = plotting.RejectionOverEfficiency()  # reconstructed (original line 441) -- confirm
        for identifier in identifier_abbreviations.values():
            auc = p.add(test_probability, identifier, test_target[identifier] == 1, test_target[identifier] == bkgrOutput)
            o += b2latex.String("This is the Area under the ROC " + " ({:.2f})".format(auc) + ".")
            # With several identifiers the file holds the value of the last one.
            with open("AUCROCTest.txt", "w") as f:
                f.write("{:.6f}".format(auc))
        p.finish()
        # p.axis.set_title("ROC Rejection Plot on independent data")
        p.save('roc_plot_test.pdf')
        graphics.add('roc_plot_test.pdf', width=1.0)
        o += graphics.finish()

        # NOTE(review): the per-identifier plots below (roc_test.pdf,
        # classification_result.pdf, overtraining_plot.pdf) reuse one file name
        # for every identifier, so with several identifiers each included
        # figure points at the file written last. File names are kept as they
        # were; confirm whether they should be made unique.
        if train_probability:
            for identifier in identifiers:
                graphics = b2latex.Graphics()
                p = plotting.RejectionOverEfficiency()  # reconstructed (original line 457) -- confirm
                identifier_abbr = identifier_abbreviations[identifier]
                aucTrain = p.add(train_probability, identifier_abbr, train_target[identifier_abbr] == 1,
                                 train_target[identifier_abbr] == bkgrOutput, label=r'{\rm Train}')
                o += b2latex.String("This is the Area under the train ROC " + " ({:.2f})".format(aucTrain) + ". ")
                # Store the TRAIN value here (the test value was written before by mistake).
                with open("AUCROCTrain.txt", "w") as f:
                    f.write("{:.6f}".format(aucTrain))
                aucTest = p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                                test_target[identifier_abbr] == bkgrOutput, label=r'{\rm Test}')
                o += b2latex.String("This is the Area under the test ROC " + " ({:.2f})".format(aucTest) + ".")
                p.finish()
                # p.axis.set_title(identifier)
                p.save('roc_test.pdf')
                graphics.add('roc_test.pdf', width=1.0)
                o += graphics.finish()

        o += b2latex.Section("Classification Results")

        for identifier in identifiers:
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            graphics = b2latex.Graphics()
            # reconstructed (original line 480) -- confirm class and arguments
            p = plotting.Multiplot(plotting.PurityAndEfficiencyOverCut, 2)
            p.add(0, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=True)
            p.sub_plots[0].axis.set_title("Classification result in test data ")

            p.add(1, test_probability, identifier_abbr, test_target[identifier_abbr] == 1,
                  test_target[identifier_abbr] == bkgrOutput, normed=False)
            p.sub_plots[1].axis.set_title("Classification result in test data ")
            p.finish()

            p.save('classification_result.pdf')
            graphics.add('classification_result.pdf', width=1)
            o += graphics.finish()

        o += b2latex.Section("Diagonal Plot")
        graphics = b2latex.Graphics()
        p = plotting.Diagonal()
        for identifier in identifiers:
            # Look the abbreviation up BEFORE using it for the title; otherwise
            # the title shows the value left over from the previous iteration.
            identifier_abbr = identifier_abbreviations[identifier]
            o += b2latex.SubSection(format.string(identifier_abbr))
            p.add(test_probability, identifier_abbr, test_target[identifier_abbr] == 1, test_target[identifier_abbr] == bkgrOutput)
        p.finish()
        p.axis.set_title("Diagonal plot on independent data")
        p.save('diagonal_plot_test.pdf')
        graphics.add('diagonal_plot_test.pdf', width=1.0)
        o += graphics.finish()

        if train_probability:
            o += b2latex.SubSection("Overtraining Plot")
            for identifier in identifiers:
                identifier_abbr = identifier_abbreviations[identifier]
                # Concatenate train and test samples; train_mask marks which
                # entries of the combined arrays come from the training sample.
                probability = {identifier_abbr: np.r_[train_probability[identifier_abbr], test_probability[identifier_abbr]]}
                target = np.r_[train_target[identifier_abbr], test_target[identifier_abbr]]
                train_mask = np.r_[np.ones(len(train_target[identifier_abbr])), np.zeros(len(test_target[identifier_abbr]))]
                graphics = b2latex.Graphics()
                p = plotting.Overtraining()  # reconstructed (original line 515) -- confirm
                p.add(probability, identifier_abbr,
                      train_mask == 1, train_mask == 0,
                      target == 1, target == bkgrOutput, None, bkgrOutput, isNN)
                p.finish(xLabel=classOutputLabel)
                # p.axis.set_title("Overtraining check for " + str(identifier))
                p.save('overtraining_plot.pdf')
                graphics.add('overtraining_plot.pdf', width=1.0)
                o += graphics.finish()
                print("Finished Overtraining plot")

        o += b2latex.Section("Spectators")
        o += b2latex.String("This section contains the distribution and dependence on the"
                            " classifier outputs of all spectator variables.")

        table = b2latex.LongTable(r"ll", "Abbreviations of spectators", "{name} & {abbr}", r"Spectator & Abbreviation")
        for s in spectators:
            table.add(name=format.string(s), abbr=format.string(spectator_abbreviations[s]))
        o += table.finish()

        for spectator in spectators:
            spectator_abbr = spectator_abbreviations[spectator]
            o += b2latex.SubSection(format.string(spectator))
            graphics = b2latex.Graphics()
            p = plotting.VerboseDistribution()  # reconstructed (original line 539) -- confirm
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == 1, label="Signal")
            p.add(spectators_data, spectator_abbr, test_target[first_identifier_abbr] == bkgrOutput, label="Background")
            p.finish()
            # hash() of a str differs between interpreter runs, but it is stable
            # within this process, which is all the file names need.
            p.save('spectator_{}.pdf'.format(hash(spectator)))
            graphics.add('spectator_{}.pdf'.format(hash(spectator)), width=1.0)
            o += graphics.finish()

            for identifier in identifiers:
                o += b2latex.SubSubSection(format.string(spectator) + " with classifier " + format.string(identifier))
                identifier_abbr = identifier_abbreviations[identifier]
                data = {identifier_abbr: test_probability[identifier_abbr], spectator_abbr: spectators_data[spectator_abbr]}
                graphics = b2latex.Graphics()
                p = plotting.Correlation()  # reconstructed (original line 552) -- confirm
                p.add(data, spectator_abbr, identifier_abbr, list(range(10, 100, 10)),
                      test_target[identifier_abbr] == 1,
                      test_target[identifier_abbr] == bkgrOutput)
                p.finish()
                p.save('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)))
                graphics.add('correlation_plot_{}_{}.pdf'.format(hash(spectator), hash(identifier)), width=1.0)
                o += graphics.finish()

        o.save('latex.tex', compile=True)
        # Go back first so that a relative output file name is resolved against
        # the directory the script was started from; the copy has to happen
        # inside the with-block, before the temporary directory is removed.
        os.chdir(old_cwd)
        if args.working_directory == '':
            shutil.copy(tempdir + '/latex.pdf', args.outputfile)
        else:
            shutil.copy(args.working_directory + '/latex.pdf', args.outputfile)
static std::string makeROOTCompatible(std::string str)
Remove special characters that ROOT dislikes in branch names, e.g.