Belle II Software  release-05-02-19
trainKshortClassifier.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Marcel Hohmann (marcel.hohmann@desy.de)

import os
import shutil
import subprocess
import sys

import basf2
from modularAnalysis import inputMdst, matchMCTruth, variablesToNtuple, process, statistics
from stdV0s import stdKshorts

# --- command line arguments ---
# Usage: trainKshortClassifier.py [input_file_name [identifier]]
# Both arguments are optional; a missing one falls back to the default below.
# Only IndexError is caught so that Ctrl-C / SystemExit are never swallowed.
try:
    input_file_name = str(sys.argv[1])
except IndexError:
    input_file_name = (
        '/hsm/belle2/bdata/MC/release-01-00-03/DB00000294/MC10/prod00004770/'
        's00/e0000/4S/r00000/mixed/mdst/sub00/mdst_00000*_prod00004770_task0000000*.root'
    )

try:
    identifier = sys.argv[2]
except IndexError:
    identifier = 'Kshort_FastBDT.xml'  # by default train to xml then upload to localdb


# --- training settings ---
# Name of the tree and of the ROOT file that hold the training ntuple.
tree_name = 'ks_training_variables'
training_file_name = 'KshortClassifierTrainingData.root'

# Input variables handed to the classifier (variable-manager expressions).
my_variables = [
    'SigM',
    'formula( E / E_uncertainty )',
    'formula( flightTime / flightTimeErr)',
    'cosAngleBetweenMomentumAndVertexVector',
    'min(abs(daughter(0, d0)),abs(daughter(1, d0)))',
    'formula(daughter(0, pionID) + daughter(1, pionID))',
]

# Training target; written to the ntuple next to the input variables.
target_variable = 'isSignal'


# --- create training data set ---
# Steering path: read the input mDST, run the standard K_S0 selection,
# attach the MC truth to 'K_S0:merged' and write the classifier inputs
# together with the target into a flat ntuple.
training_path = basf2.core.Path()
inputMdst('default', input_file_name, path=training_path)
stdKshorts(path=training_path)
matchMCTruth('K_S0:merged', path=training_path)

# The target is stored alongside the inputs so the teacher can read both
# from the same tree.
ntuple_branches = my_variables + [target_variable]
variablesToNtuple('K_S0:merged', ntuple_branches, tree_name, training_file_name, path=training_path)

# Second argument is the event limit (2e5) passed on to basf2.
process(training_path, 200000)
print(statistics)


# --- train variables ---
# The teacher is invoked with an argument list rather than a shell string:
# the variable expressions contain spaces and parentheses and the identifier
# comes from the command line, so nothing here should be re-parsed by a shell.
training_command = [
    'basf2_mva_teacher',
    '--datafiles', training_file_name,
    '--treename', tree_name,
    '--identifier', identifier,
    '--variables', *my_variables,
    '--target_variable', target_variable,
    '--method', 'FastBDT',
    '--nTrees', '400',
    '--nCutLevels', '8',
    '--nLevels', '4',
]

# check=True: abort on a failed training (raises CalledProcessError) instead
# of carrying on and uploading a missing or stale weight file.
subprocess.run(training_command, check=True)

# --- interval of validity and upload switches ---
ex_b = 0  # experiment begin, 0 for all of them
ex_e = -1  # experiment end, -1 for all of them
run_b = 0  # run begin, 0 for all
run_e = -1  # run end, -1 for all of them
tag_name = "development"  # global tag name

upload = False  # upload to conditions database
remove_local_files = False  # delete local db and training data

# Database identifier = weight-file name without a trailing '.xml'.
# Only a real suffix is stripped; '.xml' elsewhere in the name is kept.
xml_suffix = '.xml'
if identifier.endswith(xml_suffix):
    db_identifier = identifier[:-len(xml_suffix)]
else:
    db_identifier = identifier

# upload to local database from xml file
# check=True: a failed upload raises CalledProcessError instead of being ignored.
subprocess.run(
    [
        'basf2_mva_upload',
        '--identifier', identifier,
        '--db_identifier', db_identifier,
        '--begin_experiment', str(ex_b),
        '--end_experiment', str(ex_e),
        '--begin_run', str(run_b),
        '--end_run', str(run_e),
    ],
    check=True,
)

here = os.getcwd()
local_db_dir = os.path.join(here, 'localdb')
data_base_file = os.path.join(local_db_dir, 'database.txt')

# upload to global database
if upload:
    subprocess.run(['conditionsdb', 'upload', tag_name, data_base_file], check=True)

# Optional cleanup of everything this script produced in the working directory.
if remove_local_files:
    # Best effort: a directory that is already gone is not an error.
    shutil.rmtree(local_db_dir, ignore_errors=True)
    for leftover in (os.path.join(here, training_file_name), os.path.join(here, identifier)):
        try:
            os.remove(leftover)
        except FileNotFoundError:
            # Nothing to delete (e.g. the identifier was not a local file).
            pass
variablesToNtuple
Definition: variablesToNtuple.py:1