Belle II Software  release-08-00-10
B2A711-DeepContinuumSuppression_Input.py
1 #!/usr/bin/env python3
2 
3 
10 
11 
28 
29 import basf2
30 import modularAnalysis as ma
31 from stdV0s import stdKshorts
32 from stdPi0s import stdPi0s
33 from vertex import TagV
34 import sys
35 import variables as v
36 import uproot
37 
# Restrict basf2 logging to ERROR level.
basf2.set_log_level(basf2.LogLevel.ERROR)

# --I/O----------------------------------------------------------------------------------------
# Example input files read for each processing step. The step is taken from the
# first command-line argument and defaults to 'train' when none is given.
_samples_by_step = {
    'train': ['ccbar_sample_to_train.root', 'Bd2K0spi0_to_train.root'],
    'test': ['ccbar_sample_to_test.root', 'Bd2K0spi0_to_test.root'],
    'apply_signal': ['Bd2K0spi0_to_test.root'],
    'apply_qqbar': ['ccbar_sample_to_test.root'],
}

step = str(sys.argv[1]) if len(sys.argv) >= 2 else 'train'

# Reject an unknown step before any file lookup is attempted.
if step not in _samples_by_step:
    sys.exit("usage:\n\tbasf2 B2A711-DeepContinuumSuppression_Input.py <train,test,apply_signal,apply_qqbar>")

# Resolve the sample names of the selected step within the 'examples' data.
input_file_list = [basf2.find_file(_sample, 'examples', False) for _sample in _samples_by_step[step]]

# The output ntuple is named after the step, e.g. 'DNN_train.root'.
outfile = 'DNN_' + step + '.root'
# ---------------------------------------------------------------------------------------------
64 
# Perform analysis: create the main path and read the selected input files.
firstpath = basf2.Path()
ma.inputMdstList(input_file_list, path=firstpath)

firstpath.add_module('ProgressBar')
71 
# Build B candidate like in B2A701-ContinuumSuppression_Input.py
stdKshorts(path=firstpath)
stdPi0s('eff40_May2020', path=firstpath)
ma.reconstructDecay(
    'B0 -> K_S0:merged pi0:eff40_May2020',
    '5.2 < Mbc < 5.3 and -0.3 < deltaE < 0.2',
    path=firstpath,
)

# Match the B0 candidates to MC truth and build their rest of event (ROE).
ma.matchMCTruth('B0', path=firstpath)
ma.buildRestOfEvent('B0', path=firstpath)
79 
# ROE mask used when building the continuum suppression variables.
# NOTE(review): the tuple is presumably (mask name, track selection, cluster selection)
# -- confirm against the appendROEMasks documentation.
cleanMask = ('cleanMask', 'nCDCHits > 0 and useCMSFrame(p)<=3.2', 'p >= 0.05 and useCMSFrame(p)<=3.2')
ma.appendROEMasks('B0', [cleanMask], path=firstpath)

ma.buildContinuumSuppression('B0', 'cleanMask', path=firstpath)

# Keep B candidates that pass isSignal, plus any candidate passing isContinuumEvent.
ma.applyCuts('B0', 'isSignal or isContinuumEvent', path=firstpath)

# Tag B candidate for Vertex information
TagV('B0', path=firstpath)
90 
# Path that is run for each possible ROE (1 for every B candidate) in every event.
# It is attached to the main path further down via firstpath.for_each(...).
roe_path = basf2.create_path()

# Empty path handed to signalSideParticleFilter together with roe_path.
deadEndPath = basf2.create_path()

ma.signalSideParticleFilter('B0', '', roe_path, deadEndPath)
97 
# Build particle lists for low level variables.
# Each entry is (particle list name, selection cut); the same table drives both the
# filling and the ranking so the two steps cannot get out of sync.
_low_level_lists = [
    ('gamma:roe', 'isInRestOfEvent == 1 and goodBelleGamma == 1'),
    ('gamma:signal', 'isInRestOfEvent == 0 and goodBelleGamma == 1'),
    ('pi+:chargedProe', 'isInRestOfEvent == 1'),
    ('pi+:chargedPsignal', 'isInRestOfEvent == 0'),
    ('pi-:chargedMroe', 'isInRestOfEvent == 1'),
    ('pi-:chargedMsignal', 'isInRestOfEvent == 0'),
]

for _list_name, _list_cut in _low_level_lists:
    ma.fillParticleList(_list_name, _list_cut, path=roe_path)

# Alias used as the ranking variable below.
v.variables.addAlias('cmsp', 'useCMSFrame(p)')

# Rank every list by 'cmsp', highest first; the ranks are read back later through
# getVariableByRank.
for _list_name, _list_cut in _low_level_lists:
    ma.rankByHighest(_list_name, 'cmsp', path=roe_path)
114 
# Define traditional Continuum Suppression Variables:
# five event-shape variables, the KSFWVariables(...) family and CleoConeCS(1..9).
_ksfw_names = [
    'et', 'mm2',
    'hso00', 'hso02', 'hso04',
    'hso10', 'hso12', 'hso14',
    'hso20', 'hso22', 'hso24',
    'hoo0', 'hoo1', 'hoo2', 'hoo3', 'hoo4',
]

contVars = (
    ['R2', 'thrustBm', 'thrustOm', 'cosTBTO', 'cosTBz']
    + [f'KSFWVariables({_ksfw_name})' for _ksfw_name in _ksfw_names]
    + [f'CleoConeCS({_cone})' for _cone in range(1, 10)]
)
148 
# Define additional low level variables.
# basic_variables and vertex_variables are later wrapped into 'thrustsig...' aliases;
# the *_specific_variables are used per cluster / per track as they are.
basic_variables = ['p', 'phi', 'cosTheta', 'pErr', 'phiErr', 'cosThetaErr']
vertex_variables = ['distance', 'dphi', 'dcosTheta']
cluster_specific_variables = [
    'clusterNHits', 'clusterTiming', 'clusterE9E25', 'clusterReg', 'isInRestOfEvent',
]
track_specific_variables = [
    'kaonID', 'electronID', 'muonID', 'protonID', 'pValue', 'nCDCHits', 'isInRestOfEvent', 'charge',
]
154 
# Aliases from normal coordinates to thrustframe coordinates:
# 'thrustsig<name>' evaluates <name> through useBThrustFrame(<name>,Signal).
for variablename in basic_variables + vertex_variables:
    v.variables.addAlias(f'thrustsig{variablename}', f'useBThrustFrame({variablename},Signal)')
158 
# Variables stored per cluster: the cluster-specific ones plus the thrust-frame
# aliases of the basic variables.
cluster_variables = cluster_specific_variables + [
    'thrustsig' + variablename for variablename in basic_variables
]

# Variables stored per track: the track-specific ones plus the thrust-frame aliases
# of the basic and vertex variables. A new list is built on purpose, so that
# track_specific_variables itself is not extended as a side effect.
track_variables = track_specific_variables + [
    'thrustsig' + variablename for variablename in basic_variables + vertex_variables
]
166 
# General variables and training targets, which are nice to have in the Ntuple.
# The rank aliases for the low level variables are appended to this list afterwards.
variables = [
    'isContinuumEvent', 'isNotContinuumEvent', 'isSignal',
    'M', 'p', 'Mbc', 'DeltaZ', 'deltaE',
    'daughter(0, M)', 'daughter(0, p)',
    'daughter(1, M)', 'daughter(1, p)',
]
170 
def _add_rank_aliases(n_ranks, ranked_lists, per_particle_variables):
    """Register one alias per (rank, particle list, variable) and return the alias names.

    Each alias ``<variable>_<shortcut><rank>`` points to
    ``getVariableByRank(<particlelist>, cmsp, <variable>, <rank + 1>)``, i.e. the
    zero-based rank in the alias name maps to the one-based rank argument.

    Args:
        n_ranks: number of ranks to create aliases for (0 .. n_ranks - 1).
        ranked_lists: sequence of (shortcut, particle list name) pairs.
        per_particle_variables: variable names evaluated for each ranked particle.

    Returns:
        The alias names, ordered by rank, then particle list, then variable.
    """
    alias_names = []
    for rank in range(n_ranks):
        for shortcut, particlelist in ranked_lists:
            for variable in per_particle_variables:
                alias = f'{variable}_{shortcut}{rank}'
                v.variables.addAlias(alias, f'getVariableByRank({particlelist}, cmsp, {variable}, {rank + 1})')
                alias_names.append(alias)
    return alias_names


# Aliases for variable ranks created by rankByHighest function.
# Clusters: the ten highest ranked photons of the ROE and of the signal side.
variables += _add_rank_aliases(
    10,
    [('Croe', 'gamma:roe'), ('Csig', 'gamma:signal')],
    cluster_variables,
)

# Tracks: the five highest ranked entries of each charged list.
# NOTE(review): the 'TM*' lists are filled as 'pi-:chargedMroe' / 'pi-:chargedMsignal'
# but addressed here with the 'pi+:' prefix; presumably both charge-conjugate names
# refer to the same list content -- confirm before changing these strings.
variables += _add_rank_aliases(
    5,
    [('TProe', 'pi+:chargedProe'), ('TPsig', 'pi+:chargedPsignal'),
     ('TMroe', 'pi+:chargedMroe'), ('TMsig', 'pi+:chargedMsignal')],
    track_variables,
)
186 
# Create output file: one entry per B0 candidate, written from within the ROE path.
ma.variablesToNtuple('B0', variables + contVars, treename='tree', filename=outfile, path=roe_path)

# Loop over each possible ROE (1 for every B candidate) in every event
firstpath.for_each('RestOfEvent', 'RestOfEvents', roe_path)

# Run the main path and print the module statistics afterwards.
basf2.process(firstpath)
print(basf2.statistics)
195 
# Shuffle Data. Use only if enough Ram is available: the whole tree is loaded
# into memory as a DataFrame before it is written back.
try:
    # Read everything and close the input before the same path is recreated, so the
    # file is never truncated while it is still open for reading.
    with uproot.open(outfile) as outf:
        df = outf['tree'].arrays(library='pd')

    # frac=1 returns all rows in random order.
    df = df.sample(frac=1)

    with uproot.recreate(outfile) as outf:
        outf['tree'] = df
except OSError as e:
    # Best effort: report the problem and keep whatever is on disk.
    print(e)