Belle II Software  release-06-01-15
B2A711-DeepContinuumSuppression_Input.py
1 #!/usr/bin/env python3
2 
3 
10 
11 
28 
29 import basf2
30 import modularAnalysis as ma
31 from stdV0s import stdKshorts
32 from stdPi0s import stdPi0s
33 from vertex import TagV
34 import glob
35 import sys
36 import numpy as np
37 import variables as v
38 from root_pandas import read_root
39 
40 basf2.set_log_level(basf2.LogLevel.ERROR)
41 
42 # --I/O----------------------------------------------------------------------------------------
43 step = 'train'
44 
45 if len(sys.argv) >= 2:
46  if sys.argv[1] not in ['train', 'test', 'apply_signal', 'apply_qqbar']:
47  sys.exit("usage:\n\tbasf2 B2A711-DeepContinuumSuppression_Input.py <train,test,apply_signal,apply_qqbar>")
48  else:
49  step = str(sys.argv[1])
50 
51 if step == 'train':
52  input_file_list = [basf2.find_file('ccbar_sample_to_train.root', 'examples', False),
53  basf2.find_file('Bd2K0spi0_to_train.root', 'examples', False)]
54 elif step == 'test':
55  input_file_list = [basf2.find_file('ccbar_sample_to_test.root', 'examples', False),
56  basf2.find_file('Bd2K0spi0_to_test.root', 'examples', False)]
57 elif step == 'apply_signal':
58  input_file_list = [basf2.find_file('Bd2K0spi0_to_test.root', 'examples', False)]
59 elif step == 'apply_qqbar':
60  input_file_list = [basf2.find_file('ccbar_sample_to_test.root', 'examples', False)]
61 else:
62  sys.exit('Step does not match any of the available samples: `train`, `test`, `apply_signal`or `apply_qqbar`')
63 
64 outfile = 'DNN_' + step + '.root'
65 # ---------------------------------------------------------------------------------------------
66 
67 # Perform analysis.
68 firstpath = basf2.Path()
69 
70 ma.inputMdstList('MC10', input_file_list, path=firstpath)
71 
72 firstpath.add_module('ProgressBar')
73 
74 # Build B candidate like in B2A701-ContinuumSuppression_Input.py
75 stdKshorts(path=firstpath)
76 stdPi0s('eff40_May2020', path=firstpath)
77 ma.reconstructDecay('B0 -> K_S0:merged pi0:eff40_May2020', '5.2 < Mbc < 5.3 and -0.3 < deltaE < 0.2', path=firstpath)
78 
79 ma.matchMCTruth('B0', path=firstpath)
80 ma.buildRestOfEvent('B0', path=firstpath)
81 
82 cleanMask = ('cleanMask', 'nCDCHits > 0 and useCMSFrame(p)<=3.2', 'p >= 0.05 and useCMSFrame(p)<=3.2')
83 ma.appendROEMasks('B0', [cleanMask], path=firstpath)
84 
85 ma.buildContinuumSuppression('B0', 'cleanMask', path=firstpath)
86 
87 # Accept only correctly reconstructed B candidates as signal
88 ma.applyCuts('B0', 'isSignal or isContinuumEvent', path=firstpath)
89 
90 # Tag B candidate for Vertex information
91 TagV('B0', path=firstpath)
92 
93 # Loop over each possible ROE (1 for every B candidate) in every event
94 roe_path = basf2.create_path()
95 
96 deadEndPath = basf2.create_path()
97 
98 ma.signalSideParticleFilter('B0', '', roe_path, deadEndPath)
99 
100 # Build particle lists for low level variables
101 ma.fillParticleList('gamma:roe', 'isInRestOfEvent == 1 and goodBelleGamma == 1', path=roe_path)
102 ma.fillParticleList('gamma:signal', 'isInRestOfEvent == 0 and goodBelleGamma == 1', path=roe_path)
103 ma.fillParticleList('pi+:chargedProe', 'isInRestOfEvent == 1', path=roe_path)
104 ma.fillParticleList('pi+:chargedPsignal', 'isInRestOfEvent == 0', path=roe_path)
105 ma.fillParticleList('pi-:chargedMroe', 'isInRestOfEvent == 1', path=roe_path)
106 ma.fillParticleList('pi-:chargedMsignal', 'isInRestOfEvent == 0', path=roe_path)
107 
108 v.variables.addAlias('cmsp', 'useCMSFrame(p)')
109 
110 ma.rankByHighest('gamma:roe', 'cmsp', path=roe_path)
111 ma.rankByHighest('gamma:signal', 'cmsp', path=roe_path)
112 ma.rankByHighest('pi+:chargedProe', 'cmsp', path=roe_path)
113 ma.rankByHighest('pi+:chargedPsignal', 'cmsp', path=roe_path)
114 ma.rankByHighest('pi-:chargedMroe', 'cmsp', path=roe_path)
115 ma.rankByHighest('pi-:chargedMsignal', 'cmsp', path=roe_path)
116 
117 # Define traditional Continuum Suppression Variables
118 contVars = [
119  'R2',
120  'thrustBm',
121  'thrustOm',
122  'cosTBTO',
123  'cosTBz',
124  'KSFWVariables(et)',
125  'KSFWVariables(mm2)',
126  'KSFWVariables(hso00)',
127  'KSFWVariables(hso02)',
128  'KSFWVariables(hso04)',
129  'KSFWVariables(hso10)',
130  'KSFWVariables(hso12)',
131  'KSFWVariables(hso14)',
132  'KSFWVariables(hso20)',
133  'KSFWVariables(hso22)',
134  'KSFWVariables(hso24)',
135  'KSFWVariables(hoo0)',
136  'KSFWVariables(hoo1)',
137  'KSFWVariables(hoo2)',
138  'KSFWVariables(hoo3)',
139  'KSFWVariables(hoo4)',
140  'CleoConeCS(1)',
141  'CleoConeCS(2)',
142  'CleoConeCS(3)',
143  'CleoConeCS(4)',
144  'CleoConeCS(5)',
145  'CleoConeCS(6)',
146  'CleoConeCS(7)',
147  'CleoConeCS(8)',
148  'CleoConeCS(9)'
149 ]
150 
151 # Define additional low level variables
152 basic_variables = ['p', 'phi', 'cosTheta', 'pErr', 'phiErr', 'cosThetaErr']
153 vertex_variables = ['distance', 'dphi', 'dcosTheta']
154 cluster_specific_variables = ['clusterNHits', 'clusterTiming', 'clusterE9E25', 'clusterReg', 'isInRestOfEvent']
155 track_specific_variables = ['kaonID', 'electronID', 'muonID', 'protonID', 'pValue', 'nCDCHits', 'isInRestOfEvent', 'charge']
156 
157 # Aliases from normal coordinates to thrustframe coordinates
158 for variablename in basic_variables + vertex_variables:
159  v.variables.addAlias('thrustsig' + variablename, 'useBThrustFrame(' + variablename + ',Signal)')
160 
161 cluster_variables = cluster_specific_variables[:]
162 for variablename in basic_variables:
163  cluster_variables.append('thrustsig' + variablename)
164 
165 track_variables = track_specific_variables
166 for variablename in basic_variables + vertex_variables:
167  track_variables.append('thrustsig' + variablename)
168 
169 # General variables and training targets, which are nice to have in the Ntuple
170 variables = ['isContinuumEvent', 'isNotContinuumEvent', 'isSignal', 'M', 'p', 'Mbc', 'DeltaZ',
171  'deltaE', 'daughter(0, M)', 'daughter(0, p)', 'daughter(1, M)', 'daughter(1, p)']
172 
173 # Aliases for variable ranks created by rankByHighest function
174 for rank in range(10):
175  for shortcut, particlelist in [('Croe', 'gamma:roe'), ('Csig', 'gamma:signal')]:
176  for variable in cluster_variables:
177  v.variables.addAlias(
178  f'{variable}_{shortcut}{rank}', f'getVariableByRank({particlelist}, cmsp, {variable}, {rank + 1})')
179  variables.append(f'{variable}_{shortcut}{rank}')
180 
181 for rank in range(5):
182  for shortcut, particlelist in [('TProe', 'pi+:chargedProe'), ('TPsig', 'pi+:chargedPsignal'),
183  ('TMroe', 'pi+:chargedMroe'), ('TMsig', 'pi+:chargedMsignal')]:
184  for variable in track_variables:
185  v.variables.addAlias(
186  f'{variable}_{shortcut}{rank}', f'getVariableByRank({particlelist}, cmsp, {variable}, {rank + 1})')
187  variables.append(f'{variable}_{shortcut}{rank}')
188 
189 # Create output file.
190 ma.variablesToNtuple('B0', variables + contVars, treename='tree', filename=outfile, path=roe_path)
191 
192 # Loop over each possible ROE (1 for every B candidate) in every event
193 firstpath.for_each('RestOfEvent', 'RestOfEvents', roe_path)
194 
195 basf2.process(firstpath)
196 print(basf2.statistics)
197 
198 # Shuffle Data. Use only if enough Ram is available
199 try:
200  df = read_root(outfile)
201  df = df.sample(frac=1)
202  df.to_root(outfile, key='tree')
203 except OSError as e:
204  print(e)