Belle II Software development
config.py
1#!/usr/bin/env python3
2
3
10
11"""
12 Configuration classes
13
14 The classes defined here are used to uniquely define a FEI training.
15 Meaning:
16 - The global configuration like database prefix, cache mode, monitoring, ... (FeiConfiguration)
17 - The reconstructed Particles (Particle)
18 - The reconstructed Channels of each particle (DecayChannel)
19 - The MVA configuration for each channel (MVAConfiguration)
20 - The Cut definitions of each channel (PreCutConfiguration)
21 - The Cut definitions of each particle (PostCutConfiguration)
22"""
23
24import collections
25import copy
26import re
27import itertools
28import typing
29import basf2
30
# Define the classes at top level so that they are picklable.
# Each class is created via namedtuple, which behaves like a struct in C.
33
# Global FEI settings, kept in an immutable namedtuple record.
FeiConfiguration = collections.namedtuple(
    'FeiConfiguration',
    ['prefix', 'cache', 'monitor', 'legacy', 'externTeacher', 'training', 'roundMode', 'monitoring_path'],
)
# Every field is optional; the defaults below are listed in field order.
FeiConfiguration.__new__.__defaults__ = (
    'FEI_TEST',           # prefix
    None,                 # cache
    True,                 # monitor
    None,                 # legacy
    'basf2_mva_teacher',  # externTeacher
    False,                # training
    0,                    # roundMode
    '',                   # monitoring_path
)
FeiConfiguration.__doc__ = "Fei Global Configuration class"
FeiConfiguration.prefix.__doc__ = "The database prefix used for all weight files"
FeiConfiguration.cache.__doc__ = (
    "The stage which is passed as input, it is assumed that all previous stages"
    " do not have to be reconstructed again. Can be either a number or"
    " a filename containing a pickled number or"
    " None in this case the environment variable FEI_STAGE is used."
)
FeiConfiguration.monitor.__doc__ = (
    "Determines the level of monitoring histograms to create. "
    "Set to False to disable monitoring. "
    "Set to 'simple' to enable lightweight histograms. "
    "Any other value will enable full monitoring histograms."
)
FeiConfiguration.legacy.__doc__ = (
    "Pass the summary file of a legacy FEI training,"
    " and the algorithm will be able to apply this training."
)
FeiConfiguration.externTeacher.__doc__ = "Teacher command e.g. basf2_mva_teacher, b2mva-kekcc-cluster-teacher"
FeiConfiguration.training.__doc__ = "If you train the FEI set this to True, otherwise to False"
FeiConfiguration.roundMode.__doc__ = "Round mode for the training. 0 default, 1 resuming, 2 finishing, 3 retraining."
FeiConfiguration.monitoring_path.__doc__ = "Path where monitoring histograms are stored."
55
56
# Per-channel classifier settings, kept in an immutable namedtuple record.
MVAConfiguration = collections.namedtuple(
    'MVAConfiguration',
    ['method', 'config', 'variables', 'target', 'sPlotVariable', 'spectators'],
)
# Every field is optional; the defaults below are listed in field order.
MVAConfiguration.__new__.__defaults__ = (
    'FastBDT',                                                                    # method
    '--nTrees 400 --nCutLevels 10 --nLevels 3 --shrinkage 0.1 --randRatio 0.5',  # config
    None,                                                                         # variables
    'isSignal',                                                                   # target
    None,                                                                         # sPlotVariable
    {},                                                                           # spectators
)
MVAConfiguration.__doc__ = "Multivariate analysis configuration class."
MVAConfiguration.method.__doc__ = "Method used by MVAInterface."
MVAConfiguration.config.__doc__ = "Method specific configuration string passed to basf2_mva_teacher"
MVAConfiguration.variables.__doc__ = (
    "List of variables from the VariableManager."
    " {} is expanded to one variable per daughter particle."
)
MVAConfiguration.target.__doc__ = "Target variable from the VariableManager."
MVAConfiguration.sPlotVariable.__doc__ = "Discriminating variable used by sPlot to do data-driven training."
MVAConfiguration.spectators.__doc__ = "Dictionary of spectator variables with their ranges from the VariableManager."
69
70
# Cuts applied to the candidates of a channel before its classifier is trained.
PreCutConfiguration = collections.namedtuple(
    'PreCutConfiguration',
    [
        'userCut',
        'vertexCut',
        'noBackgroundSampling',
        'bestCandidateVariable',
        'bestCandidateCut',
        'bestCandidateMode',
        'noSignalSampling',
        'bkgSamplingFactor',
    ],
)
# Every field is optional; the defaults below are listed in field order.
PreCutConfiguration.__new__.__defaults__ = (
    '',        # userCut
    -2,        # vertexCut
    False,     # noBackgroundSampling
    None,      # bestCandidateVariable
    0,         # bestCandidateCut
    'lowest',  # bestCandidateMode
    False,     # noSignalSampling
    1.0,       # bkgSamplingFactor
)
PreCutConfiguration.__doc__ = "PreCut configuration class. These cuts is employed before training the mva classifier."
PreCutConfiguration.userCut.__doc__ = (
    "The user cut is passed directly to the ParticleCombiner."
    " Particles which do not pass this cut are immediately discarded."
)
PreCutConfiguration.vertexCut.__doc__ = "The vertex cut is passed as confidence level to the VertexFitter."
PreCutConfiguration.noBackgroundSampling.__doc__ = (
    "For very pure channels, the background sampling factor is too high"
    " and the MVA can't be trained. This disables background sampling."
)
PreCutConfiguration.bestCandidateVariable.__doc__ = "Variable from the VariableManager which is used to rank all candidates."
PreCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep after the best-candidate ranking."
PreCutConfiguration.bestCandidateMode.__doc__ = "Either lowest or highest."
PreCutConfiguration.noSignalSampling.__doc__ = (
    "For channels with unknown br. frac., the signal sampling factor can be"
    " overestimated and you loose signal samples in the training."
    " This disables signal sampling."
)
PreCutConfiguration.bkgSamplingFactor.__doc__ = "Add additional multiplicative bkg. sampling factor, less than 1.0 to reduce."
88
# Cuts applied to the candidates of a particle after its classifiers were trained.
PostCutConfiguration = collections.namedtuple('PostCutConfiguration', ['value', 'bestCandidateCut'])
# Both fields are optional; the defaults below are listed in field order.
PostCutConfiguration.__new__.__defaults__ = (
    0.0,  # value
    0,    # bestCandidateCut
)
PostCutConfiguration.__doc__ = "PostCut configuration class. This cut is employed after the training of the mva classifier."
PostCutConfiguration.value.__doc__ = "Absolute value used to cut on the SignalProbability of each candidate."
PostCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep, ranked by SignalProbability."
94
# Description of a single decay channel of a particle, kept in an immutable namedtuple record.
DecayChannel = collections.namedtuple(
    'DecayChannel',
    ['name', 'label', 'decayString', 'daughters', 'mvaConfig', 'preCutConfig', 'decayModeID', 'pi0veto'],
)
# The first seven fields default to None, the trailing pi0veto flag defaults to False.
DecayChannel.__new__.__defaults__ = (None,) * 7 + (False,)
DecayChannel.__doc__ = "Decay channel of a Particle."
DecayChannel.name.__doc__ = "str:Name of the channel e.g. :code:`D0:generic_0`"
DecayChannel.label.__doc__ = "Label used to identify the decay channel e.g. for weight files independent of decayModeID"
DecayChannel.decayString.__doc__ = "DecayDescriptor of the channel e.g. D0 -> K+ pi-"
DecayChannel.daughters.__doc__ = "List of daughter particles of the decay channel e.g. [K+, pi-]"
DecayChannel.mvaConfig.__doc__ = "MVAConfiguration object which is used for this channel."
DecayChannel.preCutConfig.__doc__ = "PreCutConfiguration object which is used for this channel."
DecayChannel.decayModeID.__doc__ = "DecayModeID of this channel. Unique ID for each channel of this particle."
DecayChannel.pi0veto.__doc__ = (
    "If true, additional pi0veto variables are added to the MVAs, useful only for decays with gammas."
)
108
# Histogram binning used for monitoring, keyed by variable name.
# Each value is the tuple (variable name, number of bins, lower edge, upper edge).
MonitoringVariableBinning = {
    name: (name, nbins, low, high)
    for name, nbins, low, high in (
        ('mcErrors', 513, -0.5, 512.5),
        ('mcParticleStatus', 257, -0.5, 256.5),
        ('dM', 100, -1.0, 1.0),
        ('dQ', 100, -1.0, 1.0),
        ('abs(dM)', 100, 0.0, 1.0),
        ('abs(dQ)', 100, 0.0, 1.0),
        ('pionID', 100, 0.0, 1.0),
        ('kaonID', 100, 0.0, 1.0),
        ('protonID', 100, 0.0, 1.0),
        ('electronID', 100, 0.0, 1.0),
        ('muonID', 100, 0.0, 1.0),
        ('isSignal', 2, -0.5, 1.5),
        ('isSignalAcceptMissingNeutrino', 2, -0.5, 1.5),
        ('isPrimarySignal', 2, -0.5, 1.5),
        ('chiProb', 100, 0.0, 1.0),
        ('Mbc', 100, 5.1, 5.4),
        ('cosThetaBetweenParticleAndNominalB', 100, -10.0, 10.0),
        ('extraInfo(SignalProbability)', 100, 0.0, 1.0),
        ('extraInfo(decayModeID)', 101, -0.5, 100.5),
        ('extraInfo(uniqueSignal)', 2, -0.5, 1.5),
        ('extraInfo(preCut_rank)', 41, -0.5, 40.5),
        ('extraInfo(postCut_rank)', 41, -0.5, 40.5),
        ('daughterProductOf(extraInfo(SignalProbability))', 100, 0.0, 1.0),
        ('pValueCombinationOfDaughters(extraInfo(SignalProbability))', 100, 0.0, 1.0),
    )
}
136
137
def variables2binnings(variables):
    """
    Convert the given variable names into binning tuples which can be given to VariableToHistogram.

    Variables listed in MonitoringVariableBinning use their dedicated binning,
    every other variable gets 100 bins between -10.0 and 10.0.
    @param variables iterable of variable names
    @return list of (name, number of bins, lower edge, upper edge) tuples, in input order
    """
    binnings = []
    for name in variables:
        binnings.append(MonitoringVariableBinning.get(name, (name, 100, -10.0, 10.0)))
    return binnings
143
144
def variables2binnings_2d(variables):
    """
    Convert the given pairs of variable names into binning tuples which can be given to VariableToHistogram.

    For each pair the binnings of both variables are concatenated into one 8-tuple.
    Variables listed in MonitoringVariableBinning use their dedicated binning,
    every other variable gets 100 bins between -10.0 and 10.0.
    @param variables iterable of (name1, name2) pairs
    @return list of concatenated binning tuples, in input order
    """
    def lookup(name):
        # Dedicated binning if one is configured, otherwise the generic fallback
        return MonitoringVariableBinning.get(name, (name, 100, -10.0, 10.0))

    return [lookup(first) + lookup(second) for first, second in variables]
155
156
def removeJPsiSlash(string: str) -> str:
    """
    Strip every '/' from the given string, e.g. to turn the particle name J/psi into Jpsi.
    @param string text which may contain slashes
    @return the text without any '/' characters
    """
    pieces = string.split('/')
    return ''.join(pieces)
162
163
class Particle:

    """
    The Particle class is the only class the end-user gets into contact with.
    The user creates an instance of this class for every particle they want to reconstruct with the FEI algorithm,
    and provides MVAConfiguration, PreCutConfiguration and PostCutConfiguration. These can be overwritten per channel.
    """

    def __init__(self, identifier: str,
                 mvaConfig: MVAConfiguration,
                 preCutConfig: PreCutConfiguration = PreCutConfiguration(),
                 postCutConfig: PostCutConfiguration = PostCutConfiguration()):
        """
        Creates a Particle without any decay channels. To add decay channels use addChannel method.
            @param identifier is the pdg name of the particle as a string
                   with an optional additional user label separated by ':'
            @param mvaConfig multivariate analysis configuration
            @param preCutConfig intermediate pre cut configuration
            @param postCutConfig post cut configuration
        """
        # pdg name of the particle with an optional additional user label separated by ':';
        # the label 'generic' is appended if the user did not provide one
        self.identifier = identifier if ':' in identifier else identifier + ':generic'
        v = self.identifier.split(':')
        # The name of the particle (part of the identifier before the first ':')
        self.name = v[0]
        # Additional label like hasMissing or has2Daughters (part after the first ':')
        self.label = v[1]
        # multivariate analysis configuration (see MVAConfiguration)
        self.mvaConfig = mvaConfig
        # DecayChannel objects added by addChannel() method
        self.channels = []
        # intermediate cut configuration (see PreCutConfiguration)
        self.preCutConfig = preCutConfig
        # post cut configuration (see PostCutConfiguration)
        self.postCutConfig = postCutConfig

    def __eq__(self, a):
        """
        Compares two Particle objects.
        They are equal if their identifier, name, label, all channels, preCutConfig and postCutConfig are equal.
        Comparing with an object which is not a Particle yields NotImplemented (i.e. "not equal")
        instead of raising an AttributeError.
            @param a another Particle object
        """
        if not isinstance(a, Particle):
            return NotImplemented
        return (self.identifier == a.identifier and self.name == a.name and self.label == a.label and
                self.channels == a.channels and self.preCutConfig == a.preCutConfig and
                self.postCutConfig == a.postCutConfig)

    def __str__(self):
        """
        Creates a string representation of a Particle object.
        """
        return str((self.identifier, self.channels, self.preCutConfig, self.postCutConfig, self.mvaConfig))

    def __hash__(self):
        """
        Creates a hash of a Particle object.
        This is necessary to use this as a key in a dictionary.

        Only the identifier enters the hash: the list of channels (and the lists stored inside the
        channels) is not hashable, so hashing it raises a TypeError. Particles which compare equal
        always share the same identifier, therefore this hash is consistent with __eq__.
        """
        return hash(self.identifier)

    @property
    def daughters(self):
        """ Property returning list of unique daughter particles of all channels (in order of first appearance) """
        # dict.fromkeys removes duplicates while keeping a reproducible order
        return list(dict.fromkeys(daughter for channel in self.channels for daughter in channel.daughters))

    @staticmethod
    def _expandVariable(variable: str, nDaughters: int) -> typing.List[str]:
        """
        Expands the daughter placeholders of a single generic MVA variable.
        A variable can contain either empty brackets {}, which are replaced by all combinations
        of daughter indices, or ranges like {1..3}, {..2}, {1..} or {..}, which are replaced by
        all indices inside the half-open range (missing bounds default to 0 and the number of daughters).
            @param variable variable name, possibly containing placeholders
            @param nDaughters number of daughters of the decay channel
            @return list of ordinary variable names, empty if the variable does not fit to the channel
        """
        if '{' not in variable:
            return [variable]
        matches = re.findall(r'\{\s*\d*\s*\.\.\s*\d*\s*\}', variable)
        nPlaceholders = variable.count('{}')
        if not matches and nPlaceholders == 0:
            # Contains a curly bracket, but nothing we are supposed to expand
            return [variable]
        if matches and nPlaceholders > 0:
            basf2.B2FATAL(f'Variable {variable} contains both {{}} and {matches}. Only one is allowed!')
            return []
        if matches:
            ranges = []
            for match in matches:
                # The pattern allows whitespace around the bounds, so strip it before checking for open bounds
                lowerText, upperText = (part.strip() for part in match[1:-1].split('..'))
                lower = int(lowerText) if lowerText else 0
                if lower >= nDaughters:
                    basf2.B2DEBUG(11, f'Variable {variable} contains index {lower} which is more than daughters, skipping!')
                    return []
                upper = int(upperText) if upperText else nDaughters
                if upper > nDaughters:
                    basf2.B2DEBUG(11, f'Variable {variable} contains index {upper} which is more than daughters, skipping!')
                    return []
                ranges.append((lower, upper))
            if len(ranges) == 1:
                lower, upper = ranges[0]
                return [variable.replace(matches[0], str(index)) for index in range(lower, upper)]
            # Several ranges: turn them into plain placeholders and fill in the cartesian product of all ranges
            template = variable
            for match in matches:
                template = template.replace(match, '{}')
            return [template.format(*indices)
                    for indices in itertools.product(*(range(lower, upper) for lower, upper in ranges))]
        if nPlaceholders > nDaughters:
            basf2.B2DEBUG(11, f'Variable {variable} contains more brackets than daughters, which is why it will be ignored!')
            return []
        return [variable.format(*indices) for indices in itertools.combinations(range(nDaughters), nPlaceholders)]

    def addChannel(self,
                   daughters: typing.Sequence[str],
                   mvaConfig: typing.Optional[MVAConfiguration] = None,
                   preCutConfig: typing.Optional[PreCutConfiguration] = None,
                   pi0veto: bool = False):
        """
        Appends a new decay channel to the Particle object.
            @param daughters is a list of pdg particle names e.g. ['pi+','K-']
            @param mvaConfig multivariate analysis configuration
            @param preCutConfig pre cut configuration object
            @param pi0veto if true, additional pi0veto variables are added to the MVA configuration
            @return the Particle object itself, so that calls can be chained
        """
        # Append generic label to all defined daughters if no label was set yet
        daughters = [d + ':generic' if ':' not in d else d for d in daughters]
        # Use default mvaConfig of this particle if no channel-specific config is given
        mvaConfig = copy.deepcopy(self.mvaConfig if mvaConfig is None else mvaConfig)
        # Use default preCutConfig of this particle if no channel-specific config is given
        preCutConfig = copy.deepcopy(self.preCutConfig if preCutConfig is None else preCutConfig)
        # At the moment all channels must have the same target variable. Why?
        if mvaConfig is not None and mvaConfig.target != self.mvaConfig.target:
            basf2.B2FATAL(
                f'Particle {self.identifier} has common target {self.mvaConfig.target}, while channel '
                f'{" ".join(daughters)} has {mvaConfig.target}. Each particle must have exactly one target!')
        # Replace generic-variables with ordinary variables.
        # All instances of {} are replaced with all combinations of daughter indices
        mvaVars = []
        for v in mvaConfig.variables:
            mvaVars += self._expandVariable(v, len(daughters))
        mvaConfig = mvaConfig._replace(variables=mvaVars)
        # Add new channel, the decayModeID is the position of the channel inside this particle
        decayModeID = len(self.channels)
        channelName = self.identifier + '_' + str(decayModeID)
        self.channels.append(DecayChannel(name=channelName,
                                          label=removeJPsiSlash(self.identifier + ' ==> ' + ' '.join(daughters)),
                                          decayString=channelName + ' -> ' + ' '.join(daughters),
                                          daughters=daughters,
                                          mvaConfig=mvaConfig,
                                          preCutConfig=preCutConfig,
                                          decayModeID=decayModeID,
                                          pi0veto=pi0veto))
        return self
mvaConfig
multivariate analysis configuration (see MVAConfiguration)
Definition config.py:192
label
Additional label like hasMissing or has2Daughters.
Definition config.py:190
postCutConfig
post cut configuration (see PostCutConfiguration)
Definition config.py:198
preCutConfig
intermediate cut configuration (see PreCutConfiguration)
Definition config.py:196
__init__(self, str identifier, MVAConfiguration mvaConfig, PreCutConfiguration preCutConfig=PreCutConfiguration(), PostCutConfiguration postCutConfig=PostCutConfiguration())
Definition config.py:175
name
The name of the particle as correct pdg name e.g.
Definition config.py:188
list channels
DecayChannel objects added by addChannel() method.
Definition config.py:194
addChannel(self, typing.Sequence[str] daughters, MVAConfiguration mvaConfig=None, PreCutConfiguration preCutConfig=None, bool pi0veto=False)
Definition config.py:231
__eq__(self, a)
Definition config.py:200
str identifier
pdg name of the particle with an optional additional user label separated by :
Definition config.py:185