Belle II Software development
config.py
1#!/usr/bin/env python3
2
3
10
11# @cond SUPPRESS_DOXYGEN
12
13"""
14 Configuration classes
15
16 The classes defined here are used to uniquely define a FEI training.
17 Meaning:
18 - The global configuration like database prefix, cache mode, monitoring, ... (FeiConfiguration)
19 - The reconstructed Particles (Particle)
20 - The reconstructed Channels of each particle (DecayChannel)
21 - The MVA configuration for each channel (MVAConfiguration)
22 - The Cut definitions of each channel (PreCutConfiguration)
23 - The Cut definitions of each particle (PostCutConfiguration)
24"""
25
26import collections
27import copy
28import itertools
29import typing
30import basf2
31
# Define classes at top level to make them picklable
# Creates new classes via namedtuple, which are like a struct in C
34
# Global FEI settings. One default is supplied per field, so FeiConfiguration() is valid.
FeiConfiguration = collections.namedtuple(
    'FeiConfiguration',
    ['prefix', 'cache', 'monitor', 'legacy', 'externTeacher', 'training'],
)
FeiConfiguration.__new__.__defaults__ = (
    'FEI_TEST',           # prefix
    None,                 # cache
    True,                 # monitor
    None,                 # legacy
    'basf2_mva_teacher',  # externTeacher
    False,                # training
)
FeiConfiguration.__doc__ = "Fei Global Configuration class"
FeiConfiguration.prefix.__doc__ = "The database prefix used for all weight files"
FeiConfiguration.cache.__doc__ = (
    "The stage which is passed as input, it is assumed that all previous stages"
    " do not have to be reconstructed again. Can be either a number or"
    " a filename containing a pickled number or"
    " None in this case the environment variable FEI_STAGE is used."
)
FeiConfiguration.monitor.__doc__ = "If true, monitor histograms are created"
FeiConfiguration.legacy.__doc__ = (
    "Pass the summary file of a legacy FEI training,"
    " and the algorithm will be able to apply this training."
)
FeiConfiguration.externTeacher.__doc__ = "Teacher command e.g. basf2_mva_teacher, b2mva-kekcc-cluster-teacher"
FeiConfiguration.training.__doc__ = "If you train the FEI set this to True, otherwise to False"
48
49
# Per-channel classifier settings. One default is supplied per field.
MVAConfiguration = collections.namedtuple(
    'MVAConfiguration',
    ['method', 'config', 'variables', 'target', 'sPlotVariable'],
)
MVAConfiguration.__new__.__defaults__ = (
    'FastBDT',                                                                   # method
    '--nTrees 400 --nCutLevels 10 --nLevels 3 --shrinkage 0.1 --randRatio 0.5',  # config
    None,                                                                        # variables
    'isSignal',                                                                  # target
    None,                                                                        # sPlotVariable
)
MVAConfiguration.__doc__ = "Multivariate analysis configuration class."
MVAConfiguration.method.__doc__ = "Method used by MVAInterface."
MVAConfiguration.config.__doc__ = "Method specific configuration string passed to basf2_mva_teacher"
MVAConfiguration.variables.__doc__ = (
    "List of variables from the VariableManager."
    " {} is expanded to one variable per daughter particle."
)
MVAConfiguration.target.__doc__ = "Target variable from the VariableManager."
MVAConfiguration.sPlotVariable.__doc__ = "Discriminating variable used by sPlot to do data-driven training."
61
62
# Cuts applied per channel before the classifier training. One default is supplied per field.
PreCutConfiguration = collections.namedtuple(
    'PreCutConfiguration',
    [
        'userCut',
        'vertexCut',
        'noBackgroundSampling',
        'bestCandidateVariable',
        'bestCandidateCut',
        'bestCandidateMode',
    ],
)
PreCutConfiguration.__new__.__defaults__ = (
    '',        # userCut
    -2,        # vertexCut
    False,     # noBackgroundSampling
    None,      # bestCandidateVariable
    0,         # bestCandidateCut
    'lowest',  # bestCandidateMode
)
PreCutConfiguration.__doc__ = "PreCut configuration class. These cuts is employed before training the mva classifier."
PreCutConfiguration.userCut.__doc__ = (
    "The user cut is passed directly to the ParticleCombiner."
    " Particles which do not pass this cut are immediately discarded."
)
PreCutConfiguration.vertexCut.__doc__ = "The vertex cut is passed as confidence level to the VertexFitter."
PreCutConfiguration.noBackgroundSampling.__doc__ = (
    "For very pure channels, the background sampling factor is too high"
    " and the MVA can't be trained. This disables background sampling."
)
PreCutConfiguration.bestCandidateVariable.__doc__ = "Variable from the VariableManager which is used to rank all candidates."
PreCutConfiguration.bestCandidateMode.__doc__ = "Either lowest or highest."
PreCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep after the best-candidate ranking."
75
# Cuts applied per particle after the classifier training. One default is supplied per field.
PostCutConfiguration = collections.namedtuple(
    'PostCutConfiguration',
    ['value', 'bestCandidateCut'],
)
PostCutConfiguration.__new__.__defaults__ = (
    0.0,  # value
    0,    # bestCandidateCut
)
PostCutConfiguration.__doc__ = "PostCut configuration class. This cut is employed after the training of the mva classifier."
PostCutConfiguration.value.__doc__ = "Absolute value used to cut on the SignalProbability of each candidate."
PostCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep, ranked by SignalProbability."
81
# Description of a single decay channel; every field defaults to None.
DecayChannel = collections.namedtuple(
    'DecayChannel',
    ['name', 'label', 'decayString', 'daughters', 'mvaConfig', 'preCutConfig', 'decayModeID'],
)
DecayChannel.__new__.__defaults__ = (None,) * 7
DecayChannel.__doc__ = "Decay channel of a Particle."
DecayChannel.name.__doc__ = "str:Name of the channel e.g. :code:`D0:generic_0`"
DecayChannel.label.__doc__ = "Label used to identify the decay channel e.g. for weight files independent of decayModeID"
DecayChannel.decayString.__doc__ = "DecayDescriptor of the channel e.g. D0 -> K+ pi-"
DecayChannel.daughters.__doc__ = "List of daughter particles of the decay channel e.g. [K+, pi-]"
DecayChannel.mvaConfig.__doc__ = "MVAConfiguration object which is used for this channel."
DecayChannel.preCutConfig.__doc__ = "PreCutConfiguration object which is used for this channel."
DecayChannel.decayModeID.__doc__ = "DecayModeID of this channel. Unique ID for each channel of this particle."
92
93
# Predefined histogram binnings for monitoring, keyed by variable name.
# Every value repeats the variable name followed by three numbers
# (presumably number of bins, lower edge, upper edge -- confirm against the histogram module).
MonitoringVariableBinning = {
    variable: (variable, nbins, low, high)
    for variable, nbins, low, high in (
        ('mcErrors', 513, -0.5, 512.5),
        ('mcParticleStatus', 257, -0.5, 256.5),
        ('dM', 100, -1.0, 1.0),
        ('dQ', 100, -1.0, 1.0),
        ('abs(dM)', 100, 0.0, 1.0),
        ('abs(dQ)', 100, 0.0, 1.0),
        ('pionID', 100, 0.0, 1.0),
        ('kaonID', 100, 0.0, 1.0),
        ('protonID', 100, 0.0, 1.0),
        ('electronID', 100, 0.0, 1.0),
        ('muonID', 100, 0.0, 1.0),
        ('isSignal', 2, -0.5, 1.5),
        ('isSignalAcceptMissingNeutrino', 2, -0.5, 1.5),
        ('isPrimarySignal', 2, -0.5, 1.5),
        ('chiProb', 100, 0.0, 1.0),
        ('Mbc', 100, 5.1, 5.4),
        ('cosThetaBetweenParticleAndNominalB', 100, -10.0, 10.0),
        ('extraInfo(SignalProbability)', 100, 0.0, 1.0),
        ('extraInfo(decayModeID)', 101, -0.5, 100.5),
        ('extraInfo(uniqueSignal)', 2, -0.5, 1.5),
        ('extraInfo(preCut_rank)', 41, -0.5, 40.5),
        ('extraInfo(postCut_rank)', 41, -0.5, 40.5),
        ('daughterProductOf(extraInfo(SignalProbability))', 100, 0.0, 1.0),
    )
}
119
120
def variables2binnings(variables):
    """
    Convert given variables into tuples which can be given to VariableToHistogram.

    A variable listed in MonitoringVariableBinning gets its predefined tuple,
    any other variable gets the generic tuple (variable, 100, -10.0, 10.0).
    @param variables iterable of variable names
    @return list with one tuple per variable, in the order of the input
    """
    binnings = []
    for variable in variables:
        fallback = (variable, 100, -10.0, 10.0)
        binnings.append(MonitoringVariableBinning.get(variable, fallback))
    return binnings
126
127
def variables2binnings_2d(variables):
    """
    Convert given pairs of variables into tuples which can be given to VariableToHistogram.

    For each pair the tuples of both variables are looked up in MonitoringVariableBinning
    (falling back to (variable, 100, -10.0, 10.0)) and concatenated into a single tuple.
    @param variables iterable of (variable1, variable2) pairs
    @return list with one concatenated tuple per pair, in the order of the input
    """
    return [
        MonitoringVariableBinning.get(first, (first, 100, -10.0, 10.0))
        + MonitoringVariableBinning.get(second, (second, 100, -10.0, 10.0))
        for first, second in variables
    ]
138
139
def removeJPsiSlash(string: str) -> str:
    """
    Strip every '/' from the given string, e.g. to turn the particle name J/psi into Jpsi.
    @param string text which may contain '/' characters
    @return the text without any '/' characters
    """
    pieces = string.split('/')
    return ''.join(pieces)
145
146
class Particle:

    """
    The Particle class is the only class the end-user gets into contact with.
    The user creates an instance of this class for every particle they want to reconstruct with the FEI algorithm,
    and provides MVAConfiguration, PreCutConfiguration and PostCutConfiguration.
    These can be overwritten per channel.
    """

    def __init__(self, identifier: str,
                 mvaConfig: MVAConfiguration,
                 preCutConfig: PreCutConfiguration = PreCutConfiguration(),
                 postCutConfig: PostCutConfiguration = PostCutConfiguration()):
        """
        Creates a Particle without any decay channels. To add decay channels use addChannel method.
            @param identifier is the pdg name of the particle as a string
                   with an optional additional user label separated by ':'
            @param mvaConfig multivariate analysis configuration
            @param preCutConfig intermediate pre cut configuration
            @param postCutConfig post cut configuration
        """
        # Full identifier 'name:label'; the label 'generic' is appended if the user gave none
        self.identifier = identifier + ':generic' if len(identifier.split(':')) < 2 else identifier
        v = self.identifier.split(':')
        # Part of the identifier in front of the first ':'
        self.name = v[0]
        # Part of the identifier behind the first ':'
        self.label = v[1]
        # Default MVA configuration, used by channels which do not provide their own
        self.mvaConfig = mvaConfig
        # DecayChannel objects added via addChannel
        self.channels = []
        # Default pre cut configuration, used by channels which do not provide their own
        self.preCutConfig = preCutConfig
        # Post cut configuration of this particle
        self.postCutConfig = postCutConfig

    def __eq__(self, a):
        """
        Compares two Particle objects.
        They are equal if their identifier, name, label, all channels, preCutConfig and postCutConfig are equal.
        The particle-level mvaConfig is not part of the comparison.
            @param a another Particle object
            @return True or False for Particle objects, NotImplemented for any other type
        """
        # Let Python fall back to its default handling instead of raising an AttributeError
        if not isinstance(a, Particle):
            return NotImplemented
        return (self.identifier == a.identifier and self.name == a.name and self.label == a.label and
                self.channels == a.channels and self.preCutConfig == a.preCutConfig and self.postCutConfig == a.postCutConfig)

    def __str__(self):
        """
        Creates a string representation of a Particle object.
        """
        return str((self.identifier, self.channels, self.preCutConfig, self.postCutConfig, self.mvaConfig))

    def __hash__(self):
        """
        Creates a hash of a Particle object.
        This is necessary to use this as a key in a dictionary.
        Only the identifier enters the hash: the list of channels is not hashable, and
        Particles which compare equal always share the same identifier.
        """
        return hash(self.identifier)

    @property
    def daughters(self):
        """ Property returning list of unique daughter particles of all channels, in order of first appearance """
        # dict.fromkeys removes duplicates while keeping a reproducible order
        return list(dict.fromkeys(daughter for channel in self.channels for daughter in channel.daughters))

    def addChannel(self,
                   daughters: typing.Sequence[str],
                   mvaConfig: MVAConfiguration = None,
                   preCutConfig: PreCutConfiguration = None):
        """
        Appends a new decay channel to the Particle object.
            @param daughters is a list of pdg particle names e.g. ['pi+','K-']
            @param mvaConfig multivariate analysis configuration
            @param preCutConfig pre cut configuration object
            @return this Particle object, so that calls can be chained
        """
        # Append generic label to all defined daughters if no label was set yet
        daughters = [d + ':generic' if ':' not in d else d for d in daughters]
        # Use default mvaConfig of this particle if no channel-specific config is given
        mvaConfig = copy.deepcopy(self.mvaConfig if mvaConfig is None else mvaConfig)
        # Use default preCutConfig of this particle if no channel-specific config is given
        preCutConfig = copy.deepcopy(self.preCutConfig if preCutConfig is None else preCutConfig)
        # At the moment all channels must have the same target variable. Why?
        if mvaConfig is not None and mvaConfig.target != self.mvaConfig.target:
            basf2.B2FATAL(
                f'Particle {self.identifier} has common target {self.mvaConfig.target}, while channel '
                f'{" ".join(daughters)} has {mvaConfig.target}. Each particle must have exactly one target!')
        # Replace generic-variables with ordinary variables.
        # All instances of {} are replaced with all combinations of daughter indices.
        # Variables with more {} placeholders than daughters are dropped.
        nDaughters = len(daughters)
        mvaVars = []
        for v in mvaConfig.variables:
            nPlaceholders = v.count('{}')
            if nPlaceholders <= nDaughters:
                mvaVars += [v.format(*c) for c in itertools.combinations(range(nDaughters), nPlaceholders)]
        mvaConfig = mvaConfig._replace(variables=mvaVars)
        # Add new channel; the decayModeID is the position of the channel in the list of channels
        decayModeID = len(self.channels)
        channelName = self.identifier + '_' + str(decayModeID)
        self.channels.append(DecayChannel(name=channelName,
                                          label=removeJPsiSlash(self.identifier + ' ==> ' + ' '.join(daughters)),
                                          decayString=channelName + ' -> ' + ' '.join(daughters),
                                          daughters=daughters,
                                          mvaConfig=mvaConfig,
                                          preCutConfig=preCutConfig,
                                          decayModeID=decayModeID))
        return self
247
248# @endcond
249