Belle II Software  release-08-01-10
config.py
1 #!/usr/bin/env python3
2 
3 
10 
11 # @cond SUPPRESS_DOXYGEN
12 
13 """
14  Configuration classes
15 
16  The classes defined here are used to uniquely define a FEI training.
17  Meaning:
18  - The global configuration like database prefix, cache mode, monitoring, ... (FeiConfiguration)
19  - The reconstructed Particles (Particle)
20  - The reconstructed Channels of each particle (DecayChannel)
21  - The MVA configuration for each channel (MVAConfiguration)
22  - The Cut definitions of each channel (PreCutConfiguration)
23  - The Cut definitions of each particle (PostCutConfiguration)
24 """
25 
26 import collections
27 import copy
28 import itertools
29 import typing
30 import basf2
31 
# Define classes at top level to make them picklable
# Creates new classes via namedtuple, which are like structs in C
34 
# Global FEI settings. Every field has a default, so FeiConfiguration() is a valid call.
FeiConfiguration = collections.namedtuple(
    'FeiConfiguration',
    ['prefix', 'cache', 'monitor', 'legacy', 'externTeacher', 'training'],
)
FeiConfiguration.__new__.__defaults__ = (
    'FEI_TEST',           # prefix
    None,                 # cache
    True,                 # monitor
    None,                 # legacy
    'basf2_mva_teacher',  # externTeacher
    False,                # training
)
FeiConfiguration.__doc__ = "Fei Global Configuration class"
FeiConfiguration.prefix.__doc__ = "The database prefix used for all weight files"
FeiConfiguration.cache.__doc__ = (
    "The stage which is passed as input, it is assumed that all previous stages"
    " do not have to be reconstructed again. Can be either a number or"
    " a filename containing a pickled number or"
    " None in this case the environment variable FEI_STAGE is used."
)
FeiConfiguration.monitor.__doc__ = "If true, monitor histograms are created"
FeiConfiguration.legacy.__doc__ = (
    "Pass the summary file of a legacy FEI training,"
    " and the algorithm will be able to apply this training."
)
FeiConfiguration.externTeacher.__doc__ = "Teacher command e.g. basf2_mva_teacher, b2mva-kekcc-cluster-teacher"
FeiConfiguration.training.__doc__ = "If you train the FEI set this to True, otherwise to False"
48 
49 
# Per-channel classifier settings. Every field has a default value.
MVAConfiguration = collections.namedtuple(
    'MVAConfiguration',
    ['method', 'config', 'variables', 'target', 'sPlotVariable'],
)
MVAConfiguration.__new__.__defaults__ = (
    'FastBDT',                                                                    # method
    '--nTrees 400 --nCutLevels 10 --nLevels 3 --shrinkage 0.1 --randRatio 0.5',  # config
    None,                                                                         # variables
    'isSignal',                                                                   # target
    None,                                                                         # sPlotVariable
)
MVAConfiguration.__doc__ = "Multivariate analysis configuration class."
MVAConfiguration.method.__doc__ = "Method used by MVAInterface."
MVAConfiguration.config.__doc__ = "Method specific configuration string passed to basf2_mva_teacher"
MVAConfiguration.variables.__doc__ = (
    "List of variables from the VariableManager."
    " {} is expanded to one variable per daughter particle."
)
MVAConfiguration.target.__doc__ = "Target variable from the VariableManager."
MVAConfiguration.sPlotVariable.__doc__ = "Discriminating variable used by sPlot to do data-driven training."
61 
62 
# Cuts applied to a channel's candidates before the classifier is trained.
PreCutConfiguration = collections.namedtuple(
    'PreCutConfiguration',
    [
        'userCut',
        'vertexCut',
        'noBackgroundSampling',
        'bestCandidateVariable',
        'bestCandidateCut',
        'bestCandidateMode',
    ],
)
PreCutConfiguration.__new__.__defaults__ = (
    '',        # userCut
    -2,        # vertexCut
    False,     # noBackgroundSampling
    None,      # bestCandidateVariable
    0,         # bestCandidateCut
    'lowest',  # bestCandidateMode
)
PreCutConfiguration.__doc__ = "PreCut configuration class. These cuts is employed before training the mva classifier."
PreCutConfiguration.userCut.__doc__ = (
    "The user cut is passed directly to the ParticleCombiner."
    " Particles which do not pass this cut are immediately discarded."
)
PreCutConfiguration.vertexCut.__doc__ = "The vertex cut is passed as confidence level to the VertexFitter."
PreCutConfiguration.noBackgroundSampling.__doc__ = (
    "For very pure channels, the background sampling factor is too high"
    " and the MVA can't be trained. This disables background sampling."
)
PreCutConfiguration.bestCandidateVariable.__doc__ = "Variable from the VariableManager which is used to rank all candidates."
PreCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep after the best-candidate ranking."
PreCutConfiguration.bestCandidateMode.__doc__ = "Either lowest or highest."
75 
# Cuts applied to a particle's candidates after the classifier has been applied.
PostCutConfiguration = collections.namedtuple('PostCutConfiguration', ['value', 'bestCandidateCut'])
PostCutConfiguration.__new__.__defaults__ = (
    0.0,  # value
    0,    # bestCandidateCut
)
PostCutConfiguration.__doc__ = "PostCut configuration class. This cut is employed after the training of the mva classifier."
PostCutConfiguration.value.__doc__ = "Absolute value used to cut on the SignalProbability of each candidate."
PostCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep, ranked by SignalProbability."
81 
# One decay channel of a particle. All fields default to None.
DecayChannel = collections.namedtuple(
    'DecayChannel',
    ['name', 'label', 'decayString', 'daughters', 'mvaConfig', 'preCutConfig', 'decayModeID'],
)
DecayChannel.__new__.__defaults__ = (None,) * 7
DecayChannel.__doc__ = "Decay channel of a Particle."
DecayChannel.name.__doc__ = "str:Name of the channel e.g. :code:`D0:generic_0`"
DecayChannel.label.__doc__ = "Label used to identify the decay channel e.g. for weight files independent of decayModeID"
DecayChannel.decayString.__doc__ = "DecayDescriptor of the channel e.g. D0 -> K+ pi-"
DecayChannel.daughters.__doc__ = "List of daughter particles of the decay channel e.g. [K+, pi-]"
DecayChannel.mvaConfig.__doc__ = "MVAConfiguration object which is used for this channel."
DecayChannel.preCutConfig.__doc__ = "PreCutConfiguration object which is used for this channel."
DecayChannel.decayModeID.__doc__ = "DecayModeID of this channel. Unique ID for each channel of this particle."
92 
93 
# Histogram binnings for the monitoring variables, keyed by variable name.
# Each value is (variable name, number of bins, lower edge, upper edge) and
# repeats its own key as the first element, so the mapping is built from the
# tuples themselves.
MonitoringVariableBinning = {
    binning[0]: binning
    for binning in (
        ('mcErrors', 513, -0.5, 512.5),
        ('mcParticleStatus', 257, -0.5, 256.5),
        ('dM', 100, -1.0, 1.0),
        ('dQ', 100, -1.0, 1.0),
        ('abs(dM)', 100, 0.0, 1.0),
        ('abs(dQ)', 100, 0.0, 1.0),
        ('pionID', 100, 0.0, 1.0),
        ('kaonID', 100, 0.0, 1.0),
        ('protonID', 100, 0.0, 1.0),
        ('electronID', 100, 0.0, 1.0),
        ('muonID', 100, 0.0, 1.0),
        ('isSignal', 2, -0.5, 1.5),
        ('isSignalAcceptMissingNeutrino', 2, -0.5, 1.5),
        ('isPrimarySignal', 2, -0.5, 1.5),
        ('chiProb', 100, 0.0, 1.0),
        ('Mbc', 100, 5.1, 5.4),
        ('cosThetaBetweenParticleAndNominalB', 100, -10.0, 10.0),
        ('extraInfo(SignalProbability)', 100, 0.0, 1.0),
        ('extraInfo(decayModeID)', 101, -0.5, 100.5),
        ('extraInfo(uniqueSignal)', 2, -0.5, 1.5),
        ('extraInfo(preCut_rank)', 41, -0.5, 40.5),
        ('extraInfo(postCut_rank)', 41, -0.5, 40.5),
        ('daughterProductOf(extraInfo(SignalProbability))', 100, 0.0, 1.0),
    )
}
119 
120 
def variables2binnings(variables):
    """
    Convert the given variables into tuples which can be given to VariableToHistogram.

    Variables listed in MonitoringVariableBinning use the binning defined there;
    every other variable gets 100 bins in the range [-10.0, 10.0].
    @param variables iterable of variable names
    @return list of (name, number of bins, lower edge, upper edge) tuples, one per variable
    """
    binnings = []
    for name in variables:
        if name in MonitoringVariableBinning:
            binnings.append(MonitoringVariableBinning[name])
        else:
            # No dedicated binning known for this variable: use the generic one
            binnings.append((name, 100, -10.0, 10.0))
    return binnings
126 
127 
def variables2binnings_2d(variables):
    """
    Convert the given pairs of variables into tuples which can be given to VariableToHistogram.

    Each variable of a pair is resolved via MonitoringVariableBinning, falling back
    to 100 bins in the range [-10.0, 10.0]; the two binnings are concatenated.
    @param variables iterable of (variable name, variable name) pairs
    @return list of 8-element tuples: the binning of the first variable followed by that of the second
    """
    def binning_of(name):
        # Dedicated binning if one is defined, generic binning otherwise
        return MonitoringVariableBinning.get(name, (name, 100, -10.0, 10.0))

    return [binning_of(first) + binning_of(second) for first, second in variables]
138 
139 
def removeJPsiSlash(string: str) -> str:
    """
    Strip every '/' from the given string, e.g. turning the particle name J/psi into Jpsi.
    @param string text which may contain '/' characters
    @return the text with all '/' characters removed
    """
    pieces = string.split('/')
    return ''.join(pieces)
145 
146 
class Particle:

    """
    The Particle class is the only class the end-user gets into contact with.
    The user creates an instance of this class for every particle he wants to reconstruct with the FEI algorithm,
    and provides MVAConfiguration, PreCutConfiguration and PostCutConfiguration. These can be overwritten per channel.
    """

    def __init__(self, identifier: str,
                 mvaConfig: MVAConfiguration,
                 preCutConfig: PreCutConfiguration = PreCutConfiguration(),
                 postCutConfig: PostCutConfiguration = PostCutConfiguration()):
        """
        Creates a Particle without any decay channels. To add decay channels use addChannel method.
        @param identifier is the pdg name of the particle as a string
               with an optional additional user label separated by ':'
        @param mvaConfig multivariate analysis configuration
        @param preCutConfig intermediate pre cut configuration
        @param postCutConfig post cut configuration
        """
        # Full identifier 'name:label'; the label 'generic' is appended if the user gave none
        self.identifier = identifier + ':generic' if len(identifier.split(':')) < 2 else identifier
        v = self.identifier.split(':')
        # Part of the identifier in front of the first ':'
        self.name = v[0]
        # Part of the identifier between the first and the second ':'
        self.label = v[1]
        # Default MVA configuration, used by channels which do not provide their own
        self.mvaConfig = mvaConfig
        # DecayChannel objects added via addChannel, in insertion order
        self.channels = []
        # Default pre cut configuration, used by channels which do not provide their own
        self.preCutConfig = preCutConfig
        # Post cut configuration of this particle
        self.postCutConfig = postCutConfig

    def __eq__(self, a):
        """
        Compares two Particle objects.
        They are equal if their identifier, name, label, all channels, preCutConfig and postCutConfig are equal.
        The particle-level mvaConfig is not part of the comparison.
        @param a another Particle object
        @return True or False for Particle objects, NotImplemented for any other type
        """
        if not isinstance(a, Particle):
            # Let Python try the reflected comparison instead of failing with an AttributeError
            return NotImplemented
        return (self.identifier == a.identifier and self.name == a.name and self.label == a.label and
                self.channels == a.channels and self.preCutConfig == a.preCutConfig and
                self.postCutConfig == a.postCutConfig)

    def __str__(self):
        """
        Creates a string representation of a Particle object.
        """
        return str((self.identifier, self.channels, self.preCutConfig, self.postCutConfig, self.mvaConfig))

    def __hash__(self):
        """
        Creates a hash of a Particle object.
        This is necessary to use this as a key in a dictionary.

        Only the identifier enters the hash: the channel list is mutable and therefore
        unhashable, and the hash may only depend on attributes which __eq__ compares,
        so that equal particles always have equal hashes.
        """
        return hash(self.identifier)

    @property
    def daughters(self):
        """ Property returning list of unique daughter particles of all channels """
        return list(frozenset(daughter for channel in self.channels for daughter in channel.daughters))

    def addChannel(self,
                   daughters: typing.Sequence[str],
                   mvaConfig: typing.Optional[MVAConfiguration] = None,
                   preCutConfig: typing.Optional[PreCutConfiguration] = None):
        """
        Appends a new decay channel to the Particle object.
        @param daughters is a list of pdg particle names e.g. ['pi+','K-']
        @param mvaConfig multivariate analysis configuration, defaults to the one of this particle
        @param preCutConfig pre cut configuration object, defaults to the one of this particle
        @return this Particle object, so that calls can be chained
        """
        # Append generic label to all defined daughters if no label was set yet
        daughters = [d + ':generic' if ':' not in d else d for d in daughters]
        # Use default mvaConfig of this particle if no channel-specific config is given
        mvaConfig = copy.deepcopy(self.mvaConfig if mvaConfig is None else mvaConfig)
        # Use default preCutConfig of this particle if no channel-specific config is given
        preCutConfig = copy.deepcopy(self.preCutConfig if preCutConfig is None else preCutConfig)
        # At the moment all channels must have the same target variable. Why?
        if mvaConfig is not None and mvaConfig.target != self.mvaConfig.target:
            basf2.B2FATAL(
                f'Particle {self.identifier} has common target {self.mvaConfig.target}, while channel '
                f'{" ".join(daughters)} has {mvaConfig.target}. Each particle must have exactly one target!')
        # Replace generic-variables with ordinary variables.
        # All instances of {} are replaced with all combinations of daughter indices.
        # A missing configuration or a missing variable list (the MVAConfiguration default)
        # is left untouched instead of failing on the attribute access / iteration.
        if mvaConfig is not None and mvaConfig.variables is not None:
            daughterIndices = range(len(daughters))
            mvaVars = []
            for v in mvaConfig.variables:
                nPlaceholders = v.count('{}')
                # Variables with more placeholders than daughters cannot be expanded and are dropped
                if nPlaceholders <= len(daughters):
                    mvaVars += [v.format(*c) for c in itertools.combinations(daughterIndices, nPlaceholders)]
            mvaConfig = mvaConfig._replace(variables=mvaVars)
        # Add new channel; its decayModeID is its position in the channel list
        decayModeID = len(self.channels)
        channelName = self.identifier + '_' + str(decayModeID)
        self.channels.append(DecayChannel(name=channelName,
                                          label=removeJPsiSlash(self.identifier + ' ==> ' + ' '.join(daughters)),
                                          decayString=channelName + ' -> ' + ' '.join(daughters),
                                          daughters=daughters,
                                          mvaConfig=mvaConfig,
                                          preCutConfig=preCutConfig,
                                          decayModeID=decayModeID))
        return self
248 
249 # @endcond