Belle II Software  release-05-01-25
config.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 # @cond SUPPRESS_DOXYGEN
5 
6 # Thomas Keck 2016
7 
8 """
9  Configuration classes
10 
11  The classes defined here are used to uniquely define a FEI training.
12  Meaning:
13  - The global configuration like database prefix, cache mode, monitoring, ... (FeiConfiguration)
14  - The reconstructed Particles (Particle)
15  - The reconstructed Channels of each particle (DecayChannel)
16  - The MVA configuration for each channel (MVAConfiguration)
17  - The Cut definitions of each channel (PreCutConfiguration)
18  - The Cut definitions of each particle (PostCutConfiguration)
19 """
20 
21 import collections
22 import copy
23 import itertools
24 import typing
25 import basf2
26 
27 # Define classes at top level to make them picklable
28 # Each class is created via namedtuple, which is like a struct in C
29 
# Global FEI configuration record (immutable namedtuple).
FeiConfiguration = collections.namedtuple(
    'FeiConfiguration',
    'prefix, cache, monitor, legacy, externTeacher, training')
# One default per field, in field order, so that FeiConfiguration() is valid.
FeiConfiguration.__new__.__defaults__ = ('FEI_TEST', None, True, None, 'basf2_mva_teacher', False)
FeiConfiguration.__doc__ = "Fei Global Configuration class"
FeiConfiguration.prefix.__doc__ = "The database prefix used for all weight files"
# Adjacent literals are concatenated verbatim, so every fragment except the
# last one must end with an explicit space.
FeiConfiguration.cache.__doc__ = (
    "The stage which is passed as input, it is assumed that all previous stages "
    "do not have to be reconstructed again. Can be either a number or "
    "a filename containing a pickled number or "
    "None, in which case the environment variable FEI_STAGE is used."
)
FeiConfiguration.monitor.__doc__ = "If true, monitor histograms are created"
FeiConfiguration.legacy.__doc__ = (
    "Pass the summary file of a legacy FEI training, "
    "and the algorithm will be able to apply this training."
)
FeiConfiguration.externTeacher.__doc__ = "Teacher command e.g. basf2_mva_teacher, externClusterTeacher"
FeiConfiguration.training.__doc__ = "If you train the FEI set this to True, otherwise to False"
43 
44 
# Per-channel multivariate-analysis settings (immutable namedtuple).
MVAConfiguration = collections.namedtuple(
    'MVAConfiguration',
    'method, config, variables, target, sPlotVariable')
# One default per field, in field order; note that `variables` defaults to None.
MVAConfiguration.__new__.__defaults__ = (
    'FastBDT',
    '--nTrees 400 --nCutLevels 10 --nLevels 3 --shrinkage 0.1 --randRatio 0.5',
    None,
    'isSignal',
    None,
)
MVAConfiguration.__doc__ = "Multivariate analysis configuration class."
MVAConfiguration.method.__doc__ = "Method used by MVAInterface."
MVAConfiguration.config.__doc__ = "Method specific configuration string passed to basf2_mva_teacher"
# Adjacent literals are concatenated verbatim, hence the explicit separating space.
MVAConfiguration.variables.__doc__ = (
    "List of variables from the VariableManager. "
    "{} is expanded to one variable per daughter particle."
)
MVAConfiguration.target.__doc__ = "Target variable from the VariableManager."
MVAConfiguration.sPlotVariable.__doc__ = "Discriminating variable used by sPlot to do data-driven training."
56 
57 
# Cuts applied to the candidates of a channel before the classifier is trained
# (immutable namedtuple).
PreCutConfiguration = collections.namedtuple(
    'PreCutConfiguration',
    'userCut, vertexCut, noBackgroundSampling, '
    'bestCandidateVariable, bestCandidateCut, bestCandidateMode')
# One default per field, in field order, so that PreCutConfiguration() is valid.
PreCutConfiguration.__new__.__defaults__ = ('', -2, False, None, 0, 'lowest')
PreCutConfiguration.__doc__ = "PreCut configuration class. These cuts are employed before training the mva classifier."
# Adjacent literals are concatenated verbatim, hence the explicit separating spaces.
PreCutConfiguration.userCut.__doc__ = (
    "The user cut is passed directly to the ParticleCombiner. "
    "Particles which do not pass this cut are immediately discarded."
)
PreCutConfiguration.vertexCut.__doc__ = "The vertex cut is passed as confidence level to the VertexFitter."
PreCutConfiguration.noBackgroundSampling.__doc__ = (
    "For very pure channels, the background sampling factor is too high"
    " and the MVA can't be trained. This disables background sampling."
)
PreCutConfiguration.bestCandidateVariable.__doc__ = "Variable from the VariableManager which is used to rank all candidates."
PreCutConfiguration.bestCandidateMode.__doc__ = "Either lowest or highest."
PreCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep after the best-candidate ranking."
70 
# Cut applied to the candidates of a particle once the classifier output is
# available (immutable namedtuple).
PostCutConfiguration = collections.namedtuple(
    typename='PostCutConfiguration',
    field_names=['value', 'bestCandidateCut'],
)
PostCutConfiguration.__doc__ = "PostCut configuration class. This cut is employed after the training of the mva classifier."
PostCutConfiguration.value.__doc__ = "Absolute value used to cut on the SignalProbability of each candidate."
PostCutConfiguration.bestCandidateCut.__doc__ = "Number of best-candidates to keep, ranked by SignalProbability."
# Defaults in field order: value, bestCandidateCut.
PostCutConfiguration.__new__.__defaults__ = (0.0, 0)
76 
# Record describing one decay channel of a Particle (immutable namedtuple).
DecayChannel = collections.namedtuple(
    'DecayChannel',
    ('name', 'label', 'decayString', 'daughters', 'mvaConfig', 'preCutConfig', 'decayModeID'),
)
# Every field is optional and defaults to None.
DecayChannel.__new__.__defaults__ = (None,) * len(DecayChannel._fields)
DecayChannel.__doc__ = "Decay channel of a Particle."
DecayChannel.name.__doc__ = "str:Name of the channel e.g. :code:`D0:generic_0`"
DecayChannel.label.__doc__ = "Label used to identify the decay channel e.g. for weight files independent of decayModeID"
DecayChannel.decayString.__doc__ = "DecayDescriptor of the channel e.g. D0 -> K+ pi-"
DecayChannel.daughters.__doc__ = "List of daughter particles of the decay channel e.g. [K+, pi-]"
DecayChannel.mvaConfig.__doc__ = "MVAConfiguration object which is used for this channel."
DecayChannel.preCutConfig.__doc__ = "PreCutConfiguration object which is used for this channel."
DecayChannel.decayModeID.__doc__ = "DecayModeID of this channel. Unique ID for each channel of this particle."
87 
88 
# Histogram binning per monitored variable, keyed by variable name.
# Every value repeats its own key as first element, so the mapping is built
# from a flat list of rows to keep key and tuple in sync.
# Row layout is presumably (variable, number of bins, lower edge, upper edge)
# -- confirm against the histogramming module that consumes these tuples.
MonitoringVariableBinning = {
    row[0]: row
    for row in (
        ('mcErrors', 513, -0.5, 512.5),
        ('mcParticleStatus', 257, -0.5, 256.5),
        ('dM', 100, -1.0, 1.0),
        ('dQ', 100, -1.0, 1.0),
        ('abs(dM)', 100, 0.0, 1.0),
        ('abs(dQ)', 100, 0.0, 1.0),
        ('pionID', 100, 0.0, 1.0),
        ('kaonID', 100, 0.0, 1.0),
        ('protonID', 100, 0.0, 1.0),
        ('electronID', 100, 0.0, 1.0),
        ('muonID', 100, 0.0, 1.0),
        ('isSignal', 2, -0.5, 1.5),
        ('isSignalAcceptMissingNeutrino', 2, -0.5, 1.5),
        ('isPrimarySignal', 2, -0.5, 1.5),
        ('chiProb', 100, 0.0, 1.0),
        ('Mbc', 100, 5.1, 5.4),
        ('cosThetaBetweenParticleAndNominalB', 100, -10.0, 10.0),
        ('extraInfo(SignalProbability)', 100, 0.0, 1.0),
        ('extraInfo(decayModeID)', 101, -0.5, 100.5),
        ('extraInfo(uniqueSignal)', 2, -0.5, 1.5),
        ('extraInfo(preCut_rank)', 41, -0.5, 40.5),
        ('extraInfo(postCut_rank)', 41, -0.5, 40.5),
        ('daughterProductOf(extraInfo(SignalProbability))', 100, 0.0, 1.0),
    )
}
114 
115 
def variables2binnings(variables):
    """
    Map every variable name to its binning tuple for VariableToHistogram.

    Names listed in MonitoringVariableBinning use the tuple registered there;
    any other name gets the fallback tuple (name, 100, -10.0, 10.0).
    @param variables iterable of variable names
    @return list of 4-tuples, one per entry of variables, in the same order
    """
    binnings = []
    for name in variables:
        fallback = (name, 100, -10.0, 10.0)
        binnings.append(MonitoringVariableBinning.get(name, fallback))
    return binnings
121 
122 
def variables2binnings_2d(variables):
    """
    Map every pair of variable names to one combined binning tuple.

    Each name is resolved through MonitoringVariableBinning, falling back to
    (name, 100, -10.0, 10.0) for unknown names; the two resulting 4-tuples
    are concatenated into a single 8-tuple (first variable, then second).
    @param variables iterable of (name1, name2) pairs
    @return list of 8-tuples, one per pair, in the same order
    """
    def lookup(name):
        # Registered binning if available, otherwise the generic fallback.
        return MonitoringVariableBinning.get(name, (name, 100, -10.0, 10.0))

    return [lookup(first) + lookup(second) for first, second in variables]
133 
134 
def removeJPsiSlash(string: str) -> str:
    """
    Return the given string with every '/' removed, so that the J/psi
    particle name becomes Jpsi.
    @param string text possibly containing '/' characters
    """
    pieces = string.split('/')
    return ''.join(pieces)
140 
141 
class Particle(object):

    """
    The Particle class is the only class the end-user gets into contact with.
    The user creates an instance of this class for every particle he wants to reconstruct with the FEI algorithm,
    and provides MVAConfiguration, PreCutConfiguration and PostCutConfiguration.
    The MVA and pre-cut configuration can be overwritten per channel (see addChannel).
    """

    def __init__(self, identifier: str,
                 mvaConfig: MVAConfiguration,
                 preCutConfig: PreCutConfiguration = PreCutConfiguration(),
                 postCutConfig: PostCutConfiguration = PostCutConfiguration()):
        """
        Creates a Particle without any decay channels. To add decay channels use addChannel method.
            @param identifier is the pdg name of the particle as a string
                   with an optional additional user label separated by ':'
            @param mvaConfig multivariate analysis configuration
            @param preCutConfig intermediate pre cut configuration
            @param postCutConfig post cut configuration
        """
        #: pdg name of the particle with the user label; ':generic' is appended if no label was given
        self.identifier = identifier if ':' in identifier else identifier + ':generic'
        parts = self.identifier.split(':')
        #: part of the identifier in front of the first ':'
        self.name = parts[0]
        #: part of the identifier between the first and the second ':'
        self.label = parts[1]
        #: default multivariate analysis configuration, used by channels without their own
        self.mvaConfig = mvaConfig
        #: DecayChannel objects added via addChannel, in insertion order
        self.channels = []
        #: default pre cut configuration, used by channels without their own
        self.preCutConfig = preCutConfig
        #: post cut configuration of this particle
        self.postCutConfig = postCutConfig

    def __eq__(self, a):
        """
        Compares two Particle objects.
        They are equal if their identifier, name, label, all channels, preCutConfig and postCutConfig are equal.
        Note that the particle-level mvaConfig is not part of the comparison.
            @param a another Particle object
            @return True or False for a Particle, NotImplemented for any other type
        """
        if not isinstance(a, Particle):
            # Let Python fall back to its default handling instead of
            # failing with an AttributeError on a foreign object.
            return NotImplemented
        return (self.identifier == a.identifier and self.name == a.name and self.label == a.label and
                self.channels == a.channels and self.preCutConfig == a.preCutConfig and
                self.postCutConfig == a.postCutConfig)

    def __str__(self):
        """
        Creates a string representation of a Particle object.
        """
        return str((self.identifier, self.channels, self.preCutConfig, self.postCutConfig, self.mvaConfig))

    def __hash__(self):
        """
        Creates a hash of a Particle object.
        This is necessary to use this as a key in a dictionary.

        Only the identifier enters the hash: the channel list is a (mutable,
        unhashable) list, so hashing it raises TypeError, and the hash must
        not depend on anything __eq__ ignores. Particles which compare equal
        always share the same identifier and therefore the same hash.
        """
        return hash(self.identifier)

    @property
    def daughters(self):
        """ Property returning list of unique daughter particles of all channels, in order of first appearance """
        # dict.fromkeys removes duplicates while keeping a reproducible order
        # (a set would order the strings differently from run to run).
        return list(dict.fromkeys(daughter for channel in self.channels for daughter in channel.daughters))

    def addChannel(self,
                   daughters: typing.Sequence[str],
                   mvaConfig: typing.Optional[MVAConfiguration] = None,
                   preCutConfig: typing.Optional[PreCutConfiguration] = None):
        """
        Appends a new decay channel to the Particle object.
            @param daughters is a list of pdg particle names e.g. ['pi+','K-']
            @param mvaConfig multivariate analysis configuration, defaults to the one of this particle
            @param preCutConfig pre cut configuration object, defaults to the one of this particle
            @return this Particle object, so that calls can be chained
        """
        # Append generic label to all defined daughters if no label was set yet
        daughters = [d if ':' in d else d + ':generic' for d in daughters]
        # Use default mvaConfig of this particle if no channel-specific config is given.
        # The copy keeps the channel independent of later changes to the passed object.
        mvaConfig = copy.deepcopy(self.mvaConfig if mvaConfig is None else mvaConfig)
        # Use default preCutConfig of this particle if no channel-specific config is given
        preCutConfig = copy.deepcopy(self.preCutConfig if preCutConfig is None else preCutConfig)
        # At the moment all channels must have the same target variable. Why?
        if mvaConfig is not None and mvaConfig.target != self.mvaConfig.target:
            basf2.B2FATAL(
                f'Particle {self.identifier} has common target {self.mvaConfig.target}, while channel '
                f'{" ".join(daughters)} has {mvaConfig.target}. Each particle must have exactly one target!')
        # Replace generic-variables with ordinary variables.
        # All instances of {} are replaced with all combinations of daughter indices
        daughterIndices = range(len(daughters))
        mvaVars = []
        for v in mvaConfig.variables:
            nPlaceholders = v.count('{}')
            # A variable with more placeholders than the channel has daughters is left out
            if nPlaceholders <= len(daughters):
                mvaVars += [v.format(*c) for c in itertools.combinations(daughterIndices, nPlaceholders)]
        mvaConfig = mvaConfig._replace(variables=mvaVars)
        # Add new channel; its decayModeID is its position in the channel list
        decayModeID = len(self.channels)
        channelName = self.identifier + '_' + str(decayModeID)
        self.channels.append(DecayChannel(name=channelName,
                                          label=removeJPsiSlash(self.identifier + ' ==> ' + ' '.join(daughters)),
                                          decayString=channelName + ' -> ' + ' '.join(daughters),
                                          daughters=daughters,
                                          mvaConfig=mvaConfig,
                                          preCutConfig=preCutConfig,
                                          decayModeID=decayModeID))
        return self
243 
244 # @endcond
dft.TfData.TfDataBasf2Stub.__init__
def __init__(self, batch_size, feature_number, event_number, train_fraction)
Definition: TfData.py:115