Belle II Software  release-08-01-10
utils.py
1 
8 
9 import functools
10 import collections
11 import re
12 import variables
13 from typing import Iterable, Union, List, Tuple, Optional
14 
15 
def create_aliases(list_of_variables: Iterable[str], wrapper: str, prefix="") -> List[str]:
    """
    Register one alias per variable, each wrapping the variable in ``wrapper``,
    and return the names of the registered aliases.

    The alias name is derived from the variable itself: every run of characters
    that is not an ASCII letter or digit is collapsed into a single underscore
    and underscores at either end are dropped, so ``useLabFrame(p)`` becomes
    ``useLabFrame_p``. If ``prefix`` is not empty the alias is called
    ``<prefix>_<name>``.

    >>> list_of_variables = ['M','p','matchedMC(useLabFrame(px))']
    >>> wrapper = 'daughter(1,{variable})'
    >>> prefix = 'pref'
    >>> print(create_aliases(list_of_variables, wrapper, prefix))
    ['pref_M', 'pref_p', 'pref_matchedMC_useLabFrame_px']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =====================================
    [INFO] The following aliases are registered:
    [INFO] pref_M --> daughter(1,M)
    [INFO] pref_matchedMC_useLabFrame_px --> daughter(1,matchedMC(useLabFrame(px)))
    [INFO] pref_p --> daughter(1,p)
    [INFO] =====================================

    Parameters:
        list_of_variables (list(str)): list of variable names
        wrapper (str): metafunction taking the variables from list_of_variables
            as a parameter, written as a format string with a ``{variable}``
            placeholder
            (``<metafunction>(<some configs>, {variable}, <some other configs>)``)
        prefix (str): alias prefix used for wrapped variables.

    Returns:
        list(str): names of the created aliases, in input order
    """
    unsafe_characters = re.compile('[^a-zA-Z0-9]+')
    created = []
    for variable in list_of_variables:
        # build an alias name which only consists of letters, digits and "_"
        alias = unsafe_characters.sub("_", variable).strip("_")
        if prefix:
            alias = f"{prefix}_{alias}"
        variables.variables.addAlias(alias, wrapper.format(variable=variable))
        created.append(alias)

    return created
57 
58 
def get_hierarchy_of_decay(decay_string: str) -> List[List[Tuple[int, str]]]:
    """
    This function returns paths of the particles selected in decay string. For
    each selected particle return a list of (index, name) tuples which indicate
    which daughter index to choose to arrive at the selected particle.

    For example for the decay string ``B+ -> [ D+ -> pi0 ^K+ ] pi0`` the
    resulting path for the K+ would be ``[(0, 'D'), (1, 'K')]``: The K is the
    second daughter of the first daughter of the B+

    >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] pi0')
    [[(0, 'D'), (0, 'K')]]

    Every selected particle has its own path so if multiple particles are
    collected a list of paths is returned

    >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] ^pi0')
    [[(0, 'D'), (0, 'K')], [(1, 'pi0')]]

    If the mother particle is selected an empty list will be returned as its path

    >>> get_hierarchy_of_decay('^B+ -> ^pi+ pi-')
    [[], [(0, 'pi')]]

    Parameters:
        decay_string (str): Decay string with selected particles

    Returns:
        list(list(tuple(int, str))): list of hierarchies of selected particles.

    Raises:
        ValueError: if the decay string cannot be initialised by the
            ``DecayDescriptor``.
    """
    # imported lazily so that ROOT is only needed when the function is called
    from ROOT import Belle2
    # the descriptor has to be created before it can parse the decay string
    d = Belle2.DecayDescriptor()
    if not d.init(decay_string):
        raise ValueError("Invalid decay string")

    selected_particles = []
    for path in d.getHierarchyOfSelected():
        # skip the first entry of each path (presumably the mother particle,
        # as a selected mother yields an empty path) and convert the rest into
        # plain python tuples
        selected_particles.append([tuple(e) for e in path[1:]])
    return selected_particles
98 
99 
def create_daughter_aliases(
    list_of_variables: Iterable[str],
    indices: Union[int, Iterable[int]],
    prefix="", include_indices=True
) -> List[str]:
    """Create Aliases for all variables for a given daughter hierarchy

    Arguments:
        list_of_variables (list(str)): list of variables to create aliases for
        indices (int or list(int)): index of the daughter, grand-daughter, grand-grand-daughter,
            and so forth. Any iterable of indices is accepted, including
            one-shot iterators.
        prefix (str): optional prefix to prepend to the aliases
        include_indices(bool): if set to True (default) the aliases will contain
            the daughter indices as dX_dY_dZ...

    Returns:
        list(str): new variables list

    * create aliases for the second daughter as "d1_E", "d1_m" (daughters start at 0)

      >>> create_daughter_aliases(["E", "m"], 1)
      ['d1_E', 'd1_m']
      >>> from variables import variables
      >>> variables.printAliases()
      [INFO] =========================
      [INFO] Following aliases exists:
      [INFO] 'd1_E' --> 'daughter(1,E)'
      [INFO] 'd1_m' --> 'daughter(1,m)'
      [INFO] =========================


    * create aliases for the first grand daughter of the second daughter,
      starting with "my" and without including the indices, resulting in "my_E", "my_m"

      >>> create_daughter_aliases(["E", "m"], [1, 0], prefix="my", include_indices=False)
      ['my_E', 'my_m']
      >>> from variables import variables
      >>> variables.printAliases()
      [INFO] =========================
      [INFO] Following aliases exists:
      [INFO] 'my_E' --> 'daughter(1,daughter(0,E))'
      [INFO] 'my_m' --> 'daughter(1,daughter(0,m))'
      [INFO] =========================

    * create aliases for the second grand grand daughter of the third grand
      daughter of the fifth daughter, starting with my and including the
      indices, resulting in "my_d4_d2_d1_E", "my_d4_d2_d1_m"

      >>> create_daughter_aliases(["E", "m"], [4, 2, 1], prefix="my")
      ['my_d4_d2_d1_E', 'my_d4_d2_d1_m']
      >>> from variables import variables
      >>> variables.printAliases()
      [INFO] =========================
      [INFO] Following aliases exists:
      [INFO] 'my_d4_d2_d1_E' --> 'daughter(4,daughter(2,daughter(1,E)))'
      [INFO] 'my_d4_d2_d1_m' --> 'daughter(4,daughter(2,daughter(1,m)))'
      [INFO] =========================

    """

    if isinstance(indices, collections.abc.Iterable):
        # the indices are walked twice (forward for the prefix, backwards for
        # the template) so materialize them: this also makes generators and
        # other iterables which don't support reversed() work
        indices = list(indices)
    else:
        indices = [indices]

    if include_indices:
        # append "_dN" for each index, drop the leading "_" if the prefix was empty
        prefix = functools.reduce(lambda x, y: f"{x}_d{y}", indices, prefix).lstrip("_")

    # nest the daughter() calls from the inside out so that the first index
    # ends up as the outermost call
    template = functools.reduce(lambda x, y: f"daughter({y},{x})", reversed(indices), "{variable}")
    return create_aliases(list_of_variables, template, prefix)
168 
169 
class DecayParticleNode:
    """
    Class to present selected particles from a DecayString as tree structure.
    For each node of the tree we save the name of the particle, whether it is
    selected and a dictionary of all children (as mapping decayIndex -> Node)
    """

    def __init__(self, name):
        """Just set default values

        Arguments:
            name (str): name of the particle represented by this node
        """
        # name of the particle
        self.name = name
        # whether or not this particle is selected
        self.selected = False
        # mapping of children decayIndex->Node
        self.children = {}

    def get_prefixes(self, always_include_indices=False, use_relative_indices=False):
        """
        Recursively walk through the tree of selected particles and return a list
        of prefixes for aliases and a tuple of decay indexes for that prefix.

        For example for ``^B0 -> [D0 -> ^pi+] ^pi0`` it might return

        >>> DecayParticleNode.build('^B0 -> [D0 -> ^pi+] ^pi0').get_prefixes()
        [ ("", None), ("D0_pi", (0, 0)), ("pi0", (1,)) ]

        and to create aliases from these one would use the indices as arguments for
        the b2:var:`daughter` meta variable.

        This function will make sure that prefix names are unique: If there are
        multiple siblings of one node with the same particle name they will be
        distinguished by either suffixing them with the decay index (if
        ``use_relative_indices=False``) or they will just be enumerated
        starting at 0 otherwise.

        Arguments:
            always_include_indices (bool): If True always add the index of the
                particle to the prefix, otherwise the index is only added if
                more than one sibling of the same particle exist.
            use_relative_indices (bool): If True the indices used will **not**
                be the daughter indices in the full decay string but just the
                relative indices: If multiple sibling particles with the same
                name exist they will be just numbered starting at zero as they
                appear in the aliases.

        Returns:
            list(tuple(str, tuple(int) or None)): one ``(prefix, path)`` pair
            for each selected particle. A selected mother particle is reported
            as ``("", None)``.
        """
        return self.__walk(always_include_indices, use_relative_indices, "", tuple())

    def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path):
        """Recursively walk the tree and collect all prefixes

        See:
            `get_prefixes`

        Arguments:
            always_include_indices (bool): see `get_prefixes()`
            use_relative_indices (bool): see `get_prefixes()`
            current_prefix: the current prefix so far collected from any parent
                particle.
            current_path: the current path of indices so far collected from any
                parent particle.
        """

        result = []
        # are we the mother particle and selected? if so, add a "no-prefix" to the output
        if not current_path and self.selected:
            result.append(("", None))

        # count the particle names of all daughters so that we know which ones we
        # have to index
        names = collections.Counter(e.name for e in self.children.values())
        # if we use relative indices start counting them at zero
        relative_indices = collections.defaultdict(int)

        # now loop over all children
        for index, c in sorted(self.children.items()):
            # prepare the full index path
            full_path = current_path + (index,)
            # and prepare the prefix
            prefix = current_prefix + c.name
            # is this particle name ambiguous or are all indices requested? add index
            if always_include_indices or names[c.name] > 1:
                prefix += "_{}".format(relative_indices[c.name] if use_relative_indices else index)
                # always increase the relative indices
                relative_indices[c.name] += 1

            # if the particle is selected add the prefix and the path
            if c.selected:
                result.append((prefix, full_path))

            # but in any case also process all children recursively
            result += c.__walk(always_include_indices, use_relative_indices, prefix + "_", full_path)

        # done, return all prefixes and their paths
        return result

    @classmethod
    def build(cls, decay_string):
        """Build a tree of selected particles from a `DecayString`

        This will return a `DecayParticleNode` instance which is the top of a
        tree of all the selected particles from the decay string.

        Arguments:
            decay_string (str): `DecayString` containing at least one selected particle

        Raises:
            ValueError: if no particle is selected in the decay string
        """
        selected = get_hierarchy_of_decay(decay_string)
        if not selected:
            raise ValueError("No particle selected in decay string")
        # create the top of the tree
        top = cls("")
        # now loop over all selected particles
        for path in selected:
            current = top
            # and walk through the path
            for index, name in path:
                # creating tree children as needed
                if index not in current.children:
                    current.children[index] = cls(name)
                # and update the pointer
                current = current.children[index]
            # after walking the tree the pointer is at the selected particle so
            # just set the selected to True
            current.selected = True

        # done, return the tree
        return top
296 
297 
def create_aliases_for_selected(
    list_of_variables: List[str],
    decay_string: str,
    prefix: Optional[Union[str, List[str]]] = None,
    *,
    use_names=True,
    always_include_indices=False,
    use_relative_indices=False
) -> List[str]:
    """
    Create aliases so that every variable in ``list_of_variables`` is
    calculated for every particle selected (``^``) in ``decay_string`` and
    return the list of alias names.

    How the aliases are named depends on the arguments:

    * ``use_names=True`` (the default): the alias is prefixed with the names of
      all parent particles and of the selected particle, separated by
      underscores. For ``B0 -> [D0 -> ^pi+ K-] pi0`` the aliases for the
      ``pi+`` start with ``D0_pi_``.

      >>> create_aliases_for_selected(['M','p'], 'B0 -> [D0 -> ^pi+ K-] pi0')
      ['D0_pi_M', 'D0_pi_p']

      These aliases resolve to ``daughter(0,daughter(0,M))`` and
      ``daughter(0,daughter(0,p))``.

    * If several siblings share the same name they are followed by their
      daughter index. For ``B0 -> [D0 -> ^pi+ ^pi- ^pi0] ^pi0`` the prefixes
      are ``D0_pi_0_``, ``D0_pi_1_``, ``D0_pi0_`` and ``pi0_``.

      >>> create_aliases_for_selected(['M','p'], 'B0 -> [D0 -> ^pi+ ^pi- ^pi0] ^pi0')
      ['D0_pi_0_M', 'D0_pi_0_p', 'D0_pi_1_M', 'D0_pi_1_p',
       'D0_pi0_M', 'D0_pi0_p', 'pi0_M', 'pi0_p']

    * ``always_include_indices=True`` adds the index even for particles which
      are unambiguous.

    * ``use_relative_indices`` chooses the numbering scheme for added indices:
      ``False`` uses the index from the decay string, ``True`` numbers
      particles of the same name starting at zero.

      >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=False)
      ['e_2_M', 'e_3_M']
      >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=True)
      ['e_0_M', 'e_1_M']

    * ``use_names=False``: the aliases just start with the daughter indices of
      all particles on the path, each prefixed with ``d`` and separated by
      underscores. For ``B0 -> [D0 -> ^pi+ ^pi- ^pi0] ^pi0`` this gives
      ``d0_d0_``, ``d0_d1_``, ``d0_d2_`` and ``d1_``. The arguments
      ``always_include_indices`` and ``use_relative_indices`` are ignored in
      this case.

    * A custom ``prefix`` overrides the naming: either a single string if only
      one particle is selected or a list with one unique string per selected
      particle.

      >>> decay_string = 'B0 -> [D0 -> ^pi+ ^pi- pi0] pi0'
      >>> create_aliases_for_selected(['M','p'], decay_string, prefix=['pip', 'pim'])
      ['pip_M', 'pip_p', 'pim_M', 'pim_p']

    If the mother particle itself is selected the input variables are added
    unchanged to the returned list, unless a custom prefix is given in which
    case aliases are created for the mother particle as well:

    >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-')
    ['M', 'p', 'pi_M', 'pi_p']
    >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-', prefix=['MyB', 'MyPi'])
    ['MyB_M', 'MyB_p', 'MyPi_M', 'MyPi_p']

    Parameters:
        list_of_variables (list(str)): list of variable names
        decay_string (str): Decay string with selected particles
        prefix (str, list(str)): Custom prefix for all selected particles
        use_names (bool): Include the names of the particles in the aliases
        always_include_indices (bool): If ``use_names=True`` include the decay
            index of the particles in the alias name even if the particle could
            be uniquely identified without them.
        use_relative_indices (bool): If ``use_names=True`` use a relative
            indexing which always starts at 0 for each particle appearing in
            the alias names independent of the absolute position in the decay
            string

    Returns:
        list(str): new variables list

    Raises:
        ValueError: if the number of custom prefixes differs from the number of
            selected particles or if the custom prefixes are not unique.
    """

    decay_tree = DecayParticleNode.build(decay_string)
    prefix_and_path = decay_tree.get_prefixes(always_include_indices, use_relative_indices)
    # split [(prefix, path), (prefix, path), ...] into the two separate sequences
    # (prefix, prefix, ...) and (path, path, ...)
    name_prefixes, daughter_paths = zip(*prefix_and_path)

    # the `dM_dN` daughter indices are only part of the alias if we neither
    # have custom prefixes nor use the particle names
    with_daughter_indices = False
    if prefix is None:
        if not use_names:
            # forget about the name based prefixes and use the indices instead
            name_prefixes = [""] * len(name_prefixes)
            with_daughter_indices = True
    else:
        # a single custom prefix is treated as list with one element
        custom_prefixes = [prefix] if isinstance(prefix, str) else prefix
        # there has to be exactly one custom prefix per selected particle
        if len(custom_prefixes) != len(name_prefixes):
            raise ValueError("Number of selected particles does not match number of supplied custom prefixes")
        # and none of them can be used more than once
        if max(collections.Counter(custom_prefixes).values()) > 1:
            raise ValueError("Prefixes need to be unique")
        # all fine, the custom prefixes replace the calculated ones
        name_prefixes = custom_prefixes

    created = []
    for particle_prefix, daughter_path in zip(name_prefixes, daughter_paths):
        if daughter_path is not None:
            # a daughter is selected: wrap the variables to reach that daughter
            created += create_daughter_aliases(
                list_of_variables, daughter_path, particle_prefix, with_daughter_indices)
        elif particle_prefix:
            # the mother is selected and has a prefix: create aliases which
            # just rename the variables
            created += create_aliases(list_of_variables, "{variable}", particle_prefix)
        else:
            # the mother is selected without prefix: hand back the plain variables
            created += list_of_variables

    return created
484 
485 
def create_mctruth_aliases(
    list_of_variables: Iterable[str],
    prefix="mc"
) -> List[str]:
    """
    Create aliases which wrap each variable of the list in ``matchedMC()``.

    >>> list_of_variables = ['M','p']
    >>> create_mctruth_aliases(list_of_variables)
    ['mc_M', 'mc_p']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =========================
    [INFO] Following aliases exists:
    [INFO] 'mc_M' --> 'matchedMC(M)'
    [INFO] 'mc_p' --> 'matchedMC(p)'
    [INFO] =========================


    Parameters:
        list_of_variables (list(str)): list of variable names
        prefix (str): prefix of the alias names, ``mc`` by default

    Returns:
        list(str): list of created aliases
    """
    mc_wrapper = 'matchedMC({variable})'
    return create_aliases(list_of_variables, wrapper=mc_wrapper, prefix=prefix)
512 
513 
def add_collection(list_of_variables: Iterable[str], collection_name: str) -> str:
    """
    Register the given variables as a variable collection called
    ``collection_name``.

    This is a convenience wrapper around `VariableManager.addCollection`
    which takes care of converting the variable names with ``std_vector``.

    Example:

    Defining the collection
        >>> variables.utils.add_collection(['p','E'], "my_collection")

    Passing it as an argument to variablesToNtuple
        >>> modularAnalysis.variablesToNtuple(variables=['my_collection'], ...)

    Parameters:
        list_of_variables (list(str)): list of variable names
        collection_name (str): name of the collection

    Returns:
        str: name of the variable collection
    """

    members = variables.std_vector(*list_of_variables)
    variables.variables.addCollection(collection_name, members)
    return collection_name
537 
538 
def create_isSignal_alias(aliasName, flags):
    """
    Make a `VariableManager` alias for a customized :b2:var:`isSignal`, which accepts specified mc match errors.

    .. seealso:: see :doc:`MCMatching` for a definition of the mc match error flags.

    The following code defines a new variable ``isSignalAcceptMissingGammaAndMissingNeutrino``, which is same
    as :b2:var:`isSignal`, but also accepts missing gamma and missing neutrino

    >>> create_isSignal_alias("isSignalAcceptMissingGammaAndMissingNeutrino", [16, 8])

    Logically, this
    ``isSignalAcceptMissingGammaAndMissingNeutrino`` =
    :b2:var:`isSignalAcceptMissingGamma` || :b2:var:`isSignalAcceptMissingNeutrino`.

    In the example above, create_isSignal_alias() creates ``isSignalAcceptMissingGammaAndMissingNeutrino`` by
    unmasking (setting bits to zero)
    the ``c_MissGamma`` bit (16 or 0b00010000) and ``c_MissNeutrino`` bit (8 or 0b00001000) in mcErrors.

    For more information, please check this
    `example script
    <https://gitlab.desy.de/belle2/software/basf2/-/tree/main/analysis/examples/VariableManager/isSignalAcceptFlags.py>`_.

    Parameters:
        aliasName (str): the name of the alias to be set
        flags (list(int)): a list of the bits to unmask

    Raises:
        ValueError: if one of the flags is not an integer. No alias is
            registered in that case.
    """

    # combine all flags into one bit mask, all of them are validated before
    # the alias is registered
    mask = 0
    for flag in flags:
        if isinstance(flag, int):
            mask |= flag
        else:
            # report the type of the offending flag, not the type `int` itself
            informationString = "The type of input flags of create_isSignal_alias() should be integer. "
            informationString += "Now one of the input flags is " + str(type(flag)) + " ."
            raise ValueError(informationString)

    # the candidate is accepted if no error bit is left after unmasking
    variables.variables.addAlias(aliasName, "passesCut(unmask(mcErrors, %d) == %d)" % (mask, 0))
def std_vector(*args)
Definition: __init__.py:134
The DecayDescriptor stores information about a decay tree or parts of a decay tree.
children
mapping of children decayIndex->Node
Definition: utils.py:184
selected
whether or not this particle is selected
Definition: utils.py:182
def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path)
Definition: utils.py:217
def get_prefixes(self, always_include_indices=False, use_relative_indices=False)
Definition: utils.py:186
name
name of the particle
Definition: utils.py:180
def __init__(self, name)
Definition: utils.py:177
def build(cls, decay_string)
Definition: utils.py:266