Belle II Software  release-06-02-00
utils.py
1 #!/usr/bin/env python3
2 
3 
10 import functools
11 import collections
12 import re
13 from variables import variables as _variablemanager
14 from variables import std_vector as _std_vector
15 from typing import Iterable, Union, List, Tuple, Optional
16 
17 
def create_aliases(list_of_variables: Iterable[str], wrapper: str, prefix: str) -> List[str]:
    """
    Register one alias per variable, where each alias points to the variable
    wrapped by ``wrapper``, and return the names of the registered aliases.

    The alias name is ``<prefix>_<variable>``. Every run of non-alphanumeric
    characters in the variable (for example the parentheses in
    ``useLabFrame(p)``) is replaced by a single underscore and underscores at
    the start and end are stripped, so the alias name becomes ``useLabFrame_p``.

    >>> list_of_variables = ['M','p','matchedMC(useLabFrame(px))']
    >>> wrapper = 'daughter(1,{variable})'
    >>> prefix = 'pref'
    >>> print(create_aliases(list_of_variables, wrapper, prefix))
    ['pref_M', 'pref_p', 'pref_matchedMC_useLabFrame_px']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =====================================
    [INFO] The following aliases are registered:
    [INFO] pref_M                        --> daughter(1,M)
    [INFO] pref_matchedMC_useLabFrame_px --> daughter(1,matchedMC(useLabFrame(px)))
    [INFO] pref_p                        --> daughter(1,p)
    [INFO] =====================================

    Parameters:
        list_of_variables (list(str)): list of variable names
        wrapper (str): metafunction taking variables from list_of_variables as a parameter
            (``<metafunction>(<some configs>, {variable} ,<some other configs>)``)
        prefix (str): alias prefix used for wrapped variables.

    Returns:
        list(str): new variables list
    """
    unsafe_characters = re.compile('[^a-zA-Z0-9]+')

    def alias_name_for(variable):
        # collapse everything that is not alphanumeric into "_" and drop
        # underscores at either end of the result
        cleaned = unsafe_characters.sub("_", variable).strip("_")
        return f"{prefix}_{cleaned}"

    created = []
    for variable in list_of_variables:
        alias = alias_name_for(variable)
        created.append(alias)
        _variablemanager.addAlias(alias, wrapper.format(variable=variable))

    return created
59 
60 
def get_hierarchy_of_decay(decay_string: str) -> List[List[Tuple[int, str]]]:
    """
    This function returns paths of the particles selected in decay string. For
    each selected particle return a list of (index, name) tuples which indicate
    which daughter index to choose to arrive at the selected particle.

    For example for the decay string ``B+ -> [ D+ -> pi0 ^K+ ] pi0`` the
    resulting path for the K+ would be ``[(0, 'D'), (1, 'K')]``: The K is the
    second daughter of the first daughter of the B+

    >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] pi0')
    [[(0, 'D'), (0, 'K')]]

    Every selected particle has its own path so if multiple particles are
    collected a list of paths is returned

    >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] ^pi0')
    [[(0, 'D'), (0, 'K')], [(1, 'pi0')]]

    If the mother particle is selected an empty list will be returned as its path

    >>> get_hierarchy_of_decay('^B+ -> ^pi+ pi-')
    [[], [(0, 'pi')]]

    Parameters:
        decay_string (str): Decay string with selected particles

    Returns:
        list(list(tuple(int, str))): list of hierarchies of selected particles.

    Raises:
        ValueError: if the decay descriptor cannot be initialised from
            ``decay_string``.
    """
    # ROOT is imported inside the function, not at module level
    from ROOT import Belle2

    # the descriptor has to be created before it can parse the decay string
    descriptor = Belle2.DecayDescriptor()
    if not descriptor.init(decay_string):
        raise ValueError("Invalid decay string")

    # The first entry of every path is skipped; presumably it is the mother
    # particle itself, since a selected mother yields an empty path.
    return [
        [tuple(step) for step in path[1:]]
        for path in descriptor.getHierarchyOfSelected()
    ]
100 
101 
def create_daughter_aliases(
    list_of_variables: Iterable[str],
    indices: Union[int, Iterable[int]],
    prefix="", include_indices=True
) -> List[str]:
    """Create Aliases for all variables for a given daughter hierarchy

    Arguments:
        list_of_variables (list(str)): list of variables to create aliases for
        indices (int or list(int)): index of the daughter, grand-daughter, grand-grand-daughter,
            and so forth
        prefix (str): optional prefix to prepend to the aliases
        include_indices(bool): if set to True (default) the aliases will contain
            the daughter indices as dX_dY_dZ...

    Returns:
        list(str): new variables list

    * create aliases for the second daughter as "d1_E", "d1_m" (daughters start at 0)

        >>> create_daughter_aliases(["E", "m"], 1)
        ['d1_E', 'd1_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'd1_E' --> 'daughter(1,E)'
        [INFO] 'd1_m' --> 'daughter(1,m)'
        [INFO] =========================


    * create aliases for the first grand daughter of the second daughter,
      starting with "my" and without including the indices, resulting in "my_E", "my_m"

        >>> create_daughter_aliases(["E", "m"], [1, 0], prefix="my", include_indices=False)
        ['my_E', 'my_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'my_E' --> 'daughter(1,daughter(0,E))'
        [INFO] 'my_m' --> 'daughter(1,daughter(0,m))'
        [INFO] =========================

    * create aliases for the second grand grand daughter of the third grand
      daughter of the fifth daughter, starting with my and including the
      indices, resulting in "my_d4_d2_d1_E", "my_d4_d2_d1_m"

        >>> create_daughter_aliases(["E", "m"], [4, 2, 1], prefix="my")
        ['my_d4_d2_d1_E', 'my_d4_d2_d1_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'my_d4_d2_d1_E' --> 'daughter(4,daughter(2,daughter(1,E)))'
        [INFO] 'my_d4_d2_d1_m' --> 'daughter(4,daughter(2,daughter(1,m)))'
        [INFO] =========================

    """
    # ``collections.Iterable`` was removed in Python 3.10; the abstract base
    # class lives in ``collections.abc``.
    from collections.abc import Iterable as IterableABC

    # Normalise to a list: the indices are traversed twice below (once forward,
    # once reversed), which would fail for one-shot iterables like generators.
    if isinstance(indices, IterableABC):
        indices = list(indices)
    else:
        indices = [indices]

    if include_indices:
        # append "_dN" for each level; with an empty prefix the leading
        # underscore is removed again
        prefix = functools.reduce(lambda x, y: f"{x}_d{y}", indices, prefix).lstrip("_")

    # nest the daughter() calls from the innermost index outwards
    template = functools.reduce(lambda x, y: f"daughter({y},{x})", reversed(indices), "{variable}")
    return create_aliases(list_of_variables, template, prefix)
170 
171 
class DecayParticleNode:
    """
    Class to present selected particles from a DecayString as tree structure.
    For each node of the tree we save the name of the particle, whether it is
    selected and a dictionary of all children (as mapping decayIndex -> Node)
    """

    def __init__(self, name):
        """Just set default values"""
        # name of the particle
        self.name = name
        # whether or not this particle is selected
        self.selected = False
        # mapping of children decayIndex->Node
        self.children = {}

    def get_prefixes(self, always_include_indices=False, use_relative_indices=False):
        """
        Recursively walk through the tree of selected particles and return a list
        of prefixes for aliases and a tuple of decay indexes for that prefix.

        For example for ``B0 -> [D0 -> ^pi+] ^pi0`` it might return

        >>> DecayParticleNode.build('^B0 -> [D0 -> ^pi+] ^pi0').get_prefixes()
        [ ("", None), ("D0_pi", (0, 0)), ("pi0", (1,)) ]

        and to create aliases from these one would use the indices as arguments for
        the b2:var:`daughter` meta variable.

        This function will make sure that prefix names are unique: If there are
        multiple siblings of one node with the same particle name they will be
        distinguished by either suffixing them with the decay index (if
        ``use_relative_indices=False``) or they will just be enumerated
        starting at 0 otherwise.

        Arguments:
            always_include_indices (bool): If True always add the index of the
                particle to the prefix, otherwise the index is only added if
                more than one sibling of the same particle exist.
            use_relative_indices (bool): If True the indices used will **not**
                be the daughter indices in the full decay string but just the
                relative indices: If multiple sibling particles with the same
                name they will be just numbered starting at zero as they appear
                in the aliases.

        Returns:
            list(tuple(str, tuple(int) or None)): one ``(prefix, path)`` entry
            per selected particle; the path is None for the top node itself.
        """
        return self.__walk(always_include_indices, use_relative_indices, "", tuple())

    def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path):
        """Recursively walk the tree and collect all prefixes

        See:
            `get_prefixes`

        Arguments:
            always_include_indices (bool): see `get_prefixes()`
            use_relative_indices (bool): see `get_prefixes()`
            current_prefix: the current prefix so far collected from any parent
                particle.
            current_path: the current path of indices so far collected from any
                parent particle.
        """

        result = []
        # are we the mother particle and selected? if so, add a "no-prefix" to the output
        if not current_path and self.selected:
            result.append(("", None))

        # count the particle names of all daughters so that we know which ones we
        # have to index
        names = collections.Counter(e.name for e in self.children.values())
        # if we use relative indices start counting them at zero
        relative_indices = collections.defaultdict(int)

        # now loop over all children
        for index, c in sorted(self.children.items()):
            # prepare the full index path
            full_path = current_path + (index,)
            # and prepare the prefix
            prefix = current_prefix + c.name
            # is this particle name ambiguous or are all indices requested? add index
            if always_include_indices or names[c.name] > 1:
                prefix += "_{}".format(relative_indices[c.name] if use_relative_indices else index)
                # always increase the relative indices
                relative_indices[c.name] += 1

            # if the particle is selected add the prefix and the path
            if c.selected:
                result.append((prefix, full_path))

            # but in any case also process all children recursively
            result += c.__walk(always_include_indices, use_relative_indices, prefix + "_", full_path)

        # done, return all prefixes and their paths
        return result

    @classmethod
    def build(cls, decay_string):
        """Build a tree of selected particles from a `DecayString`

        This will return a `DecayParticleNode` instance which is the top of a
        tree of all the selected particles from the decay string.

        Arguments:
            decay_string (str): `DecayString` containing at least one selected particle

        Raises:
            ValueError: if no particle is selected in the decay string.
        """
        selected = get_hierarchy_of_decay(decay_string)
        if not selected:
            raise ValueError("No particle selected in decay string")
        # create the top of the tree
        top = cls("")
        # now loop over all selected particles
        for path in selected:
            current = top
            # and walk through the path
            for index, name in path:
                # creating tree children as needed
                if index not in current.children:
                    current.children[index] = cls(name)
                # and update the pointer
                current = current.children[index]
            # after walking the tree the pointer is at the selected particle so
            # just set the selected to True
            current.selected = True

        # done, return the tree
        return top
298 
299 
def create_aliases_for_selected(
    list_of_variables: List[str],
    decay_string: str,
    prefix: Optional[Union[str, List[str]]] = None,
    *,
    use_names=True,
    always_include_indices=False,
    use_relative_indices=False
) -> List[str]:
    """
    Create aliases so that every variable in ``list_of_variables`` is
    calculated for every particle selected (``^``) in the decay string, and
    return the list of all these aliases.

    With ``use_names=True`` (the default) the alias names are built as follows:

    * If names are unambiguous the alias is prefixed with the names of all
      parent particles separated by underscores. For the decay string
      ``B0 -> [D0 -> ^pi+ K-] pi0`` the aliases for the ``pi+`` start with
      ``D0_pi_`` followed by the variable name.

        >>> list_of_variables = ['M','p']
        >>> decay_string = 'B0 -> [D0 -> ^pi+ K-] pi0'
        >>> create_aliases_for_selected(list_of_variables, decay_string)
        ['D0_pi_M', 'D0_pi_p']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'D0_pi_M' --> 'daughter(0,daughter(0,M))'
        [INFO] 'D0_pi_p' --> 'daughter(0,daughter(0,p))'
        [INFO] =========================

    * If several daughters of one particle share a name, these particles are
      followed by their daughter index. The decay string
      ``B0 -> [D0 -> ^pi+:1 ^pi-:2 ^pi0:3 ] ^pi0:4`` yields the following
      prefixes for the particle with the corresponding list name:

      1. ``D0_pi_0_``
      2. ``D0_pi_1_``
      3. ``D0_pi0_``
      4. ``pi0_``

        >>> list_of_variables = ['M','p']
        >>> decay_string = 'B0 -> [D0 -> ^pi+ ^pi- ^pi0] ^pi0'
        >>> create_aliases_for_selected(list_of_variables, decay_string)
        ['D0_pi_0_M', 'D0_pi_0_p', 'D0_pi_1_M', 'D0_pi_1_p',
         'D0_pi0_M', 'D0_pi0_p', 'pi0_M', 'pi0_p']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'D0_pi0_M' --> 'daughter(0,daughter(2,M))'
        [INFO] 'D0_pi0_p' --> 'daughter(0,daughter(2,p))'
        [INFO] 'D0_pi_0_M' --> 'daughter(0,daughter(0,M))'
        [INFO] 'D0_pi_0_p' --> 'daughter(0,daughter(0,p))'
        [INFO] 'D0_pi_1_M' --> 'daughter(0,daughter(1,M))'
        [INFO] 'D0_pi_1_p' --> 'daughter(0,daughter(1,p))'
        [INFO] 'D0_pi_M' --> 'daughter(0,daughter(0,M))'
        [INFO] 'D0_pi_p' --> 'daughter(0,daughter(0,p))'
        [INFO] 'pi0_M' --> 'daughter(1,M)'
        [INFO] 'pi0_p' --> 'daughter(1,p)'
        [INFO] =========================

    * ``always_include_indices=True`` adds the index even for unambiguous
      particles.

    * Two numbering schemes exist. With ``use_relative_indices=False`` the
      original decay string indices are used whenever an index is added to a
      particle name. With ``use_relative_indices=True`` the numbering starts
      at zero for each particle name which is part of the prefixes:

        >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=False)
        ['e_2_M', 'e_3_M']
        >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=True)
        ['e_0_M', 'e_1_M']

    With ``use_names=False`` the aliases start with the daughter indices of all
    parent particles, each prefixed with a ``d`` and separated by underscores.
    For ``B0 -> [D0 -> ^pi+:1 ^pi-:2 ^pi0:3 ] ^pi0:4`` the aliases start with

    1. ``d0_d0_``
    2. ``d0_d1_``
    3. ``d0_d2_``
    4. ``d1_``

    and ``always_include_indices`` and ``use_relative_indices`` are ignored.

    A custom prefix can be supplied for each selected particle: ``prefix`` is
    either a single string if only one particle is selected or a list of
    strings with one prefix per selected particle.

    >>> list_of_variables = ['M','p']
    >>> decay_string = 'B0 -> [D0 -> ^pi+ ^pi- pi0] pi0'
    >>> create_aliases_for_selected(list_of_variables, decay_string, prefix=['pip', 'pim'])
    ['pip_M', 'pip_p', 'pim_M', 'pim_p']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =========================
    [INFO] Following aliases exists:
    [INFO] 'pim_M' --> 'daughter(0,daughter(1,M))'
    [INFO] 'pim_p' --> 'daughter(0,daughter(1,p))'
    [INFO] 'pip_M' --> 'daughter(0,daughter(0,M))'
    [INFO] 'pip_p' --> 'daughter(0,daughter(0,p))'
    [INFO] =========================

    If the mother particle itself is selected the input list of variables is
    also added to the returned list. If custom prefixes are supplied, aliases
    are created for the mother particle as well:

    >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-')
    ['M', 'p', 'pi_M', 'pi_p']
    >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-', prefix=['MyB', 'MyPi'])
    ['MyB_M', 'MyB_p', 'MyPi_M', 'MyPi_p']

    Parameters:
        list_of_variables (list(str)): list of variable names
        decay_string (str): Decay string with selected particles
        prefix (str, list(str)): Custom prefix for all selected particles
        use_names (bool): Include the names of the particles in the aliases
        always_include_indices (bool): If ``use_names=True`` include the decay
            index of the particles in the alias name even if the particle could
            be uniquely identified without them.
        use_relative_indices (bool): If ``use_names=True`` use a relative
            indexing which always starts at 0 for each particle appearing in
            the alias names independent of the absolute position in the decay
            string

    Returns:
        list(str): new variables list

    Raises:
        ValueError: if the number of custom prefixes differs from the number
            of selected particles or if the custom prefixes are not unique.
    """
    tree = DecayParticleNode.build(decay_string)
    # split [(prefix, path), (prefix, path), ...] into the two parallel
    # sequences (prefix, prefix, ...) and (path, path, ...)
    name_prefixes, paths = zip(*tree.get_prefixes(always_include_indices, use_relative_indices))

    # the `dM_dN` daughter indices are only wanted when names are switched off
    with_daughter_indices = False
    if prefix is not None:
        # a single string counts as a list with one custom prefix
        custom_prefixes = [prefix] if isinstance(prefix, str) else prefix
        if len(custom_prefixes) != len(name_prefixes):
            raise ValueError("Number of selected particles does not match number of supplied custom prefixes")
        occurrences = collections.Counter(custom_prefixes)
        if any(count > 1 for count in occurrences.values()):
            raise ValueError("Prefixes need to be unique")
        # custom prefixes replace the calculated ones
        name_prefixes = custom_prefixes
    elif not use_names:
        # no names requested: blank out the prefixes and let the daughter
        # indices identify the particles instead
        name_prefixes = [""] * len(name_prefixes)
        with_daughter_indices = True

    created = []
    for particle_prefix, path in zip(name_prefixes, paths):
        if path is not None:
            # a daughter is selected: wrap the variables in daughter() calls
            created.extend(
                create_daughter_aliases(list_of_variables, path, particle_prefix, with_daughter_indices)
            )
        elif particle_prefix:
            # mother particle with a prefix: plain aliases without any wrapping
            created.extend(create_aliases(list_of_variables, "{variable}", particle_prefix))
        else:
            # mother particle without a prefix: the variables are used as they are
            created.extend(list_of_variables)

    return created
486 
487 
def create_mctruth_aliases(
    list_of_variables: Iterable[str],
    prefix="mc"
) -> List[str]:
    """
    Create aliases which wrap every variable of the list with 'matchedMC()'.

    >>> list_of_variables = ['M','p']
    >>> create_mctruth_aliases(list_of_variables)
    ['mc_M', 'mc_p']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =========================
    [INFO] Following aliases exists:
    [INFO] 'mc_M' --> 'matchedMC(M)'
    [INFO] 'mc_p' --> 'matchedMC(p)'
    [INFO] =========================


    Parameters:
        list_of_variables (list(str)): list of variable names
        prefix (str): prefix of the alias names, ``mc`` by default

    Returns:
        list(str): list of created aliases
    """
    mc_wrapper = 'matchedMC({variable})'
    return create_aliases(list_of_variables, wrapper=mc_wrapper, prefix=prefix)
514 
515 
def add_collection(list_of_variables: Iterable[str], collection_name: str) -> str:
    """
    Register the given variables as a variable collection under the given name.
    It wraps the `VariableManager.addCollection` method which is not particularly user-friendly.

    Example:

        Defining the collection
            >>> variables.utils.add_collection(['p','E'], "my_collection")

        Passing it as an argument to variablesToNtuple
            >>> modularAnalysis.variablesToNtuple(variables=['my_collection'], ...)

    Parameters:
        list_of_variables (list(str)): list of variable names
        collection_name (str): name of the collection

    Returns:
        str: name of the variable collection
    """
    # the variable names are passed to the vector helper as separate arguments
    variable_vector = _std_vector(*list_of_variables)
    _variablemanager.addCollection(collection_name, variable_vector)
    return collection_name
539 
540 
def create_isSignal_alias(aliasName, flags):
    """
    Make a `VariableManager` alias for a customized :b2:var:`isSignal`, which accepts specified mc match errors.

    .. seealso:: see :doc:`MCMatching` for a definition of the mc match error flags.

    The following code defines a new variable ``isSignalAcceptMissingGammaAndMissingNeutrino``, which is same
    as :b2:var:`isSignal`, but also accepts missing gamma and missing neutrino

    >>> create_isSignal_alias("isSignalAcceptMissingGammaAndMissingNeutrino", [16, 8])

    Logically, this
    ``isSignalAcceptMissingGammaAndMissingNeutrino`` =
    :b2:var:`isSignalAcceptMissingGamma` || :b2:var:`isSignalAcceptMissingNeutrino`.

    In the example above, create_isSignal_alias() creates ``isSignalAcceptMissingGammaAndMissingNeutrino`` by
    unmasking (setting bits to zero)
    the ``c_MissGamma`` bit (16 or 0b00010000) and ``c_MissNeutrino`` bit (8 or 0b00001000) in mcErrors.

    For more information, please check this `example script <https://stash.desy.de/projects/B2/repos/basf2/
    browse/analysis/examples/VariableManager/isSignalAcceptFlags.py>`_.

    Parameters:
        aliasName (str): the name of the alias to be set
        flags (list(int)): a list of the bits to unmask

    Raises:
        ValueError: if one of the flags is not an integer. No alias is
            registered in that case.
    """

    mask = 0
    for flag in flags:
        if not isinstance(flag, int):
            # report the offending flag itself; the message used to print the
            # ``int`` type object regardless of what was passed in
            informationString = "The type of input flags of create_isSignal_alias() should be integer. "
            informationString += f"Now one of the input flags is {flag!r} of type {type(flag).__name__} ."
            raise ValueError(informationString)
        # combine all requested bits into a single mask
        mask |= flag

    _variablemanager.addAlias(aliasName, "passesCut(unmask(mcErrors, %d) == %d)" % (mask, 0))
The DecayDescriptor stores information about a decay tree or parts of a decay tree.
children
mapping of children decayIndex->Node
Definition: utils.py:186
selected
whether or not this particle is selected
Definition: utils.py:184
def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path)
Definition: utils.py:219
def get_prefixes(self, always_include_indices=False, use_relative_indices=False)
Definition: utils.py:188
name
name of the particle
Definition: utils.py:182
def __init__(self, name)
Definition: utils.py:179
def build(cls, decay_string)
Definition: utils.py:268