Belle II Software  release-05-02-19
utils.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 import functools
4 import collections
5 import re
6 from variables import variables as _variablemanager
7 from variables import std_vector as _std_vector
8 from typing import Iterable, Union, List, Tuple, Optional
9 
10 
def create_aliases(list_of_variables: Iterable[str], wrapper: str, prefix: str) -> List[str]:
    """
    Register one alias per variable, each wrapping the variable in ``wrapper``,
    and return the names of the aliases in input order.

    Every alias is called ``<prefix>_<variable>``. Runs of characters in the
    variable which are not ASCII letters or digits (for example the brackets
    in ``useLabFrame(px)``) are replaced by a single underscore for the alias
    name and underscores at either end are dropped, so ``useLabFrame(px)``
    turns into ``useLabFrame_px``.

        >>> list_of_variables = ['M','p','matchedMC(useLabFrame(px))']
        >>> wrapper = 'daughter(1,{variable})'
        >>> prefix = 'pref'
        >>> print(create_aliases(list_of_variables, wrapper, prefix))
        ['pref_M', 'pref_p', 'pref_matchedMC_useLabFrame_px']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =====================================
        [INFO] The following aliases are registered:
        [INFO] pref_M                        --> daughter(1,M)
        [INFO] pref_matchedMC_useLabFrame_px --> daughter(1,matchedMC(useLabFrame(px)))
        [INFO] pref_p                        --> daughter(1,p)
        [INFO] =====================================

    Parameters:
        list_of_variables (list(str)): list of variable names
        wrapper (str): metafunction taking the variables from
            ``list_of_variables`` as a parameter, written as a format string
            with a ``{variable}`` placeholder, e.g.
            ``<metafunction>(<some configs>, {variable}, <some other configs>)``
        prefix (str): alias prefix used for wrapped variables.

    Returns:
        list(str): new variables list
    """
    unsafe_characters = re.compile('[^a-zA-Z0-9]+')

    def register(variable):
        """Register the alias for one variable and hand back the alias name."""
        # anything unsafe for an alias name becomes "_" (but not at the ends)
        cleaned = unsafe_characters.sub("_", variable).strip("_")
        alias_name = "{}_{}".format(prefix, cleaned)
        _variablemanager.addAlias(alias_name, wrapper.format(variable=variable))
        return alias_name

    return [register(variable) for variable in list_of_variables]
52 
53 
def get_hierarchy_of_decay(decay_string: str) -> List[List[Tuple[int, str]]]:
    """
    This function returns paths of the particles selected in decay string. For
    each selected particle return a list of (index, name) tuples which indicate
    which daughter index to choose to arrive at the selected particle.

    For example for the decay string ``B+ -> [ D+ -> pi0 ^K+ ] pi0`` the
    resulting path for the K+ would be ``[(0, 'D'), (1, 'K')]``: The K is the
    second daughter of the first daughter of the B+

        >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] pi0')
        [[(0, 'D'), (0, 'K')]]

    Every selected particle has its own path so if multiple particles are
    collected a list of paths is returned

        >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] ^pi0')
        [[(0, 'D'), (0, 'K')], [(1, 'pi0')]]

    If the mother particle is selected an empty list will be returned as its path

        >>> get_hierarchy_of_decay('^B+ -> ^pi+ pi-')
        [[], [(0, 'pi')]]

    Parameters:
        decay_string (str): Decay string with selected particles

    Returns:
        list(list(tuple(int, str))): list of hierarchies of selected particles.

    Raises:
        ValueError: if the decay descriptor cannot be initialised from
            ``decay_string``
    """
    # the ROOT bindings are imported locally, only when the function is called
    from ROOT import Belle2

    # the descriptor has to be created before it can parse the decay string
    descriptor = Belle2.DecayDescriptor()
    if not descriptor.init(decay_string):
        raise ValueError("Invalid decay string")

    # The first entry of every path is dropped: judging from the examples above
    # it presumably describes the mother particle itself, which is not a
    # daughter index (a selected mother yields an empty path).
    return [
        [tuple(step) for step in path[1:]]
        for path in descriptor.getHierarchyOfSelected()
    ]
93 
94 
def create_daughter_aliases(
    list_of_variables: Iterable[str],
    indices: Union[int, Iterable[int]],
    prefix="", include_indices=True
) -> List[str]:
    """Create Aliases for all variables for a given daughter hierarchy

    Arguments:
        list_of_variables (list(str)): list of variables to create aliases for
        indices (int or list(int)): index of the daughter, grand-daughter, grand-grand-daughter,
            and so forth. Any iterable of indices is accepted.
        prefix (str): optional prefix to prepend to the aliases
        include_indices(bool): if set to True (default) the aliases will contain
            the daughter indices as dX_dY_dZ...

    Returns:
        list(str): new variables list

    * create aliases for the second daughter as "d1_E", "d1_m" (daughters start at 0)

        >>> create_daughter_aliases(["E", "m"], 1)
        ['d1_E', 'd1_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'd1_E' --> 'daughter(1,E)'
        [INFO] 'd1_m' --> 'daughter(1,m)'
        [INFO] =========================

    * create aliases for the first grand daughter of the second daughter,
      starting with "my" and without including the indices, resulting in "my_E", "my_m"

        >>> create_daughter_aliases(["E", "m"], [1, 0], prefix="my", include_indices=False)
        ['my_E', 'my_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'my_E' --> 'daughter(1,daughter(0,E))'
        [INFO] 'my_m' --> 'daughter(1,daughter(0,m))'
        [INFO] =========================

    * create aliases for the second grand grand daughter of the third grand
      daughter of the fifth daughter, starting with my and including the
      indices, resulting in "my_d4_d2_d1_E", "my_d4_d2_d1_m"

        >>> create_daughter_aliases(["E", "m"], [4, 2, 1], prefix="my")
        ['my_d4_d2_d1_E', 'my_d4_d2_d1_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'my_d4_d2_d1_E' --> 'daughter(4,daughter(2,daughter(1,E)))'
        [INFO] 'my_d4_d2_d1_m' --> 'daughter(4,daughter(2,daughter(1,m)))'
        [INFO] =========================

    """
    # The ABC aliases in ``collections`` (``collections.Iterable``) were
    # removed in Python 3.10, the ABCs live in ``collections.abc``.
    from collections.abc import Iterable as _IterableABC

    # Materialise the indices once: they are traversed twice below and
    # ``reversed()`` needs a sequence, which a generator or set is not.
    if isinstance(indices, _IterableABC):
        indices = list(indices)
    else:
        indices = [indices]

    if include_indices:
        # append "_dX" for every level; a leading "_" (empty prefix) is dropped
        index_suffix = "".join(f"_d{index}" for index in indices)
        prefix = f"{prefix}{index_suffix}".lstrip("_")

    # build the nested call from the inside out: the last index is the
    # innermost ``daughter(...)``
    template = "{variable}"
    for index in reversed(indices):
        template = f"daughter({index},{template})"

    return create_aliases(list_of_variables, template, prefix)
163 
164 
class DecayParticleNode:
    """
    Class to present selected particles from a DecayString as tree structure.
    For each node of the tree we save the name of the particle, whether it is
    selected and a dictionary of all children (as mapping decayIndex -> Node)
    """

    def __init__(self, name):
        """Just set default values

        Arguments:
            name (str): name of the particle represented by this node
        """
        #: name of the particle
        self.name = name
        #: whether or not this particle is selected
        self.selected = False
        #: mapping of children decayIndex->Node
        self.children = {}

    def get_prefixes(self, always_include_indices=False, use_relative_indices=False):
        """
        Recursively walk through the tree of selected particles and return a list
        of prefixes for aliases and a tuple of decay indexes for that prefix.

        For example for ``^B0 -> [D0 -> ^pi+] ^pi0`` it might return

            >>> DecayParticleNode.build('^B0 -> [D0 -> ^pi+] ^pi0').get_prefixes()
            [ ("", None), ("D0_pi", (0, 0)), ("pi0", (1,)) ]

        and to create aliases from these one would use the indices as arguments for
        the b2:var:`daughter` meta variable.

        This function will make sure that prefix names are unique: If there are
        multiple siblings of one node with the same particle name they will be
        distinguished by either suffixing them with the decay index (if
        ``use_relative_indices=False``) or they will just be enumerated
        starting at 0 otherwise.

        Arguments:
            always_include_indices (bool): If True always add the index of the
                particle to the prefix, otherwise the index is only added if
                more than one sibling of the same particle exist.
            use_relative_indices (bool): If True the indices used will **not**
                be the daughter indices in the full decay string but just the
                relative indices: If multiple sibling particles with the same
                name exist they will be just numbered starting at zero as they
                appear in the aliases.

        Returns:
            list(tuple(str, tuple(int) or None)): one ``(prefix, path)`` entry
            per selected particle; a selected top node is reported as
            ``("", None)``.
        """
        return self.__walk(always_include_indices, use_relative_indices, "", tuple())

    def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path):
        """Recursively walk the tree and collect all prefixes

        See:
            `get_prefixes`

        Arguments:
            always_include_indices (bool): see `get_prefixes()`
            use_relative_indices (bool): see `get_prefixes()`
            current_prefix: the current prefix so far collected from any parent
                particle.
            current_path: the current path of indices so far collected from any
                parent particle.

        Returns:
            list of ``(prefix, path)`` tuples for this node's subtree
        """
        result = []
        # are we the mother particle and selected? if so, add a "no-prefix" to the output
        if not current_path and self.selected:
            result.append(("", None))

        # count the particle names of all daughters so that we know which ones we
        # have to index
        names = collections.Counter(e.name for e in self.children.values())
        # if we use relative indices start counting them at zero
        relative_indices = collections.defaultdict(int)

        # now loop over all children, ordered by their decay index
        for index, c in sorted(self.children.items()):
            # prepare the full index path
            full_path = current_path + (index,)
            # and prepare the prefix
            prefix = current_prefix + c.name
            # is this particle name ambiguous or are all indices requested? add index
            if always_include_indices or names[c.name] > 1:
                suffix = relative_indices[c.name] if use_relative_indices else index
                prefix += f"_{suffix}"
            # always increase the relative indices
            relative_indices[c.name] += 1

            # if the particle is selected add the prefix and the path
            if c.selected:
                result.append((prefix, full_path))

            # but in any case also process all children recursively
            result += c.__walk(always_include_indices, use_relative_indices, prefix + "_", full_path)

        # done, return all prefixes and their paths
        return result

    @classmethod
    def build(cls, decay_string):
        """Build a tree of selected particles from a `DecayString`

        This will return a `DecayParticleNode` instance which is the top of a
        tree of all the selected particles from the decay string.

        Arguments:
            decay_string (str): `DecayString` containing at least one selected particle

        Raises:
            ValueError: if no particle is selected in the decay string
        """
        selected = get_hierarchy_of_decay(decay_string)
        if not selected:
            raise ValueError("No particle selected in decay string")
        # create the top of the tree
        top = cls("")
        # now loop over all selected particles
        for path in selected:
            current = top
            # and walk through the path
            for index, name in path:
                # creating tree children as needed
                if index not in current.children:
                    current.children[index] = cls(name)
                # and update the pointer
                current = current.children[index]
            # after walking the tree the pointer is at the selected particle so
            # just set the selected to True
            current.selected = True

        # done, return the tree
        return top
291 
292 
def create_aliases_for_selected(
    list_of_variables: List[str],
    decay_string: str,
    prefix: Optional[Union[str, List[str]]] = None,
    *,
    use_names=True,
    always_include_indices=False,
    use_relative_indices=False
) -> List[str]:
    """
    Create aliases so that every variable in ``list_of_variables`` is
    calculated for every particle selected in ``decay_string`` and return the
    list of alias names.

    With ``use_names=True`` (the default) the alias names are built from the
    particle names:

    * If the names are unambiguous the aliases are prefixed with the names of
      all parent particles separated by underscores. For the decay string
      ``B0 -> [D0 -> ^pi+ K-] pi0`` the aliases for the ``pi+`` start with
      ``D0_pi_`` followed by the variable name.

        >>> list_of_variables = ['M','p']
        >>> decay_string = 'B0 -> [D0 -> ^pi+ K-] pi0'
        >>> create_aliases_for_selected(list_of_variables, decay_string)
        ['D0_pi_M', 'D0_pi_p']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'D0_pi_M' --> 'daughter(0,daughter(0,M))'
        [INFO] 'D0_pi_p' --> 'daughter(0,daughter(0,p))'
        [INFO] =========================

    * If several daughters of one particle share a name, these particles are
      followed by their daughter index. The decay string
      ``B0 -> [D0 -> ^pi+:1 ^pi-:2 ^pi0:3 ] ^pi0:4`` gives the following
      prefixes for the particle with the corresponding number as list name:

      1. ``D0_pi_0_``
      2. ``D0_pi_1_``
      3. ``D0_pi0_``
      4. ``pi0_``

        >>> list_of_variables = ['M','p']
        >>> decay_string = 'B0 -> [D0 -> ^pi+ ^pi- ^pi0] ^pi0'
        >>> create_aliases_for_selected(list_of_variables, decay_string)
        ['D0_pi_0_M', 'D0_pi_0_p', 'D0_pi_1_M', 'D0_pi_1_p',
         'D0_pi0_M', 'D0_pi0_p', 'pi0_M', 'pi0_p']

    * ``always_include_indices=True`` adds the index even for unambiguous
      particles.

    * There are two numbering schemes: with ``use_relative_indices=False`` the
      original decay string indices are used whenever an index is added to a
      particle name, with ``use_relative_indices=True`` the indices start at
      zero for each particle name which is part of the prefixes.

        >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=False)
        ['e_2_M', 'e_3_M']
        >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=True)
        ['e_0_M', 'e_1_M']

    With ``use_names=False`` the aliases start with the daughter indices of
    all parent particles, each prefixed with a ``d`` and separated by
    underscores. For ``B0 -> [D0 -> ^pi+:1 ^pi-:2 ^pi0:3 ] ^pi0:4`` the
    aliases start with

    1. ``d0_d0_``
    2. ``d0_d1_``
    3. ``d0_d2_``
    4. ``d1_``

    and the ``always_include_indices`` and ``use_relative_indices`` arguments
    do not influence the alias names.

    The naming can be replaced completely by passing custom prefixes: a single
    string if exactly one particle is selected, otherwise a list with one
    unique prefix per selected particle. Custom prefixes take precedence over
    ``use_names``.

        >>> list_of_variables = ['M','p']
        >>> decay_string = 'B0 -> [D0 -> ^pi+ ^pi- pi0] pi0'
        >>> create_aliases_for_selected(list_of_variables, decay_string, prefix=['pip', 'pim'])
        ['pip_M', 'pip_p', 'pim_M', 'pim_p']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'pim_M' --> 'daughter(0,daughter(1,M))'
        [INFO] 'pim_p' --> 'daughter(0,daughter(1,p))'
        [INFO] 'pip_M' --> 'daughter(0,daughter(0,M))'
        [INFO] 'pip_p' --> 'daughter(0,daughter(0,p))'
        [INFO] =========================

    If the mother particle itself is selected the plain input variables are
    added to the returned list. If custom prefixes are supplied, aliases are
    created for the mother particle as well:

        >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-')
        ['M', 'p', 'pi_M', 'pi_p']
        >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-', prefix=['MyB', 'MyPi'])
        ['MyB_M', 'MyB_p', 'MyPi_M', 'MyPi_p']

    Parameters:
        list_of_variables (list(str)): list of variable names
        decay_string (str): Decay string with selected particles
        prefix (str, list(str)): Custom prefix for all selected particles
        use_names (bool): Include the names of the particles in the aliases
        always_include_indices (bool): If ``use_names=True`` include the decay
            index of the particles in the alias name even if the particle could
            be uniquely identified without them.
        use_relative_indices (bool): If ``use_names=True`` use a relative
            indexing which always starts at 0 for each particle appearing in
            the alias names independent of the absolute position in the decay
            string

    Returns:
        list(str): new variables list

    Raises:
        ValueError: if the number of custom prefixes differs from the number
            of selected particles or if the custom prefixes are not unique
    """
    tree = DecayParticleNode.build(decay_string)
    # transpose [(prefix, path), (prefix, path), ...] into
    # (prefix, prefix, ...) and (path, path, ...)
    name_prefixes, paths = zip(*tree.get_prefixes(always_include_indices, use_relative_indices))

    # daughter indices ``dM_dN`` only end up in the alias names when neither
    # custom prefixes nor particle names are used
    include_indices = False
    if prefix is None:
        if use_names:
            alias_prefixes = name_prefixes
        else:
            # no names requested: blank the name based prefixes and let the
            # daughter indices identify the particles instead
            alias_prefixes = [""] * len(name_prefixes)
            include_indices = True
    else:
        # a single custom prefix is treated as a list with one entry
        custom_prefixes = [prefix] if isinstance(prefix, str) else prefix
        # there has to be exactly one custom prefix per selected particle
        if len(custom_prefixes) != len(name_prefixes):
            raise ValueError("Number of selected particles does not match number of supplied custom prefixes")
        # and the same prefix must not be used for two particles
        if any(count > 1 for count in collections.Counter(custom_prefixes).values()):
            raise ValueError("Prefixes need to be unique")
        alias_prefixes = custom_prefixes

    alias_list = []
    for particle_prefix, path in zip(alias_prefixes, paths):
        if path is not None:
            # a daughter is selected: wrap the variables in nested daughter() calls
            alias_list.extend(
                create_daughter_aliases(list_of_variables, path, particle_prefix, include_indices)
            )
        elif particle_prefix:
            # the mother is selected and has a prefix: plain renaming aliases
            alias_list.extend(create_aliases(list_of_variables, "{variable}", particle_prefix))
        else:
            # the mother is selected without prefix: the variables are used as they are
            alias_list.extend(list_of_variables)

    return alias_list
479 
480 
def create_mctruth_aliases(
    list_of_variables: Iterable[str],
    prefix="mc"
) -> List[str]:
    """
    Create aliases which wrap every variable of the list in ``matchedMC()``.

        >>> list_of_variables = ['M','p']
        >>> create_mctruth_aliases(list_of_variables)
        ['mc_M', 'mc_p']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'mc_M' --> 'matchedMC(M)'
        [INFO] 'mc_p' --> 'matchedMC(p)'
        [INFO] =========================

    Parameters:
        list_of_variables (list(str)): list of variable names
        prefix (str): prefix of the alias names, ``mc`` by default

    Returns:
        list(str): list of created aliases
    """
    mc_wrapper = 'matchedMC({variable})'
    return create_aliases(list_of_variables, mc_wrapper, prefix)
507 
508 
def add_collection(list_of_variables: Iterable[str], collection_name: str) -> str:
    """
    Register the given variables as a variable collection and return its name.

    This is a convenience wrapper around `VariableManager.addCollection`,
    which is not particularly user-friendly to call directly.

    Example:

        Defining the collection
            >>> variables.utils.add_collection(['p','E'], "my_collection")

        Passing it as an argument to variablesToNtuple
            >>> modularAnalysis.variablesToNtuple(variables=['my_collection'], ...)

    Parameters:
        list_of_variables (list(str)): list of variable names
        collection_name (str): name of the collection

    Returns:
        str: name of the variable collection
    """
    # the variable names are handed over as individual arguments
    variable_vector = _std_vector(*list_of_variables)
    _variablemanager.addCollection(collection_name, variable_vector)
    return collection_name
532 
533 
def create_isSignal_alias(aliasName, flags):
    """
    Make a `VariableManager` alias for a customized :b2:var:`isSignal`, which accepts specified mc match errors.

    .. seealso:: see :doc:`MCMatching` for a definition of the mc match error flags.

    The following code defines a new variable ``isSignalAcceptMissingGammaAndMissingNeutrino``, which is same
    as :b2:var:`isSignal`, but also accepts missing gamma and missing neutrino

        >>> create_isSignal_alias("isSignalAcceptMissingGammaAndMissingNeutrino", [16, 8])

    Logically, this
    ``isSignalAcceptMissingGammaAndMissingNeutrino`` =
    :b2:var:`isSignalAcceptMissingGamma` || :b2:var:`isSignalAcceptMissingNeutrino`.

    In the example above, create_isSignal_alias() creates ``isSignalAcceptMissingGammaAndMissingNeutrino`` by
    unmasking (setting bits to zero)
    the ``c_MissGamma`` bit (16 or 0b00010000) and ``c_MissNeutrino`` bit (8 or 0b00001000) in mcErrors.

    For more information, please check this `example script <https://stash.desy.de/projects/B2/repos/software/
    browse/analysis/examples/VariableManager/isSignalAcceptFlags.py>`_.

    Parameters:
        aliasName (str): the name of the alias to be set
        flags (list(int)): a list of the bits to unmask

    Raises:
        ValueError: if one of the flags is not an integer. No alias is
            registered in this case.
    """
    # combine all requested bits into one mask
    mask = 0
    for flag in flags:
        if not isinstance(flag, int):
            # report the offending flag and its actual type
            raise ValueError(
                "The type of input flags of create_isSignal_alias() should be integer. "
                f"Now one of the input flags is {flag!r} of type {type(flag).__name__}."
            )
        mask |= flag

    # the candidate counts as signal if no error bit is left after unmasking
    _variablemanager.addAlias(aliasName, "passesCut(unmask(mcErrors, %d) == %d)" % (mask, 0))
variables.utils.DecayParticleNode.__walk
def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path)
Definition: utils.py:212
variables.utils.DecayParticleNode.name
name
name of the particle
Definition: utils.py:175
variables.utils.DecayParticleNode
Definition: utils.py:165
variables.utils.DecayParticleNode.__init__
def __init__(self, name)
Definition: utils.py:172
variables.utils.DecayParticleNode.selected
selected
whether or not this particle is selected
Definition: utils.py:177
Belle2::DecayDescriptor
The DecayDescriptor stores information about a decay tree or parts of a decay tree.
Definition: DecayDescriptor.h:43
variables.utils.DecayParticleNode.get_prefixes
def get_prefixes(self, always_include_indices=False, use_relative_indices=False)
Definition: utils.py:181
variables.utils.DecayParticleNode.build
def build(cls, decay_string)
Definition: utils.py:261
variables.utils.DecayParticleNode.children
children
mapping of children decayIndex->Node
Definition: utils.py:179