Belle II Software development
utils.py
1
8
9import functools
10import collections
11import re
12import variables
13from typing import Iterable, Union, List, Tuple, Optional
14
15
def create_aliases(list_of_variables: Iterable[str], wrapper: str, prefix="") -> List[str]:
    """
    Register one alias per variable, each wrapped in ``wrapper``, and return
    the names of the aliases.

    The alias name is derived from the variable name: every run of characters
    that is not a letter or a digit is replaced by a single underscore and
    underscores at the beginning and end are dropped. A variable with
    arguments like ``useLabFrame(p)`` therefore gets the name
    ``useLabFrame_p``. If ``prefix`` is not empty the alias name becomes
    ``<prefix>_<name>``.

    >>> list_of_variables = ['M','p','matchedMC(useLabFrame(px))']
    >>> wrapper = 'daughter(1,{variable})'
    >>> prefix = 'pref'
    >>> print(create_aliases(list_of_variables, wrapper, prefix))
    ['pref_M', 'pref_p', 'pref_matchedMC_useLabFrame_px']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =====================================
    [INFO] The following aliases are registered:
    [INFO] pref_M                        --> daughter(1,M)
    [INFO] pref_matchedMC_useLabFrame_px --> daughter(1,matchedMC(useLabFrame(px)))
    [INFO] pref_p                        --> daughter(1,p)
    [INFO] =====================================

    Parameters:
        list_of_variables (list(str)): list of variable names
        wrapper (str): metafunction taking the variables from
            ``list_of_variables`` as a parameter, written as a format string
            with a ``{variable}`` placeholder
            (``<metafunction>(<some configs>, {variable} ,<some other configs>)``)
        prefix (str): alias prefix used for wrapped variables.

    Returns:
        list(str): names of the created aliases, in input order
    """
    unsafe_chars = re.compile('[^a-zA-Z0-9]+')
    created = []
    for variable in list_of_variables:
        # collapse everything that is not safe in an alias name into "_" and
        # drop underscores left over at either end
        alias_name = unsafe_chars.sub("_", variable).strip("_")
        if prefix:
            alias_name = f"{prefix}_{alias_name}"
        created.append(alias_name)
        variables.variables.addAlias(alias_name, wrapper.format(variable=variable))

    return created
57
58
def get_hierarchy_of_decay(decay_string: str) -> List[List[Tuple[int, str]]]:
    """
    This function returns paths of the particles selected in decay string. For
    each selected particle return a list of (index, name) tuples which indicate
    which daughter index to choose to arrive at the selected particle.

    For example for the decay string ``B+ -> [ D+ -> pi0 ^K+ ] pi0`` the
    resulting path for the K+ would be ``[(0, 'D'), (1, 'K')]``: The K is the
    second daughter of the first daughter of the B+

    >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] pi0')
    [[(0, 'D'), (0, 'K')]]

    Every selected particle has its own path so if multiple particles are
    collected a list of paths is returned

    >>> get_hierarchy_of_decay('B+ -> [ D+ -> ^K+ pi0 ] ^pi0')
    [[(0, 'D'), (0, 'K')], [(1, 'pi0')]]

    If the mother particle is selected an empty list will be returned as its path

    >>> get_hierarchy_of_decay('^B+ -> ^pi+ pi-')
    [[], [(0, 'pi')]]

    Parameters:
        decay_string (str): Decay string with selected particles

    Returns:
        list(list(tuple(int, str))): list of hierarchies of selected particles.

    Raises:
        ValueError: if the decay string cannot be initialised by the decay
            descriptor.
    """
    # imported locally so that ROOT is only loaded when the function is used
    from ROOT import Belle2

    # the descriptor object has to exist before the decay string can be parsed
    d = Belle2.DecayDescriptor()
    if not d.init(decay_string):
        raise ValueError("Invalid decay string")

    # The first entry of each path is skipped; presumably it is the mother
    # particle itself, which matches the documented empty path for a selected
    # mother.
    return [[tuple(e) for e in path[1:]] for path in d.getHierarchyOfSelected()]
98
99
def create_daughter_aliases(
    list_of_variables: Iterable[str],
    indices: Union[int, Iterable[int]],
    prefix="", include_indices=True
) -> List[str]:
    """Create Aliases for all variables for a given daughter hierarchy

    Arguments:
        list_of_variables (list(str)): list of variables to create aliases for
        indices (int or list(int)): index of the daughter, grand-daughter, grand-grand-daughter,
            and so forth. Any iterable of indices is accepted, including
            one-shot iterators.
        prefix (str): optional prefix to prepend to the aliases
        include_indices(bool): if set to True (default) the aliases will contain
            the daughter indices as dX_dY_dZ...

    Returns:
        list(str): new variables list

    * create aliases for the second daughter as "d1_E", "d1_m" (daughters start at 0)

        >>> create_daughter_aliases(["E", "m"], 1)
        ['d1_E', 'd1_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'd1_E' --> 'daughter(1,E)'
        [INFO] 'd1_m' --> 'daughter(1,m)'
        [INFO] =========================


    * create aliases for the first grand daughter of the second daughter,
      starting with "my" and without including the indices, resulting in "my_E", "my_m"

        >>> create_daughter_aliases(["E", "m"], [1, 0], prefix="my", include_indices=False)
        ['my_E', 'my_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'my_E' --> 'daughter(1,daughter(0,E))'
        [INFO] 'my_m' --> 'daughter(1,daughter(0,m))'
        [INFO] =========================

    * create aliases for the second grand grand daughter of the third grand
      daughter of the fifth daughter, starting with my and including the
      indices, resulting in "my_d4_d2_d1_E", "my_d4_d2_d1_m"

        >>> create_daughter_aliases(["E", "m"], [4, 2, 1], prefix="my")
        ['my_d4_d2_d1_E', 'my_d4_d2_d1_m']
        >>> from variables import variables
        >>> variables.printAliases()
        [INFO] =========================
        [INFO] Following aliases exists:
        [INFO] 'my_d4_d2_d1_E' --> 'daughter(4,daughter(2,daughter(1,E)))'
        [INFO] 'my_d4_d2_d1_m' --> 'daughter(4,daughter(2,daughter(1,m)))'
        [INFO] =========================

    """

    if isinstance(indices, collections.abc.Iterable):
        # Materialise once: the indices are walked twice below (forwards for
        # the prefix, backwards for the template), which would fail for
        # one-shot iterators or iterables that do not support reversed().
        indices = list(indices)
    else:
        indices = [indices]

    if include_indices:
        # append "_d<index>" for every level; strip the leading underscore
        # that appears when the prefix is empty
        prefix = functools.reduce(lambda x, y: f"{x}_d{y}", indices, prefix).lstrip("_")

    # wrap from the innermost daughter outwards so that the first index ends
    # up as the outermost daughter() call
    template = functools.reduce(lambda x, y: f"daughter({y},{x})", reversed(indices), "{variable}")
    return create_aliases(list_of_variables, template, prefix)
168
169
class DecayParticleNode:
    """
    Class to present selected particles from a DecayString as tree structure.
    For each node of the tree we save the name of the particle, whether it is
    selected and a dictionary of all children (as mapping decayIndex -> Node)
    """

    def __init__(self, name):
        """Just set default values"""
        #: name of the particle
        self.name = name
        #: whether or not this particle is selected
        self.selected = False
        #: mapping of children decayIndex->Node
        self.children = {}

    def get_prefixes(self, always_include_indices=False, use_relative_indices=False):
        """
        Recursively walk through the tree of selected particles and return a list
        of prefixes for aliases and a tuple of decay indexes for that prefix.

        For example for ``B0 -> [D0 -> ^pi+] ^pi0`` it might return

        >>> DecayParticleNode.build('^B0 -> [D0 -> ^pi+] ^pi0').get_prefixes()
        [ ("", None), ("D0_pi", (0, 0)), ("pi0", (1,)) ]

        and to create aliases from these one would use the indices as arguments for
        the b2:var:`daughter` meta variable.

        This function will make sure that prefix names are unique: If there are
        multiple siblings of one node with the same particle name they will be
        distinguished by either suffixing them with the decay index (if
        ``use_relative_indices=False``) or they will just be enumerated
        starting at 0 otherwise.

        Arguments:
            always_include_indices (bool): If True always add the index of the
                particle to the prefix, otherwise the index is only added if
                more than one sibling of the same particle exist.
            use_relative_indices (bool): If True the indices used will **not**
                be the daughter indices in the full decay string but just the
                relative indices: If multiple sibling particles with the same
                name they will be just numbered starting at zero as they appear
                in the aliases.

        Returns:
            list(tuple(str, tuple(int) or None)): one ``(prefix, path)`` entry
            per selected particle; a selected mother particle is reported as
            ``("", None)``.
        """
        return self.__walk(always_include_indices, use_relative_indices, "", tuple())

    def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path):
        """Recursively walk the tree and collect all prefixes

        See:
            `get_prefixes`

        Arguments:
            always_include_indices (bool): see `get_prefixes()`
            use_relative_indices (bool): see `get_prefixes()`
            current_prefix: the current prefix so far collected from any parent
                particle.
            current_path: the current path of indices so far collected from any
                parent particle.
        """

        result = []
        # are we the mother particle and selected? if so, add a "no-prefix" to the output
        if not current_path and self.selected:
            result.append(("", None))

        # count the particle names of all daughters so that we know which ones we
        # have to index
        names = collections.Counter(e.name for e in self.children.values())
        # if we use relative indices start counting them at zero
        relative_indices = collections.defaultdict(int)

        # now loop over all children in order of their decay index
        for index, c in sorted(self.children.items()):
            # prepare the full index path
            full_path = current_path + (index,)
            # and prepare the prefix
            prefix = current_prefix + c.name
            # is this particle name ambiguous or are all indices requested? add index
            if always_include_indices or names[c.name] > 1:
                prefix += f"_{relative_indices[c.name] if use_relative_indices else index}"
                # always increase the relative indices
                relative_indices[c.name] += 1

            # if the particle is selected add the prefix and the path
            if c.selected:
                result.append((prefix, full_path))

            # but in any case also process all children recursively
            result += c.__walk(always_include_indices, use_relative_indices, prefix + "_", full_path)

        # done, return all prefixes and their paths
        return result

    @classmethod
    def build(cls, decay_string):
        """Build a tree of selected particles from a `DecayString`

        This will return a `DecayParticleNode` instance which is the top of a
        tree of all the selected particles from the decay string.

        Arguments:
            decay_string (str): `DecayString` containing at least one selected particle

        Raises:
            ValueError: if no particle is selected in the decay string.
        """
        selected = get_hierarchy_of_decay(decay_string)
        if not selected:
            raise ValueError("No particle selected in decay string")
        # create the top of the tree
        top = cls("")
        # now loop over all selected particles
        for path in selected:
            current = top
            # and walk through the path
            for index, name in path:
                # creating tree children as needed
                if index not in current.children:
                    current.children[index] = cls(name)
                # and update the pointer
                current = current.children[index]
            # after walking the tree the pointer is at the selected particle so
            # just set the selected to True
            current.selected = True

        # done, return the tree
        return top
296
297
def create_aliases_for_selected(
    list_of_variables: List[str],
    decay_string: str,
    prefix: Optional[Union[str, List[str]]] = None,
    *,
    use_names=True,
    always_include_indices=False,
    use_relative_indices=False
) -> List[str]:
    """
    Create aliases so that every variable in ``list_of_variables`` is
    calculated for every particle selected in ``decay_string``, and return the
    list of alias names.

    With ``use_names=True`` (the default) the alias names are built as follows:

    * If names are unambiguous, it's semi-laconic :doc:`DecayString` style: The
      aliases will be prefixed with the names of all parent particle names
      separated by underscore. For example given the decay string
      ``B0 -> [D0 -> ^pi+ K-] pi0`` the aliases for the ``pi+`` will start with
      ``D0_pi_`` followed by the variable name.

      >>> list_of_variables = ['M','p']
      >>> decay_string = 'B0 -> [D0 -> ^pi+ K-] pi0'
      >>> create_aliases_for_selected(list_of_variables, decay_string)
      ['D0_pi_M', 'D0_pi_p']
      >>> from variables import variables
      >>> variables.printAliases()
      [INFO] =========================
      [INFO] Following aliases exists:
      [INFO] 'D0_pi_M' --> 'daughter(0,daughter(0,M))'
      [INFO] 'D0_pi_p' --> 'daughter(0,daughter(0,p))'
      [INFO] =========================

    * If names are ambiguous because there are multiple daughters with the same
      name these particles will be followed by their daughter index. For example
      given the decay string ``B0 -> [D0 -> ^pi+:1 ^pi-:2 ^pi0:3 ] ^pi0:4``
      will create aliases with the following prefixes for particle with the
      corresponding number as list name:

      1. ``D0_pi_0_``
      2. ``D0_pi_1_``
      3. ``D0_pi0_``
      4. ``pi0_``

      >>> list_of_variables = ['M','p']
      >>> decay_string = 'B0 -> [D0 -> ^pi+ ^pi- ^pi0] ^pi0'
      >>> create_aliases_for_selected(list_of_variables, decay_string)
      ['D0_pi_0_M', 'D0_pi_0_p', 'D0_pi_1_M', 'D0_pi_1_p',
       'D0_pi0_M', 'D0_pi0_p', 'pi0_M', 'pi0_p']
      >>> from variables import variables
      >>> variables.printAliases()
      [INFO] =========================
      [INFO] Following aliases exists:
      [INFO] 'D0_pi0_M' --> 'daughter(0,daughter(2,M))'
      [INFO] 'D0_pi0_p' --> 'daughter(0,daughter(2,p))'
      [INFO] 'D0_pi_0_M' --> 'daughter(0,daughter(0,M))'
      [INFO] 'D0_pi_0_p' --> 'daughter(0,daughter(0,p))'
      [INFO] 'D0_pi_1_M' --> 'daughter(0,daughter(1,M))'
      [INFO] 'D0_pi_1_p' --> 'daughter(0,daughter(1,p))'
      [INFO] 'D0_pi_M' --> 'daughter(0,daughter(0,M))'
      [INFO] 'D0_pi_p' --> 'daughter(0,daughter(0,p))'
      [INFO] 'pi0_M' --> 'daughter(1,M)'
      [INFO] 'pi0_p' --> 'daughter(1,p)'
      [INFO] =========================

    * The user can select to always include the index even for unambiguous
      particles by passing ``always_include_indices=True``

    * The user can choose two different numbering schemes: If
      ``use_relative_indices=False`` the original decay string indices will be
      used if an index is added to a particle name.

      But if ``use_relative_indices=True`` the indices will just start at zero for each
      particle which is part of the prefixes. For example for ``B0-> mu+ e- ^e+ ^e-``

      >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=False)
      ['e_2_M', 'e_3_M']
      >>> create_aliases_for_selected(['M'], 'B0-> mu+ e- ^e+ ^e-', use_relative_indices=True)
      ['e_0_M', 'e_1_M']

    If ``use_names=False`` the aliases will just start with the daughter indices
    of all parent particles prefixed with a ``d`` and separated by underscore. So
    for the previous example ``B0 -> [D0 -> ^pi+:1 ^pi-:2 ^pi0:3 ] ^pi0:4``
    this would result in aliases starting with

    1. ``d0_d0_``
    2. ``d0_d1_``
    3. ``d0_d2_``
    4. ``d1_``

    In this case the ``always_include_indices`` and ``use_relative_indices``
    arguments are ignored.

    The naming can be modified by providing a custom prefix for each selected
    particle. In this case the parameter ``prefix`` needs to be either a simple
    string if only one particle is selected or a list of strings with one
    prefix for each selected particle.

    >>> list_of_variables = ['M','p']
    >>> decay_string = 'B0 -> [D0 -> ^pi+ ^pi- pi0] pi0'
    >>> create_aliases_for_selected(list_of_variables, decay_string, prefix=['pip', 'pim'])
    ['pip_M', 'pip_p', 'pim_M', 'pim_p']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =========================
    [INFO] Following aliases exists:
    [INFO] 'pim_M' --> 'daughter(0,daughter(1,M))'
    [INFO] 'pim_p' --> 'daughter(0,daughter(1,p))'
    [INFO] 'pip_M' --> 'daughter(0,daughter(0,M))'
    [INFO] 'pip_p' --> 'daughter(0,daughter(0,p))'
    [INFO] =========================

    If the mother particle itself is selected the input list of variables will
    also be added to the returned list of created aliases. If custom prefixes
    are supplied then aliases will be created for the mother particle as well:

    >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-')
    ['M', 'p', 'pi_M', 'pi_p']
    >>> create_aliases_for_selected(['M', 'p'], '^B0 -> pi+ ^pi-', prefix=['MyB', 'MyPi'])
    ['MyB_M', 'MyB_p', 'MyPi_M', 'MyPi_p']

    Parameters:
        list_of_variables (list(str)): list of variable names
        decay_string (str): Decay string with selected particles
        prefix (str, list(str)): Custom prefix for all selected particles
        use_names (bool): Include the names of the particles in the aliases
        always_include_indices (bool): If ``use_names=True`` include the decay
            index of the particles in the alias name even if the particle could
            be uniquely identified without them.
        use_relative_indices (bool): If ``use_names=True`` use a relative
            indicing which always starts at 0 for each particle appearing in
            the alias names independent of the absolute position in the decay
            string

    Returns:
        list(str): new variables list

    Raises:
        ValueError: if the number of custom prefixes differs from the number
            of selected particles, or if the custom prefixes are not unique.
    """

    tree = DecayParticleNode.build(decay_string)
    collected = tree.get_prefixes(always_include_indices, use_relative_indices)
    # transpose [(prefix, path), (prefix, path), ...] into
    # (prefix, prefix, ...) and (path, path, ...)
    name_prefixes, paths = zip(*collected)

    # daughter indices `dM_dN` are only put into the alias names when the
    # particle names are not used
    include_indices = False
    if prefix is None:
        if use_names:
            chosen_prefixes = name_prefixes
        else:
            # no names requested: blank out the name based prefixes and let
            # the daughter indices identify the particle instead
            chosen_prefixes = [""] * len(name_prefixes)
            include_indices = True
    else:
        # custom prefixes: accept a single string as a one element list
        custom = [prefix] if isinstance(prefix, str) else prefix
        # there has to be exactly one custom prefix per selected particle
        if len(custom) != len(name_prefixes):
            raise ValueError("Number of selected particles does not match number of supplied custom prefixes")
        # and none of them may occur twice
        if max(collections.Counter(custom).values()) > 1:
            raise ValueError("Prefixes need to be unique")
        chosen_prefixes = custom

    alias_list = []
    for particle_prefix, path in zip(chosen_prefixes, paths):
        if path is not None:
            # a daughter particle is selected: wrap the variables in the
            # matching chain of daughter() calls
            alias_list += create_daughter_aliases(list_of_variables, path, particle_prefix, include_indices)
        elif particle_prefix:
            # mother particle selected with a non-empty prefix: create
            # aliases which just rename the variables
            alias_list += create_aliases(list_of_variables, "{variable}", particle_prefix)
        else:
            # mother particle selected without prefix: the plain variables
            # are returned as they are
            alias_list += list_of_variables

    return alias_list
484
485
def create_mctruth_aliases(
    list_of_variables: Iterable[str],
    prefix="mc"
) -> List[str]:
    """
    Create aliases which wrap every variable of the list in ``matchedMC()``.

    >>> list_of_variables = ['M','p']
    >>> create_mctruth_aliases(list_of_variables)
    ['mc_M', 'mc_p']
    >>> from variables import variables
    >>> variables.printAliases()
    [INFO] =========================
    [INFO] Following aliases exists:
    [INFO] 'mc_M' --> 'matchedMC(M)'
    [INFO] 'mc_p' --> 'matchedMC(p)'
    [INFO] =========================


    Parameters:
        list_of_variables (list(str)): list of variable names
        prefix (str): prefix of the alias names, ``mc`` by default

    Returns:
        list(str): list of created aliases
    """
    mc_wrapper = 'matchedMC({variable})'
    return create_aliases(list_of_variables, mc_wrapper, prefix)
512
513
def add_collection(list_of_variables: Iterable[str], collection_name: str) -> str:
    """
    Register the given variables as a variable collection under
    ``collection_name``.

    This wraps the `VariableManager.addCollection` method which is not
    particularly user-friendly.

    Example:

        Defining the collection
            >>> variables.utils.add_collection(['p','E'], "my_collection")

        Passing it as an argument to variablesToNtuple
            >>> modularAnalysis.variablesToNtuple(variables=['my_collection'], ...)

    Parameters:
        list_of_variables (list(str)): list of variable names
        collection_name (str): name of the collection

    Returns:
        str: name of the variable collection
    """

    # addCollection is handed the names packed by std_vector, not the plain
    # python iterable
    packed_names = variables.std_vector(*list_of_variables)
    variables.variables.addCollection(collection_name, packed_names)
    return collection_name
537
538
def create_isSignal_alias(aliasName, flags):
    """
    Make a `VariableManager` alias for a customized :b2:var:`isSignal`, which accepts specified mc match errors.

    .. seealso:: see :doc:`MCMatching` for a definition of the mc match error flags.

    The following code defines a new variable ``isSignalAcceptMissingGammaAndMissingNeutrino``, which is same
    as :b2:var:`isSignal`, but also accepts missing gamma and missing neutrino

    >>> create_isSignal_alias("isSignalAcceptMissingGammaAndMissingNeutrino", [16, 8])

    Logically, this
    ``isSignalAcceptMissingGammaAndMissingNeutrino`` =
    :b2:var:`isSignalAcceptMissingGamma` || :b2:var:`isSignalAcceptMissingNeutrino`.

    In the example above, create_isSignal_alias() creates ``isSignalAcceptMissingGammaAndMissingNeutrino`` by
    unmasking (setting bits to zero)
    the ``c_MissGamma`` bit (16 or 0b00010000) and ``c_MissNeutrino`` bit (8 or 0b00001000) in mcErrors.

    For more information, please check this
    `example script
    <https://gitlab.desy.de/belle2/software/basf2/-/tree/main/analysis/examples/VariableManager/isSignalAcceptFlags.py>`_.

    Parameters:
        aliasName (str): the name of the alias to be set
        flags (list(int)): a list of the bits to unmask

    Raises:
        ValueError: if one of the flags is not an integer. All flags are
            checked before the alias is registered, so no alias is created in
            that case.
    """

    # combine all flags into one bit mask
    mask = 0
    for flag in flags:
        if not isinstance(flag, int):
            # report the offending value and its actual type
            raise ValueError(
                "The type of input flags of create_isSignal_alias() should be integer. "
                f"Now one of the input flags is {flag!r} of type {type(flag).__name__}."
            )
        mask |= flag

    # int() keeps the plain number in the cut string even if the flags were
    # instances of an int subclass
    variables.variables.addAlias(aliasName, f"passesCut(unmask(mcErrors, {int(mask)}) == 0)")
def std_vector(*args)
Definition: __init__.py:135
str add_collection(Iterable[str] list_of_variables, str collection_name)
Definition: utils.py:514
The DecayDescriptor stores information about a decay tree or parts of a decay tree.
children
mapping of children decayIndex->Node
Definition: utils.py:184
selected
whether or not this particle is selected
Definition: utils.py:182
def __walk(self, always_include_indices, use_relative_indices, current_prefix, current_path)
Definition: utils.py:217
def get_prefixes(self, always_include_indices=False, use_relative_indices=False)
Definition: utils.py:186
name
name of the particle
Definition: utils.py:180
def __init__(self, name)
Definition: utils.py:177
def build(cls, decay_string)
Definition: utils.py:266
def variablesToNtuple(decayString, variables, treename='variables', filename='ntuple.root', path=None, basketsize=1600, signalSideParticleList="", filenameSuffix="", useFloat=False, storeEventType=True, ignoreCommandLineOverride=False)