Belle II Software  light-2403-persian
LCASaverModule.py
1 
8 
9 
10 import basf2 as b2
11 import numpy as np
12 import copy
13 from itertools import combinations
14 from variables import variables as vm
15 
16 
17 def get_object_list(pointerVec):
18  """
19  Workaround to avoid memory problems in basf2.
20 
21  Args:
22  pointerVec: Input particle list.
23 
24  Returns:
25  list: Output python list.
26  """
27  from ROOT import Belle2
28 
29  objList = []
30  size = (
31  pointerVec.getListSize()
32  if isinstance(pointerVec, Belle2.ParticleList)
33  else len(pointerVec)
34  )
35  for i in range(size):
36  objList.append(pointerVec[i])
37  return objList
38 
39 
40 def pdg_to_lca_converter(pdg):
41  """
42  Converts PDG code to LCAS classes.
43 
44  .. tip:: If you want to modify the LCAS classes, it's here.
45  Don't forget to update the number of edge classes accordingly in the yaml file.
46 
47  Args:
48  pdg (int): PDG code to convert.
49 
50  Returns:
51  int or None: Corresponding LCAS class, or None if PDG not present in ``pdg_lca_match``.
52  """
53  # MC PDG code
54  pdg_lca_match = {
55  443: 1, # J/psi
56  111: 1, # pi^0
57  310: 2, # K_S^0
58  # 130: 2, #K_L^0
59  421: 3, # D^0
60  411: 3, # D^+
61  431: 3, # D_s^+
62  413: 4, # D^+*
63  423: 4, # D^0*
64  433: 4, # D_s^*+
65  521: 5, # B^+
66  511: 5, # B^0
67  300553: 6, # Upsilon(4S)
68  }
69 
70  pdg = abs(pdg)
71  if pdg in pdg_lca_match:
72  return pdg_lca_match[pdg]
73  else:
74  return None
75 
76 
77 def _update_levels(levels, hist, pdg):
78  """
79  Assigns LCAS level to each particle in the decay tree.
80 
81  Arguments are automatically set.
82  """
83  for i, n in enumerate(hist):
84  if n in levels.keys():
85  continue
86 
87  lca = pdg_to_lca_converter(pdg[n])
88  if lca:
89  levels[n] = lca
90  else:
91  for j in range(i + 1):
92  lca = pdg_to_lca_converter(pdg[hist[i - j]])
93  if lca:
94  levels[n] = lca
95  break
96 
97  return levels
98 
99 
100 def write_hist(
101  particle,
102  leaf_hist={},
103  levels={},
104  hist=[],
105  pdg={},
106  leaf_pdg={},
107  semilep_flag=False,
108 ):
109  """
110  Recursive function to traverse down to the leaves saving the history.
111 
112  Args:
113  particle: The current particle being inspected.
114  Other arguments are automatically set.
115  """
116 
117  neutrino_pdgs = [12, 14, 16, 18]
118 
119  # Check if there's any neutrinos in the tree and set semileptonic flag
120  # Need to only include primary particle since they're what couns in the LCA
121  if (abs(particle.getPDG()) in neutrino_pdgs) and particle.isPrimaryParticle():
122  semilep_flag = True
123 
124  # Need to create a true copy, no just a ref to the object
125  hist = copy.deepcopy(hist)
126  leaf_hist = copy.deepcopy(leaf_hist)
127  leaf_pdg = copy.deepcopy(leaf_pdg)
128  levels = copy.deepcopy(levels)
129  pdg = copy.deepcopy(pdg)
130 
131  # Below is deciding what to save to the LCA matrix
132  # If primary or secondary with no daughters:
133  # save
134  # If primary with no primary daughters:
135  # save and continue down daughters
136  # else:
137  # continue down daughters
138 
139  # Check if we're a primary particle with no primary daughters
140  prim_no_prim_daughters = (
141  len(
142  [d for d in get_object_list(particle.getDaughters()) if d.isPrimaryParticle()]
143  )
144  == 0
145  )
146 
147  # If it's a leaf we save it
148  # Also check that it has no primary daughters
149  if (
150  # particle.isPrimaryParticle() and
151  (particle.getNDaughters() == 0)
152  or prim_no_prim_daughters # Even if saving secondaries we need to record primaries with no primary daughters
153  ):
154  # Won't save neutrinos or very low energy photons to LCA
155  if abs(particle.getPDG()) not in neutrino_pdgs and not (
156  particle.getPDG() == 22 and particle.getEnergy() < 1e-4
157  ):
158  # Leaves get their history added
159  leaf_hist[particle.getArrayIndex()] = hist
160  leaf_pdg[particle.getArrayIndex()] = particle.getPDG()
161 
162  # And now that we have a full history down to the leaf
163  # we can update the levels
164  levels = _update_levels(levels, hist, pdg)
165 
166  # Here is deciding whether to continue traversing down the decay tree
167  # Don't want to do this if all daughters are secondaries since we're not saving them
168  if (
169  particle.getNDaughters() != 0
170  ) and not prim_no_prim_daughters: # Don't travers if all daughters are secondaries
171  # Only append primaries to history
172  if particle.isPrimaryParticle():
173  hist.append(particle.getArrayIndex())
174  # Save all PDG values of the in between primary particles
175  pdg[particle.getArrayIndex()] = particle.getPDG()
176 
177  # Now iterate over daughters passing history down
178  daughters = get_object_list(particle.getDaughters())
179  for daughter in daughters:
180  (
181  leaf_hist,
182  levels,
183  pdg,
184  leaf_pdg,
185  semilep_flag,
186  ) = write_hist(
187  daughter,
188  leaf_hist,
189  levels,
190  hist,
191  pdg,
192  leaf_pdg,
193  semilep_flag,
194  )
195 
196  return (
197  leaf_hist,
198  levels,
199  pdg,
200  leaf_pdg,
201  semilep_flag,
202  )
203 
204 
205 class LCASaverModule(b2.Module):
206  """
207  Save Lowest Common Ancestor matrix of each MC Particle in the given list.
208 
209  Args:
210  particle_lists (list): Name of particle lists to save features of.
211  features (list): List of features to save for each particle.
212  mcparticle_list (str): Name of particle list to build LCAs from (will use as root).
213  output_file (str): Path to output file to save.
214  """
215 
216  def __init__(
217  self,
218  particle_lists,
219  features,
220  mcparticle_list,
221  output_file,
222  ):
223  """
224  Initialization.
225  """
226  super().__init__()
227 
228  self.particle_listsparticle_lists = particle_lists
229 
230  self.featuresfeatures = features
231 
232  self.mcparticle_listmcparticle_list = mcparticle_list
233 
234  self.output_fileoutput_file = output_file
235 
236 
239  self.max_particlesmax_particles = 500
240 
241  def initialize(self):
242  """
243  Called once at the beginning.
244  """
245  from ROOT import Belle2, TFile, TTree
246 
247 
248  self.eventinfoeventinfo = Belle2.PyStoreObj("EventMetaData")
249 
250 
251  self.root_outfileroot_outfile = TFile(self.output_fileoutput_file, "recreate")
252 
253  self.treetree = TTree("Tree", "tree")
254 
255 
256  self.event_numevent_num = np.zeros(1, dtype=np.int32)
257  self.treetree.Branch("event", self.event_numevent_num, "event/I")
258 
259  self.isBisB = np.zeros(1, dtype=bool)
260  self.treetree.Branch("isB", self.isBisB, "isB/b")
261 
262 
263  self.truth_dicttruth_dict = {}
264 
265  # We assume at most two LCA matrices for event
266  for i in [1, 2]:
267  self.truth_dicttruth_dict[f"n_LCA_leaves_{i}"] = np.zeros(1, dtype=np.int32)
268  self.truth_dicttruth_dict[f"LCA_leaves_{i}"] = np.zeros(
269  self.max_particlesmax_particles, dtype=np.int32
270  )
271  self.treetree.Branch(
272  f"n_LCA_leaves_{i}",
273  self.truth_dicttruth_dict[f"n_LCA_leaves_{i}"],
274  f"n_LCA_leaves_{i}/I",
275  )
276  self.treetree.Branch(
277  f"LCA_leaves_{i}",
278  self.truth_dicttruth_dict[f"LCA_leaves_{i}"],
279  f"LCA_leaves_{i}[n_LCA_leaves_{i}]/I",
280  )
281 
282  self.truth_dicttruth_dict[f"n_LCA_{i}"] = np.zeros(1, dtype=np.int32)
283  self.truth_dicttruth_dict[f"LCAS_{i}"] = np.zeros(
284  self.max_particlesmax_particles**2, dtype=np.uint8
285  )
286  self.treetree.Branch(f"n_LCA_{i}", self.truth_dicttruth_dict[f"n_LCA_{i}"], f"n_LCA_{i}/I")
287  self.treetree.Branch(
288  f"LCAS_{i}", self.truth_dicttruth_dict[f"LCAS_{i}"], f"LCAS_{i}[n_LCA_{i}]/b"
289  )
290 
291  # Feature data
292 
293  self.n_particlesn_particles = np.zeros(1, dtype=np.int32)
294  self.treetree.Branch("n_particles", self.n_particlesn_particles, "n_particles/I")
295 
296 
297  self.primaryprimary = np.zeros(self.max_particlesmax_particles, dtype=np.bool)
298  self.treetree.Branch("primary", self.primaryprimary, "primary[n_particles]/O")
299 
300 
301  self.leavesleaves = np.zeros(self.max_particlesmax_particles, dtype=np.int32)
302  self.treetree.Branch("leaves", self.leavesleaves, "leaves[n_particles]/I")
303 
304 
305  self.b_indexb_index = np.zeros(self.max_particlesmax_particles, dtype=np.int32)
306  self.treetree.Branch("b_index", self.b_indexb_index, "b_index[n_particles]/I")
307 
308 
309  self.feat_dictfeat_dict = {}
310  for feat in self.featuresfeatures:
311  self.feat_dictfeat_dict[feat] = np.zeros(self.max_particlesmax_particles, np.float32)
312  self.treetree.Branch(
313  f"feat_{feat}", self.feat_dictfeat_dict[feat], f"feat_{feat}[n_particles]/F"
314  )
315 
316 
317  self.mc_pdgmc_pdg = np.zeros(self.max_particlesmax_particles, np.float32)
318  self.treetree.Branch("mcPDG", self.mc_pdgmc_pdg, "mcPDG[n_particles]/F")
319 
320  def _reset_LCA(self):
321  """
322  Resets the value of the LCA to 0.
323  """
324  for p_index in [1, 2]:
325  self.truth_dicttruth_dict[f"LCAS_{p_index}"][: self.max_particlesmax_particles**2] *= 0
326  self.truth_dicttruth_dict[f"n_LCA_leaves_{p_index}"][0] *= 0
327  self.truth_dicttruth_dict[f"LCA_leaves_{p_index}"][: self.max_particlesmax_particles] *= 0
328  self.truth_dicttruth_dict[f"n_LCA_{p_index}"][0] *= 0
329 
330  def event(self):
331  """
332  Called for each event.
333  """
334  from ROOT.Belle2 import PyStoreObj
335 
336  # Get the particle list (note this is a regular Particle list, not MCParticle)
337  p_list = PyStoreObj(self.mcparticle_listmcparticle_list)
338 
339  self.event_numevent_num[0] = self.eventinfoeventinfo.getEvent()
340 
341  # Is Upsilon flag
342  if self.mcparticle_listmcparticle_list == "Upsilon(4S):MC":
343  self.isBisB[0] = 0
344  elif self.mcparticle_listmcparticle_list == "B0:MC":
345  self.isBisB[0] = 1
346  elif self.mcparticle_listmcparticle_list == "B+:MC":
347  self.isBisB[0] = 2
348 
349  # Create the LCA
350  # IMPORTANT: The ArrayIndex is 0-based.
351  # mcplist contains the root particles we are to create LCAs from
352  # Reset the LCA list so if only one B is there it does not carry an older version over
353  self._reset_LCA_reset_LCA()
354 
355  if p_list.getListSize() > 0:
356  for part in p_list.obj():
357  # Get the corresponding MCParticle
358  mcp = part.getMCParticle()
359  # In this way the LCA variables in the ntuples will be labelled _1 and _2
360  # If we train on B decays these will correspond to the two B's
361  # while if we train on the Upsilon, _1 will correspond to it and _2 will remain empty
362  # becaus getArrayIndex() gives 0 for the Upsilon and 1, 2 for the B's
363  array_index = 1 if self.isBisB[0] == 0 else mcp.getArrayIndex()
364 
365  # Call function to write history of leaves in the tree.
366  # It internally calls function update_levels to find and save the level of each particle in the tree.
367  # Necessary step to build the LCA.
368  (
369  lcas_leaf_hist,
370  lcas_levels,
371  _, _, _,
372  ) = write_hist(
373  particle=mcp,
374  leaf_hist={},
375  levels={},
376  hist=[],
377  pdg={},
378  leaf_pdg={},
379  semilep_flag=False,
380  )
381 
382  lcas = np.zeros([len(lcas_leaf_hist), len(lcas_leaf_hist)])
383 
384  for x, y in combinations(enumerate(lcas_leaf_hist), 2):
385  lcas_intersection = [
386  i for i in lcas_leaf_hist[x[1]] if i in lcas_leaf_hist[y[1]]
387  ]
388  lcas[x[0], y[0]] = lcas_levels[lcas_intersection[-1]]
389  lcas[y[0], x[0]] = lcas_levels[lcas_intersection[-1]]
390 
391  self.truth_dicttruth_dict[f"LCAS_{array_index}"][: lcas.size] = lcas.flatten()
392 
393  self.truth_dicttruth_dict[f"n_LCA_leaves_{array_index}"][0] = len(lcas_leaf_hist.keys())
394  self.truth_dicttruth_dict[f"LCA_leaves_{array_index}"][
395  : len(lcas_leaf_hist.keys())
396  ] = list(lcas_leaf_hist.keys())
397 
398  self.truth_dicttruth_dict[f"n_LCA_{array_index}"][0] = lcas.size
399 
400  # ### Create the features
401  # Where we'll append features
402  evt_feats = {f: [] for f in self.featuresfeatures}
403 
404  # Ideally this would be saved in evt_feat_dict but the -1 for unmatched
405  # particles messes that up
406  evt_leaf_dict = {
407  "leaves": [],
408  "primary": [],
409  "b_index": [],
410  "mc_pdg": [],
411  }
412 
413  # IMPORTANT: The ArrayIndex is 0-based.
414  # mcplist contains the root particles we are to create LCAs from
415  for p_list_name in self.particle_listsparticle_lists:
416  # Get the particle list (note this is a regular Particle list, not MCParticle)
417  p_list = PyStoreObj(p_list_name)
418 
419  for particle in p_list.obj():
420  # Get the B parent index, set to -1 if particle has no MC match
421  b_index = int(vm.evaluate("ancestorBIndex", particle))
422  # Need this to reorder LCA later, returns -1 if no MC match
423  if b_index >= 0:
424  p_index = particle.getMCParticle().getArrayIndex()
425  p_primary = particle.getMCParticle().isPrimaryParticle()
426  # Save particle's PDG code
427  mc_pdg = particle.getMCParticle().getPDG()
428  else:
429  p_index = -1
430  p_primary = False
431  mc_pdg = 0
432 
433  evt_leaf_dict["primary"].append(p_primary)
434  evt_leaf_dict["leaves"].append(p_index)
435  evt_leaf_dict["b_index"].append(b_index)
436  evt_leaf_dict["mc_pdg"].append(mc_pdg)
437  for feat in self.featuresfeatures:
438  evt_feats[feat].append(vm.evaluate(feat, particle))
439 
440  n_particles = len(evt_leaf_dict["primary"])
441  self.n_particlesn_particles[0] = n_particles
442 
443  self.primaryprimary[:n_particles] = evt_leaf_dict["primary"]
444  self.leavesleaves[:n_particles] = evt_leaf_dict["leaves"]
445  self.b_indexb_index[:n_particles] = evt_leaf_dict["b_index"]
446  self.mc_pdgmc_pdg[:n_particles] = evt_leaf_dict["mc_pdg"]
447 
448  for feat in self.featuresfeatures:
449  self.feat_dictfeat_dict[feat][:n_particles] = evt_feats[feat]
450  # Add number of final state particles
451 
452  # Write everything to the TTree
453  self.treetree.Fill()
454 
455  def terminate(self):
456  """
457  Called at the end.
458  """
459  self.root_outfileroot_outfile.Write()
460  self.root_outfileroot_outfile.Close()
ParticleList is a container class that stores a collection of Particle objects.
Definition: ParticleList.h:140
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:67
primary
Particle-is-primary flag.
isB
bool containing information whether we reconstruct B or Upsilon
max_particles
Max number of particles It doesn't actually matter what this is as long as it's bigger than the numbe...
n_particles
Number of particles.
particle_lists
Input particle lists.
def __init__(self, particle_lists, features, mcparticle_list, output_file)
feat_dict
Features dictionary.
mcparticle_list
MC particle list (Upsilon, B0, B+)