Belle II Software development
LCASaverModule.py
import basf2 as b2
import numpy as np
import copy
from itertools import combinations
from variables import variables as vm


def get_object_list(pointerVec):
    """
    Workaround to avoid memory problems in basf2.

    Args:
        pointerVec: Input particle list.

    Returns:
        list: Output python list.
    """
    from ROOT import Belle2

    objList = []
    size = (
        pointerVec.getListSize()
        if isinstance(pointerVec, Belle2.ParticleList)
        else len(pointerVec)
    )
    for i in range(size):
        objList.append(pointerVec[i])
    return objList
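
# Illustrative note (not executed): get_object_list works both on a
# Belle2.ParticleList (via getListSize) and on the vector of daughters
# returned by e.g. MCParticle.getDaughters(), turning either into a plain
# Python list that can be iterated over safely.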


def pdg_to_lca_converter(pdg):
    """
    Converts PDG code to LCAS classes.

    .. tip:: If you want to modify the LCAS classes, it's here. Don't forget to update the number of edge classes accordingly in the yaml file.

    Args:
        pdg (int): PDG code to convert.

    Returns:
        int or None: Corresponding LCAS class, or None if PDG not present in ``pdg_lca_match``.
    """
    # MC PDG code
    pdg_lca_match = {
        443: 1,  # J/psi
        111: 1,  # pi^0
        310: 2,  # K_S^0
        # 130: 2,  # K_L^0
        421: 3,  # D^0
        411: 3,  # D^+
        431: 3,  # D_s^+
        413: 4,  # D^{*+}
        423: 4,  # D^{*0}
        433: 4,  # D_s^{*+}
        521: 5,  # B^+
        511: 5,  # B^0
        300553: 6,  # Upsilon(4S)
    }

    pdg = abs(pdg)
    if pdg in pdg_lca_match:
        return pdg_lca_match[pdg]
    else:
        return None
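
# Example (illustrative, not executed):
#   pdg_to_lca_converter(511)  -> 5     (B^0)
#   pdg_to_lca_converter(-411) -> 3     (the sign of the PDG code is ignored)
#   pdg_to_lca_converter(13)   -> None  (muons carry no LCAS class of their own)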


def _update_levels(levels, hist, pdg):
    """
    Assigns the LCAS level to each particle in the decay tree.

    Arguments are automatically set.
    """
    for i, n in enumerate(hist):
        if n in levels.keys():
            continue

        lca = pdg_to_lca_converter(pdg[n])
        if lca:
            levels[n] = lca
        else:
            for j in range(i + 1):
                lca = pdg_to_lca_converter(pdg[hist[i - j]])
                if lca:
                    levels[n] = lca
                    break

    return levels
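
# Illustrative example (not executed), assuming array indices 0, 1, 7 for a
# hypothetical Upsilon(4S) -> B^0 -> D^- chain:
#   _update_levels({}, hist=[0, 1, 7], pdg={0: 300553, 1: 511, 7: -411})
#   -> {0: 6, 1: 5, 7: 3}
# A particle whose PDG code has no LCAS class of its own inherits the class of
# its closest ancestor in the history that does.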


def write_hist(
    particle,
    leaf_hist={},
    levels={},
    hist=[],
    pdg={},
    leaf_pdg={},
    semilep_flag=False,
):
    """
    Recursive function to traverse down to the leaves saving the history.

    Args:
        particle: The current particle being inspected.

    Other arguments are automatically set.
    """

    neutrino_pdgs = [12, 14, 16, 18]

    # Check if there are any neutrinos in the tree and set the semileptonic flag
    # Only primary particles need to be considered since they're what counts in the LCA
    if (abs(particle.getPDG()) in neutrino_pdgs) and particle.isPrimaryParticle():
        semilep_flag = True

    # Need to create a true copy, not just a ref to the object
    hist = copy.deepcopy(hist)
    leaf_hist = copy.deepcopy(leaf_hist)
    leaf_pdg = copy.deepcopy(leaf_pdg)
    levels = copy.deepcopy(levels)
    pdg = copy.deepcopy(pdg)

    # Below is deciding what to save to the LCA matrix
    # If primary or secondary with no daughters:
    #   save
    # If primary with no primary daughters:
    #   save and continue down daughters
    # else:
    #   continue down daughters

    # Check if we're a primary particle with no primary daughters
    prim_no_prim_daughters = (
        len(
            [d for d in get_object_list(particle.getDaughters()) if d.isPrimaryParticle()]
        )
        == 0
    )

    # If it's a leaf we save it
    # Also check that it has no primary daughters
    if (
        # particle.isPrimaryParticle() and
        (particle.getNDaughters() == 0)
        or prim_no_prim_daughters  # Even if saving secondaries we need to record primaries with no primary daughters
    ):
        # Won't save neutrinos or very low energy photons to the LCA
        if abs(particle.getPDG()) not in neutrino_pdgs and not (
            particle.getPDG() == 22 and particle.getEnergy() < 1e-4
        ):
            # Leaves get their history added
            leaf_hist[particle.getArrayIndex()] = hist
            leaf_pdg[particle.getArrayIndex()] = particle.getPDG()

            # And now that we have a full history down to the leaf
            # we can update the levels
            levels = _update_levels(levels, hist, pdg)

    # Here we decide whether to continue traversing down the decay tree
    # We don't want to do this if all daughters are secondaries since we're not saving them
    if (
        particle.getNDaughters() != 0
    ) and not prim_no_prim_daughters:  # Don't traverse if all daughters are secondaries
        # Only append primaries to history
        if particle.isPrimaryParticle():
            hist.append(particle.getArrayIndex())
            # Save the PDG values of all the in-between primary particles
            pdg[particle.getArrayIndex()] = particle.getPDG()

        # Now iterate over daughters passing the history down
        daughters = get_object_list(particle.getDaughters())
        for daughter in daughters:
            (
                leaf_hist,
                levels,
                pdg,
                leaf_pdg,
                semilep_flag,
            ) = write_hist(
                daughter,
                leaf_hist,
                levels,
                hist,
                pdg,
                leaf_pdg,
                semilep_flag,
            )

    return (
        leaf_hist,
        levels,
        pdg,
        leaf_pdg,
        semilep_flag,
    )
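
# Illustrative sketch (not executed): called with a B^0 (or Upsilon(4S))
# MCParticle as root, write_hist returns
#   leaf_hist: {leaf array index: [array indices of its primary ancestors]},
#   levels:    {ancestor array index: LCAS class from pdg_to_lca_converter},
#   pdg, leaf_pdg: PDG codes of the ancestors and leaves, respectively,
#   semilep_flag:  True if a primary neutrino was found in the tree.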


class LCASaverModule(b2.Module):
    """
    Save Lowest Common Ancestor matrix of each MC Particle in the given list.

    Args:
        particle_lists (list): Name of particle lists to save features of.
        features (list): List of features to save for each particle.
        mcparticle_list (str): Name of particle list to build LCAs from (will use as root).
        output_file (str): Path to output file to save.
    """

    def __init__(
        self,
        particle_lists,
        features,
        mcparticle_list,
        output_file,
    ):
        """
        Initialization.
        """
        super().__init__()
        #: Input particle lists
        self.particle_lists = particle_lists
        #: List of features to save for each particle
        self.features = features
        #: MC particle list (Upsilon, B0, B+)
        self.mcparticle_list = mcparticle_list
        #: Path to output file to save
        self.output_file = output_file

        #: Max number of particles. It doesn't actually matter what this is,
        #: as long as it's bigger than the number of particles in any event.
        self.max_particles = 500

    def initialize(self):
        """
        Called once at the beginning.
        """
        from ROOT import Belle2, TFile, TTree

        self.eventinfo = Belle2.PyStoreObj("EventMetaData")

        self.root_outfile = TFile(self.output_file, "recreate")
        self.tree = TTree("Tree", "tree")

        self.event_num = np.zeros(1, dtype=np.int32)
        self.tree.Branch("event", self.event_num, "event/I")

        #: bool containing information whether we reconstruct B or Upsilon
        self.isB = np.zeros(1, dtype=bool)
        self.tree.Branch("isB", self.isB, "isB/b")

        self.truth_dict = {}

        # We assume at most two LCA matrices per event
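        # Note on the branch definitions below: in ROOT leaflist syntax,
        # "name/I" is a 32-bit int, "name/b" an unsigned 8-bit int,
        # "name/F" a float and "name/O" a bool, while "name[counter]/X"
        # declares a variable-length array whose size is read from the
        # "counter" branch.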
        for i in [1, 2]:
            self.truth_dict[f"n_LCA_leaves_{i}"] = np.zeros(1, dtype=np.int32)
            self.truth_dict[f"LCA_leaves_{i}"] = np.zeros(
                self.max_particles, dtype=np.int32
            )
            self.tree.Branch(
                f"n_LCA_leaves_{i}",
                self.truth_dict[f"n_LCA_leaves_{i}"],
                f"n_LCA_leaves_{i}/I",
            )
            self.tree.Branch(
                f"LCA_leaves_{i}",
                self.truth_dict[f"LCA_leaves_{i}"],
                f"LCA_leaves_{i}[n_LCA_leaves_{i}]/I",
            )

            self.truth_dict[f"n_LCA_{i}"] = np.zeros(1, dtype=np.int32)
            self.truth_dict[f"LCAS_{i}"] = np.zeros(
                self.max_particles**2, dtype=np.uint8
            )
            self.tree.Branch(f"n_LCA_{i}", self.truth_dict[f"n_LCA_{i}"], f"n_LCA_{i}/I")
            self.tree.Branch(
                f"LCAS_{i}", self.truth_dict[f"LCAS_{i}"], f"LCAS_{i}[n_LCA_{i}]/b"
            )

        # Feature data

        #: Number of particles
        self.n_particles = np.zeros(1, dtype=np.int32)
        self.tree.Branch("n_particles", self.n_particles, "n_particles/I")

        #: Particle-is-primary flag
        self.primary = np.zeros(self.max_particles, dtype=bool)
        self.tree.Branch("primary", self.primary, "primary[n_particles]/O")

        self.leaves = np.zeros(self.max_particles, dtype=np.int32)
        self.tree.Branch("leaves", self.leaves, "leaves[n_particles]/I")

        self.b_index = np.zeros(self.max_particles, dtype=np.int32)
        self.tree.Branch("b_index", self.b_index, "b_index[n_particles]/I")

        #: Features dictionary
        self.feat_dict = {}
        for feat in self.features:
            self.feat_dict[feat] = np.zeros(self.max_particles, np.float32)
            self.tree.Branch(
                f"feat_{feat}", self.feat_dict[feat], f"feat_{feat}[n_particles]/F"
            )

        self.mc_pdg = np.zeros(self.max_particles, np.float32)
        self.tree.Branch("mcPDG", self.mc_pdg, "mcPDG[n_particles]/F")

    def _reset_LCA(self):
        """
        Resets the value of the LCA to 0.
        """
        for p_index in [1, 2]:
            self.truth_dict[f"LCAS_{p_index}"][: self.max_particles**2] *= 0
            self.truth_dict[f"n_LCA_leaves_{p_index}"][0] *= 0
            self.truth_dict[f"LCA_leaves_{p_index}"][: self.max_particles] *= 0
            self.truth_dict[f"n_LCA_{p_index}"][0] *= 0

    def event(self):
        """
        Called for each event.
        """
        from ROOT import Belle2  # noqa: make Belle2 namespace available
        from ROOT.Belle2 import PyStoreObj

        # Get the particle list (note this is a regular Particle list, not MCParticle)
        p_list = PyStoreObj(self.mcparticle_list)

        self.event_num[0] = self.eventinfo.getEvent()

        # Is Upsilon flag
        if self.mcparticle_list == "Upsilon(4S):MC":
            self.isB[0] = 0
        elif self.mcparticle_list == "B0:MC":
            self.isB[0] = 1
        elif self.mcparticle_list == "B+:MC":
            self.isB[0] = 2

        # Create the LCA
        # IMPORTANT: The ArrayIndex is 0-based.
        # mcplist contains the root particles we are to create LCAs from.
        # Reset the LCA list so that, if only one B is there, it does not carry an older version over.
        self._reset_LCA()

        if p_list.getListSize() > 0:
            for part in p_list.obj():
                # Get the corresponding MCParticle
                mcp = part.getMCParticle()
                # In this way the LCA variables in the ntuples will be labelled _1 and _2.
                # If we train on B decays these will correspond to the two B's,
                # while if we train on the Upsilon, _1 will correspond to it and _2 will remain empty,
                # because getArrayIndex() gives 0 for the Upsilon and 1, 2 for the B's.
                array_index = 1 if self.isB[0] == 0 else mcp.getArrayIndex()

                # Call the function to write the history of the leaves in the tree.
                # It internally calls _update_levels to find and save the level of each particle in the tree.
                # This is a necessary step to build the LCA.
                (
                    lcas_leaf_hist,
                    lcas_levels,
                    _, _, _,
                ) = write_hist(
                    particle=mcp,
                    leaf_hist={},
                    levels={},
                    hist=[],
                    pdg={},
                    leaf_pdg={},
                    semilep_flag=False,
                )

                lcas = np.zeros([len(lcas_leaf_hist), len(lcas_leaf_hist)])

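                # For every unordered pair of leaves, the lowest common ancestor
                # is the last element shared by their two histories; its LCAS
                # level fills the (symmetric) LCA matrix entry for that pair.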
                for x, y in combinations(enumerate(lcas_leaf_hist), 2):
                    lcas_intersection = [
                        i for i in lcas_leaf_hist[x[1]] if i in lcas_leaf_hist[y[1]]
                    ]
                    lcas[x[0], y[0]] = lcas_levels[lcas_intersection[-1]]
                    lcas[y[0], x[0]] = lcas_levels[lcas_intersection[-1]]

                self.truth_dict[f"LCAS_{array_index}"][: lcas.size] = lcas.flatten()

                self.truth_dict[f"n_LCA_leaves_{array_index}"][0] = len(lcas_leaf_hist.keys())
                self.truth_dict[f"LCA_leaves_{array_index}"][
                    : len(lcas_leaf_hist.keys())
                ] = list(lcas_leaf_hist.keys())

                self.truth_dict[f"n_LCA_{array_index}"][0] = lcas.size

        # ### Create the features
        # Where we'll append features
        evt_feats = {f: [] for f in self.features}

        # Ideally this would be saved in evt_feats but the -1 for unmatched
        # particles messes that up
        evt_leaf_dict = {
            "leaves": [],
            "primary": [],
            "b_index": [],
            "mc_pdg": [],
        }

        # IMPORTANT: The ArrayIndex is 0-based.
        # mcplist contains the root particles we are to create LCAs from
        for p_list_name in self.particle_lists:
            # Get the particle list (note this is a regular Particle list, not MCParticle)
            p_list = PyStoreObj(p_list_name)

            for particle in p_list.obj():
                # Get the B parent index, set to -1 if the particle has no MC match
                b_index = int(vm.evaluate("ancestorBIndex", particle))
                # Need this to reorder the LCA later, returns -1 if no MC match
                if b_index >= 0:
                    p_index = particle.getMCParticle().getArrayIndex()
                    p_primary = particle.getMCParticle().isPrimaryParticle()
                    # Save the particle's PDG code
                    mc_pdg = particle.getMCParticle().getPDG()
                else:
                    p_index = -1
                    p_primary = False
                    mc_pdg = 0

                evt_leaf_dict["primary"].append(p_primary)
                evt_leaf_dict["leaves"].append(p_index)
                evt_leaf_dict["b_index"].append(b_index)
                evt_leaf_dict["mc_pdg"].append(mc_pdg)
                for feat in self.features:
                    evt_feats[feat].append(vm.evaluate(feat, particle))

        n_particles = len(evt_leaf_dict["primary"])
        self.n_particles[0] = n_particles

        self.primary[:n_particles] = evt_leaf_dict["primary"]
        self.leaves[:n_particles] = evt_leaf_dict["leaves"]
        self.b_index[:n_particles] = evt_leaf_dict["b_index"]
        self.mc_pdg[:n_particles] = evt_leaf_dict["mc_pdg"]

        for feat in self.features:
            self.feat_dict[feat][:n_particles] = evt_feats[feat]
        # Add number of final state particles

        # Write everything to the TTree
        self.tree.Fill()

    def terminate(self):
        """
        Called at the end.
        """
        self.root_outfile.Write()
        self.root_outfile.Close()

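

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not an official steering file):
    # the input file name, particle-list names, cuts and features below are
    # placeholders, and the exact inputMdst signature may differ between
    # basf2 releases.
    import modularAnalysis as ma

    path = b2.Path()
    ma.inputMdst("mdst.root", path=path)  # placeholder input file

    # Root list to build the LCA from and a reconstructed FSP list to save features of
    ma.fillParticleListFromMC("B0:MC", "", path=path)
    ma.fillParticleList("pi+:final", "", path=path)
    # MC matching is required for getMCParticle() and ancestorBIndex
    ma.matchMCTruth("pi+:final", path=path)

    path.add_module(
        LCASaverModule(
            particle_lists=["pi+:final"],
            features=["px", "py", "pz", "E"],
            mcparticle_list="B0:MC",
            output_file="lca_tuples.root",
        )
    )
    b2.process(path)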