Belle II Software development
LCASaverModule.py
10import basf2 as b2
11import numpy as np
12import copy
13from itertools import combinations
14from variables import variables as vm
15
16
def get_object_list(pointerVec):
    """
    Workaround to avoid memory problems in basf2.

    Args:
        pointerVec: Input particle list.

    Returns:
        list: Output python list.
    """
    from ROOT import Belle2

    # A Belle2.ParticleList reports its length via getListSize(),
    # everything else supports plain len()
    if isinstance(pointerVec, Belle2.ParticleList):
        size = pointerVec.getListSize()
    else:
        size = len(pointerVec)
    return [pointerVec[i] for i in range(size)]
38
39
def pdg_to_lca_converter(pdg):
    """
    Converts PDG code to LCAS classes.

    .. tip:: If you want to modify the LCAS classes, it's here.
        Don't forget to update the number of edge classes accordingly in the yaml file.

    Args:
        pdg (int): PDG code to convert.

    Returns:
        int or None: Corresponding LCAS class, or None if PDG not present in ``pdg_lca_match``.
    """
    # MC PDG code -> LCAS class
    pdg_lca_match = {
        443: 1,  # J/psi
        111: 1,  # pi^0
        310: 2,  # K_S^0
        # 130: 2, #K_L^0
        421: 3,  # D^0
        411: 3,  # D^+
        431: 3,  # D_s^+
        413: 4,  # D^+*
        423: 4,  # D^0*
        433: 4,  # D_s^*+
        521: 5,  # B^+
        511: 5,  # B^0
        300553: 6,  # Upsilon(4S)
    }

    # Charge conjugates share the class; unknown codes yield None
    return pdg_lca_match.get(abs(pdg))
75
76
def _update_levels(levels, hist, pdg):
    """
    Assigns LCAS level to each particle in the decay tree.

    Arguments are automatically set.
    """
    for i, n in enumerate(hist):
        # Level already known, nothing to do
        if n in levels:
            continue

        lca = pdg_to_lca_converter(pdg[n])
        if not lca:
            # No class for this particle: walk back up its history
            # (starting from the particle itself) until a convertible
            # ancestor is found, and inherit that class
            for ancestor in reversed(hist[: i + 1]):
                lca = pdg_to_lca_converter(pdg[ancestor])
                if lca:
                    break
        if lca:
            levels[n] = lca

    return levels
98
99
def write_hist(
    particle,
    leaf_hist=None,
    levels=None,
    hist=None,
    pdg=None,
    leaf_pdg=None,
    semilep_flag=False,
):
    """
    Recursive function to traverse down to the leaves saving the history.

    Args:
        particle: The current particle being inspected.
        leaf_hist (dict): Maps leaf array index to the list of its primary ancestors.
        levels (dict): Maps array index to assigned LCAS class.
        hist (list): Array indices of the primary ancestors of ``particle``.
        pdg (dict): Maps array index to PDG code of intermediate primary particles.
        leaf_pdg (dict): Maps leaf array index to its PDG code.
        semilep_flag (bool): Whether a primary neutrino has been seen in the tree.

    Returns:
        tuple: ``(leaf_hist, levels, pdg, leaf_pdg, semilep_flag)``.
    """
    # Avoid mutable default arguments (shared across calls): use fresh
    # containers when the caller did not supply any
    leaf_hist = {} if leaf_hist is None else leaf_hist
    levels = {} if levels is None else levels
    hist = [] if hist is None else hist
    pdg = {} if pdg is None else pdg
    leaf_pdg = {} if leaf_pdg is None else leaf_pdg

    neutrino_pdgs = [12, 14, 16, 18]

    # Check if there's any neutrinos in the tree and set semileptonic flag
    # Need to only include primary particles since they're what counts in the LCA
    if (abs(particle.getPDG()) in neutrino_pdgs) and particle.isPrimaryParticle():
        semilep_flag = True

    # Need to create a true copy, not just a ref to the object
    hist = copy.deepcopy(hist)
    leaf_hist = copy.deepcopy(leaf_hist)
    leaf_pdg = copy.deepcopy(leaf_pdg)
    levels = copy.deepcopy(levels)
    pdg = copy.deepcopy(pdg)

    # Below is deciding what to save to the LCA matrix
    # If primary or secondary with no daughters:
    #   save
    # If primary with no primary daughters:
    #   save and continue down daughters
    # else:
    #   continue down daughters

    # Check if we're a primary particle with no primary daughters
    prim_no_prim_daughters = (
        len(
            [d for d in get_object_list(particle.getDaughters()) if d.isPrimaryParticle()]
        )
        == 0
    )

    # If it's a leaf we save it
    # Also check that it has no primary daughters
    if (
        # particle.isPrimaryParticle() and
        (particle.getNDaughters() == 0)
        or prim_no_prim_daughters  # Even if saving secondaries we need to record primaries with no primary daughters
    ):
        # Won't save neutrinos or very low energy photons to LCA
        if abs(particle.getPDG()) not in neutrino_pdgs and not (
            particle.getPDG() == 22 and particle.getEnergy() < 1e-4
        ):
            # Leaves get their history added
            leaf_hist[particle.getArrayIndex()] = hist
            leaf_pdg[particle.getArrayIndex()] = particle.getPDG()

            # And now that we have a full history down to the leaf
            # we can update the levels
            levels = _update_levels(levels, hist, pdg)

    # Here is deciding whether to continue traversing down the decay tree
    # Don't want to do this if all daughters are secondaries since we're not saving them
    if (
        particle.getNDaughters() != 0
    ) and not prim_no_prim_daughters:  # Don't traverse if all daughters are secondaries
        # Only append primaries to history
        if particle.isPrimaryParticle():
            hist.append(particle.getArrayIndex())
            # Save all PDG values of the in between primary particles
            pdg[particle.getArrayIndex()] = particle.getPDG()

        # Now iterate over daughters passing history down
        daughters = get_object_list(particle.getDaughters())
        for daughter in daughters:
            (
                leaf_hist,
                levels,
                pdg,
                leaf_pdg,
                semilep_flag,
            ) = write_hist(
                daughter,
                leaf_hist,
                levels,
                hist,
                pdg,
                leaf_pdg,
                semilep_flag,
            )

    return (
        leaf_hist,
        levels,
        pdg,
        leaf_pdg,
        semilep_flag,
    )
203
204
class LCASaverModule(b2.Module):
    """
    Save Lowest Common Ancestor matrix of each MC Particle in the given list.

    Args:
        particle_lists (list): Name of particle lists to save features of.
        features (list): List of features to save for each particle.
        mcparticle_list (str): Name of particle list to build LCAs from (will use as root).
        output_file (str): Path to output file to save.
    """

    def __init__(
        self,
        particle_lists,
        features,
        mcparticle_list,
        output_file,
    ):
        """
        Initialization.
        """
        super().__init__()

        # Input particle lists
        self.particle_lists = particle_lists

        # Features to save for each particle
        self.features = features

        # MC particle list (Upsilon, B0, B+)
        self.mcparticle_list = mcparticle_list

        # Path of the output ROOT file
        self.output_file = output_file

        # Max number of particles. It doesn't actually matter what this is
        # as long as it's bigger than the number of particles in any event.
        self.max_particles = 500

    def initialize(self):
        """
        Called once at the beginning.
        """
        from ROOT import Belle2, TFile, TTree

        # Event metadata (event number etc.)
        self.eventinfo = Belle2.PyStoreObj("EventMetaData")

        # Output ROOT file and TTree
        self.root_outfile = TFile(self.output_file, "recreate")
        self.tree = TTree("Tree", "tree")

        # Event number buffer
        self.event_num = np.zeros(1, dtype=np.int32)
        self.tree.Branch("event", self.event_num, "event/I")

        # Reconstruction label: 0 = Upsilon(4S), 1 = B0, 2 = B+.
        # NOTE: must be uint8 (matching the "/b" = UChar_t branch), not bool —
        # a numpy bool clamps the value 2 to True, making B+ indistinguishable from B0.
        self.isB = np.zeros(1, dtype=np.uint8)
        self.tree.Branch("isB", self.isB, "isB/b")

        # Truth information buffers
        self.truth_dict = {}

        # We assume at most two LCA matrices for event
        for i in [1, 2]:
            self.truth_dict[f"n_LCA_leaves_{i}"] = np.zeros(1, dtype=np.int32)
            self.truth_dict[f"LCA_leaves_{i}"] = np.zeros(
                self.max_particles, dtype=np.int32
            )
            self.tree.Branch(
                f"n_LCA_leaves_{i}",
                self.truth_dict[f"n_LCA_leaves_{i}"],
                f"n_LCA_leaves_{i}/I",
            )
            self.tree.Branch(
                f"LCA_leaves_{i}",
                self.truth_dict[f"LCA_leaves_{i}"],
                f"LCA_leaves_{i}[n_LCA_leaves_{i}]/I",
            )

            self.truth_dict[f"n_LCA_{i}"] = np.zeros(1, dtype=np.int32)
            self.truth_dict[f"LCAS_{i}"] = np.zeros(
                self.max_particles**2, dtype=np.uint8
            )
            self.tree.Branch(f"n_LCA_{i}", self.truth_dict[f"n_LCA_{i}"], f"n_LCA_{i}/I")
            self.tree.Branch(
                f"LCAS_{i}", self.truth_dict[f"LCAS_{i}"], f"LCAS_{i}[n_LCA_{i}]/b"
            )

        # Feature data

        # Number of reconstructed particles in the event
        self.n_particles = np.zeros(1, dtype=np.int32)
        self.tree.Branch("n_particles", self.n_particles, "n_particles/I")

        # Particle-is-primary flags
        self.primary = np.zeros(self.max_particles, dtype=bool)
        self.tree.Branch("primary", self.primary, "primary[n_particles]/O")

        # MCParticle array indices of matched particles (-1 if unmatched)
        self.leaves = np.zeros(self.max_particles, dtype=np.int32)
        self.tree.Branch("leaves", self.leaves, "leaves[n_particles]/I")

        # Index of the ancestor B of each particle (-1 if unmatched)
        self.b_index = np.zeros(self.max_particles, dtype=np.int32)
        self.tree.Branch("b_index", self.b_index, "b_index[n_particles]/I")

        # One buffer/branch per requested feature
        self.feat_dict = {}
        for feat in self.features:
            self.feat_dict[feat] = np.zeros(self.max_particles, np.float32)
            self.tree.Branch(
                f"feat_{feat}", self.feat_dict[feat], f"feat_{feat}[n_particles]/F"
            )

        # PDG codes of the matched MC particles (0 if unmatched)
        self.mc_pdg = np.zeros(self.max_particles, np.float32)
        self.tree.Branch("mcPDG", self.mc_pdg, "mcPDG[n_particles]/F")

    def _reset_LCA(self):
        """
        Resets the value of the LCA to 0.
        """
        for p_index in [1, 2]:
            self.truth_dict[f"LCAS_{p_index}"][: self.max_particles**2] *= 0
            self.truth_dict[f"n_LCA_leaves_{p_index}"][0] *= 0
            self.truth_dict[f"LCA_leaves_{p_index}"][: self.max_particles] *= 0
            self.truth_dict[f"n_LCA_{p_index}"][0] *= 0

    def event(self):
        """
        Called for each event.
        """
        from ROOT import Belle2  # noqa: make Belle2 namespace available
        from ROOT.Belle2 import PyStoreObj

        # Get the particle list (note this is a regular Particle list, not MCParticle)
        p_list = PyStoreObj(self.mcparticle_list)

        self.event_num[0] = self.eventinfo.getEvent()

        # Is Upsilon flag
        if self.mcparticle_list == "Upsilon(4S):MC":
            self.isB[0] = 0
        elif self.mcparticle_list == "B0:MC":
            self.isB[0] = 1
        elif self.mcparticle_list == "B+:MC":
            self.isB[0] = 2

        # Create the LCA
        # IMPORTANT: The ArrayIndex is 0-based.
        # mcplist contains the root particles we are to create LCAs from
        # Reset the LCA list so if only one B is there it does not carry an older version over
        self._reset_LCA()

        if p_list.getListSize() > 0:
            for part in p_list.obj():
                # Get the corresponding MCParticle
                mcp = part.getMCParticle()
                # In this way the LCA variables in the ntuples will be labelled _1 and _2
                # If we train on B decays these will correspond to the two B's
                # while if we train on the Upsilon, _1 will correspond to it and _2 will remain empty
                # because getArrayIndex() gives 0 for the Upsilon and 1, 2 for the B's
                array_index = 1 if self.isB[0] == 0 else mcp.getArrayIndex()

                # Call function to write history of leaves in the tree.
                # It internally calls function update_levels to find and save the level of each particle in the tree.
                # Necessary step to build the LCA.
                (
                    lcas_leaf_hist,
                    lcas_levels,
                    _, _, _,
                ) = write_hist(
                    particle=mcp,
                    leaf_hist={},
                    levels={},
                    hist=[],
                    pdg={},
                    leaf_pdg={},
                    semilep_flag=False,
                )

                lcas = np.zeros([len(lcas_leaf_hist), len(lcas_leaf_hist)])

                # LCAS entry for a pair of leaves is the level of their
                # lowest common ancestor (last element of the shared history)
                for x, y in combinations(enumerate(lcas_leaf_hist), 2):
                    lcas_intersection = [
                        i for i in lcas_leaf_hist[x[1]] if i in lcas_leaf_hist[y[1]]
                    ]
                    lcas[x[0], y[0]] = lcas_levels[lcas_intersection[-1]]
                    lcas[y[0], x[0]] = lcas_levels[lcas_intersection[-1]]

                self.truth_dict[f"LCAS_{array_index}"][: lcas.size] = lcas.flatten()

                self.truth_dict[f"n_LCA_leaves_{array_index}"][0] = len(lcas_leaf_hist.keys())
                self.truth_dict[f"LCA_leaves_{array_index}"][
                    : len(lcas_leaf_hist.keys())
                ] = list(lcas_leaf_hist.keys())

                self.truth_dict[f"n_LCA_{array_index}"][0] = lcas.size

        # ### Create the features
        # Where we'll append features
        evt_feats = {f: [] for f in self.features}

        # Ideally this would be saved in evt_feat_dict but the -1 for unmatched
        # particles messes that up
        evt_leaf_dict = {
            "leaves": [],
            "primary": [],
            "b_index": [],
            "mc_pdg": [],
        }

        # IMPORTANT: The ArrayIndex is 0-based.
        # mcplist contains the root particles we are to create LCAs from
        for p_list_name in self.particle_lists:
            # Get the particle list (note this is a regular Particle list, not MCParticle)
            p_list = PyStoreObj(p_list_name)

            for particle in p_list.obj():
                # Get the B parent index, set to -1 if particle has no MC match
                b_index = int(vm.evaluate("ancestorBIndex", particle))
                # Need this to reorder LCA later, returns -1 if no MC match
                if b_index >= 0:
                    p_index = particle.getMCParticle().getArrayIndex()
                    p_primary = particle.getMCParticle().isPrimaryParticle()
                    # Save particle's PDG code
                    mc_pdg = particle.getMCParticle().getPDG()
                else:
                    p_index = -1
                    p_primary = False
                    mc_pdg = 0

                evt_leaf_dict["primary"].append(p_primary)
                evt_leaf_dict["leaves"].append(p_index)
                evt_leaf_dict["b_index"].append(b_index)
                evt_leaf_dict["mc_pdg"].append(mc_pdg)
                for feat in self.features:
                    evt_feats[feat].append(vm.evaluate(feat, particle))

        n_particles = len(evt_leaf_dict["primary"])
        self.n_particles[0] = n_particles

        self.primary[:n_particles] = evt_leaf_dict["primary"]
        self.leaves[:n_particles] = evt_leaf_dict["leaves"]
        self.b_index[:n_particles] = evt_leaf_dict["b_index"]
        self.mc_pdg[:n_particles] = evt_leaf_dict["mc_pdg"]

        for feat in self.features:
            self.feat_dict[feat][:n_particles] = evt_feats[feat]
        # Add number of final state particles

        # Write everything to the TTree
        self.tree.Fill()

    def terminate(self):
        """
        Called at the end.
        """
        self.root_outfile.Write()
        self.root_outfile.Close()
ParticleList: a container class that stores a collection of Particle objects.
PyStoreObj: a (simplified) Python wrapper for StoreObjPtr (see PyStoreObj.h:67).
primary: particle-is-primary flag.
max_particles (int): max number of particles. It doesn't actually matter what this is as long as it's bigger than the number of particles per event.
isB: flag containing information on whether we reconstruct B or Upsilon.
mcparticle_list (str): MC particle list (Upsilon, B0, B+).
__init__(self, particle_lists, features, mcparticle_list, output_file)
n_particles: number of particles.
particle_lists: input particle lists.
truth_dict (dict): truth information.
feat_dict (dict): features dictionary.