Belle II Software development
mc_side_module.py
1
8
9import ROOT
10from ROOT import Belle2
11
12import numpy as np
13
14import tracking.validation.utilities as utilities
15
16import tracking.harvest.refiners as refiners
17import tracking.harvest.harvesting as harvesting
18import tracking.harvest.peelers as peelers
19ROOT.gSystem.Load("libtracking")
20
21
23 """Module to collect matching information about the found particles and to generate
24 validation plots and figures of merit on the performance of track finding."""
25
26 """ Expert level behavior:
27 expert_level = default_expert_level: all figures and plots from this module except tree entries
28 expert_level > default_expert_level: everything including tree entries
29 expert_level <= default_expert_level//2: only basic figures
30 default_expert_level//2 < expert_level < default_expert_level: basic figures and basic tree entries
31 """
32
33 default_expert_level = 10
34
36 self,
37 name,
38 contact,
39 output_file_name=None,
40 reco_tracks_name='RecoTracks',
41 mc_reco_tracks_name='MCRecoTracks',
42 expert_level=None):
43 """Constructor"""
44
45 output_file_name = output_file_name or name + 'TrackingValidation.root'
46
47 super().__init__(foreach=mc_reco_tracks_name,
48 name=name,
49 output_file_name=output_file_name,
50 contact=contact,
51 expert_level=expert_level)
52
53 ## Name of the StoreArray of the tracks from pattern recognition
54 self.reco_tracks_name = reco_tracks_name
55
56 ## Name of the StoreArray of the ideal mc tracks
57 self.mc_reco_tracks_name = mc_reco_tracks_name
58
59 ## Reference to the track match lookup object reading the relation information constructed by the MCMatcherTracksModule
60 self.track_match_look_up = None
61
62 if self.expert_level >= self.default_expert_level:
63 ## Set of all detector and hits ids contained in any pr track. Updated each event.
64 self.found_det_hit_ids = set()
65
66 ## Set of all detector and hits ids contained in matched pr tracks. Updated each event.
67 self.matched_det_hit_ids = set()
68
69 ## Set of all detector and hits ids contained in clone pr tracks. Updated each event.
70 self.clone_det_hit_ids = set()
71
72 ## Set of all detector and hits ids contained in background and ghost pr tracks. Updated each event.
73 self.fake_det_hit_ids = set()
74
75 def initialize(self):
76 """Initialization signal at the start of the event processing"""
77 super().initialize()
78 self.track_match_look_up = Belle2.TrackMatchLookUp(self.mc_reco_tracks_name, self.reco_tracks_name)
79
80 def prepare(self):
81 """Collect some statistics about the pattern recognition tracks used for comparison to the MC tracks
82
83 Executed once at the start of each event.
84 """
85 super().prepare()
86 if self.expert_level >= self.default_expert_level:
87 reco_tracks = Belle2.PyStoreArray(self.reco_tracks_name)
88 track_match_look_up = self.track_match_look_up
89
90 found_det_hit_ids = set()
91 matched_det_hit_ids = set()
92 clone_det_hit_ids = set()
93 fake_det_hit_ids = set()
94
95 for reco_track in reco_tracks:
96 det_hit_ids = utilities.get_det_hit_ids(reco_track)
97
98 found_det_hit_ids |= det_hit_ids
99
100 if track_match_look_up.isAnyChargeMatchedPRRecoTrack(reco_track):
101 matched_det_hit_ids |= det_hit_ids
102
103 if track_match_look_up.isAnyChargeClonePRRecoTrack(reco_track):
104 clone_det_hit_ids |= det_hit_ids
105
106 if (track_match_look_up.isGhostPRRecoTrack(reco_track) or
107 track_match_look_up.isBackgroundPRRecoTrack(reco_track)):
108 fake_det_hit_ids |= det_hit_ids
109
110 self.found_det_hit_ids = found_det_hit_ids
111 self.matched_det_hit_ids = matched_det_hit_ids
112 self.clone_det_hit_ids = clone_det_hit_ids
113 self.fake_det_hit_ids = fake_det_hit_ids
114
115 def pick(self, mc_reco_track):
116 """Pick every MCRecoTrack"""
117 return True
118
119 def peel(self, mc_reco_track):
120 """Looks at the individual Monte Carlo tracks and store information about them"""
121 track_match_look_up = self.track_match_look_up
122
123 # Analyse from the Monte Carlo reference side
124 mc_reco_tracks = Belle2.PyStoreArray(self.foreach)
125 multiplicity = mc_reco_tracks.getEntries()
126
127 mc_particle = track_match_look_up.getRelatedMCParticle(mc_reco_track)
128 is_primary = bool(mc_particle.hasStatus(Belle2.MCParticle.c_PrimaryParticle))
129 mc_to_pr_match_info_crops = self.peel_mc_to_pr_match_info(mc_reco_track)
130 mc_store_array_crops = peelers.peel_store_array_info(mc_reco_track, key="mc_{part_name}")
131
132 crops = dict(is_primary=is_primary,
133 multiplicity=multiplicity,
134 **mc_to_pr_match_info_crops,
135 **mc_store_array_crops
136 )
137
138 if self.expert_level >= self.default_expert_level:
139 reco_track = track_match_look_up.getRelatedPRRecoTrack(mc_reco_track)
140 mc_particle_crops = peelers.peel_mc_particle(mc_particle)
141 hit_content_crops = peelers.peel_reco_track_hit_content(mc_reco_track)
142
143 # Custom peel function to get single detector hit purities
144 subdetector_hit_efficiency_crops = peelers.peel_subdetector_hit_efficiency(mc_reco_track, reco_track)
145
146 mc_hit_efficiencies_in_all_pr_tracks_crops = self.peel_hit_efficiencies_in_all_pr_tracks(mc_reco_track)
147
148 # Event Info
149 event_meta_data = Belle2.PyStoreObj("EventMetaData")
150 event_crops = peelers.peel_event_info(event_meta_data)
151
152 # Store Array for easier joining
153 store_array_crops = peelers.peel_store_array_info(reco_track, key="pr_{part_name}")
154
155 # Information on PR reco track
156 pr_purity_information = {
157 "pr_hit_purity": track_match_look_up.getRelatedPurity(reco_track) if reco_track else float("nan"),
158 **peelers.peel_subdetector_hit_purity(reco_track=reco_track, mc_reco_track=mc_reco_track,
159 key="pr_{part_name}")
160 }
161
162 # Information on TrackFinders
163 trackfinder_crops = peelers.peel_trackfinder(reco_track)
164
165 # Basic peel function to get Quality Indicators
166 qualityindicator_crops = peelers.peel_quality_indicators(reco_track)
167
168 crops.update(dict(**hit_content_crops,
169 **mc_particle_crops,
170 **subdetector_hit_efficiency_crops,
171 **mc_hit_efficiencies_in_all_pr_tracks_crops,
172 **event_crops,
173 **store_array_crops,
174 **pr_purity_information,
175 **trackfinder_crops,
176 **qualityindicator_crops
177 ))
178
179 return crops
180
181 def peel_mc_to_pr_match_info(self, mc_reco_track):
182 """Extracts track-match information from the MCMatcherTracksModule results"""
183 track_match_look_up = self.track_match_look_up
184 return dict(
185 is_matched=track_match_look_up.isAnyChargeMatchedMCRecoTrack(mc_reco_track),
186 is_matched_correct_charge=track_match_look_up.isCorrectChargeMatchedMCRecoTrack(mc_reco_track),
187 is_matched_wrong_charge=track_match_look_up.isWrongChargeMatchedMCRecoTrack(mc_reco_track),
188 is_merged=track_match_look_up.isAnyChargeMergedMCRecoTrack(mc_reco_track),
189 is_merged_correct_charge=track_match_look_up.isCorrectChargeMergedMCRecoTrack(mc_reco_track),
190 is_merged_wrong_charge=track_match_look_up.isWrongChargeMergedMCRecoTrack(mc_reco_track),
191 is_missing=track_match_look_up.isMissingMCRecoTrack(mc_reco_track),
192 hit_efficiency=track_match_look_up.getRelatedEfficiency(mc_reco_track),
193 )
194
195 def peel_hit_efficiencies_in_all_pr_tracks(self, mc_reco_track):
196 """Extracts hit efficiencies"""
197 mc_det_hit_ids = utilities.get_det_hit_ids(mc_reco_track)
198
199 hit_efficiency_in_all_found = utilities.calc_hit_efficiency(self.found_det_hit_ids,
200 mc_det_hit_ids)
201
202 unfound_hit_efficiency = 1.0 - hit_efficiency_in_all_found
203
204 hit_efficiency_in_all_matched = utilities.calc_hit_efficiency(self.matched_det_hit_ids,
205 mc_det_hit_ids)
206
207 hit_efficiency_in_all_fake = utilities.calc_hit_efficiency(self.fake_det_hit_ids,
208 mc_det_hit_ids)
209
210 hit_efficiency_crops = dict(
211 hit_efficiency_in_all_found=hit_efficiency_in_all_found,
212 unfound_hit_efficiency=unfound_hit_efficiency,
213 hit_efficiency_in_all_matched=hit_efficiency_in_all_matched,
214 hit_efficiency_in_all_fake=hit_efficiency_in_all_fake,
215 )
216 return hit_efficiency_crops
217
218 # Refiners to be executed on terminate #
219 # #################################### #
220
221 ## Save a tree of all collected variables in a sub folder
222 save_tree = refiners.save_tree(
223 # using cond to suppress false doxygen warnings
224 ## \cond
225 name="mc_tree",
226 folder_name="mc_tree",
227 above_expert_level=default_expert_level
228 ## \endcond
229 )
230
231 ## Save a basic tree
232 save_tree_basic = refiners.save_tree(
233 # using cond to suppress false doxygen warnings
234 ## \cond
235 name="mc_tree",
236 folder_name="mc_tree",
237 above_expert_level=default_expert_level // 2,
238 below_expert_level=default_expert_level
239 ## \endcond
240 )
241
242 ## Generate the average finding efficiencies and hit efficiencies
243 save_overview_figures_of_merit = refiners.save_fom(
244 ## \cond
245 name="{module.id}_overview_figures_of_merit",
246 title="Overview figures in {module.title}",
247 aggregation=np.nanmean,
248 key="{part_name}",
249 select={"is_matched": "finding efficiency", "hit_efficiency": "hit efficiency", },
250 filter_on="is_primary",
251 description="""
252finding efficiency - the ratio of matched primary Monte Carlo tracks to all Monte Carlo tracks
253hit efficiency - the ratio of hits picked up by the matched pattern recognition track of primary Monte Carlo tracks
254"""
255 ## \endcond
256 )
257
258 # Default refiners that can be disabled with a lower expert_level
259 # #################################### #
260
261 ## Save a histogram of the hit efficiency
262 save_hit_efficiency_histogram = refiners.save_histograms(
263 ## \cond
264 above_expert_level=default_expert_level - 1,
265 select={"hit_efficiency": "hit efficiency"},
266 filter_on="is_primary",
267 description="Not a serious plot yet.",
268 ## \endcond
269 )
270
271 ## Rename the efficiency-profile quantities so that they display nicely in ROOT TLatex
272 renaming_select_for_finding_efficiency_profiles = {
273 'is_matched': 'finding efficiency',
274 'd0_truth': 'd_{0}',
275 'pt_truth': 'p_{t}',
276 'multiplicity': 'multiplicity',
277 'phi0_truth': '#phi',
278 }
279
280 ## Make profile of finding efficiency
281 save_finding_efficiency_profiles = refiners.save_profiles(
282 ## \cond
283 above_expert_level=default_expert_level - 1,
284 select=renaming_select_for_finding_efficiency_profiles,
285 y='finding efficiency',
286 y_binary=True,
287 filter_on="is_primary",
288 outlier_z_score=5.0,
289 allow_discrete=True,
290 ## \endcond
291 )
292
293 ## Make profile of finding efficiency versus tan(lambda)
294 save_finding_efficiency_by_tan_lamba_profiles = refiners.save_profiles(
295 ## \cond
296 above_expert_level=default_expert_level - 1,
297 select={
298 'is_matched': 'finding efficiency',
299 'tan_lambda_truth': 'tan #lambda'
300 },
301 y='finding efficiency',
302 y_binary=True,
303 filter_on="is_primary",
304 outlier_z_score=5.0,
305 lower_bound=-1.73,
306 upper_bound=3.27,
307 ## \endcond
308 )
309
310 ## Make profiles of finding efficiency versus tan(lambda) grouped by transverse momentum
311 save_finding_efficiency_by_tan_lamba_in_pt_groups_profiles = refiners.save_profiles(
312 ## \cond
313 above_expert_level=default_expert_level - 1,
314 select={
315 'is_matched': 'finding efficiency',
316 'tan_lambda_truth': 'tan #lambda'
317 },
318 y='finding efficiency',
319 y_binary=True,
320 filter_on="is_primary",
321 groupby=[("pt_truth", [0.070, 0.250, 0.600])],
322 outlier_z_score=5.0,
323 lower_bound=-1.73,
324 upper_bound=3.27,
325 ## \endcond
326 )
327
328 # Make profiles of the hit efficiencies versus various fit parameters
329 ## Rename the hit-profile quantities so that they display nicely in ROOT TLatex
330 renaming_select_for_hit_efficiency_profiles = {
331 'hit_efficiency': 'hit efficiency',
332 'd0_truth': 'd_{0}',
333 'pt_truth': 'p_{t}',
334 'multiplicity': 'multiplicity',
335 'phi0_truth': '#phi',
336 }
337
338 ## Make profile of hit efficiency
339 save_hit_efficiency_profiles = refiners.save_profiles(
340 ## \cond
341 above_expert_level=default_expert_level - 1,
342 select=renaming_select_for_hit_efficiency_profiles,
343 y='hit efficiency',
344 y_binary=True,
345 filter_on="is_primary",
346 outlier_z_score=5.0,
347 allow_discrete=True,
348 ## \endcond
349 )
350
351 ## Make profile of finding efficiency versus tan(lambda)
352 save_hit_efficiency_by_tan_lambda_profiles = refiners.save_profiles(
353 ## \cond
354 above_expert_level=default_expert_level - 1,
355 select={
356 'hit_efficiency': 'hit efficiency',
357 'tan_lambda_truth': 'tan #lambda',
358 },
359 y='hit efficiency',
360 y_binary=True,
361 filter_on="is_primary",
362 outlier_z_score=5.0,
363 lower_bound=-1.73,
364 upper_bound=3.27,
365 ## \endcond
366 )
367
368 ## Charge dependent histograms
369 ## Make profile of finding efficiency versus pt, tan(lambda)
370 save_finding_efficiency_by_pt_profiles_groupbyCharge = refiners.save_profiles(
371 ## \cond
372 above_expert_level=default_expert_level - 1,
373 select={
374 'is_matched': 'finding efficiency',
375 'pt_truth': 'p_{t}',
376 },
377 y='finding efficiency',
378 y_binary=True,
379 filter_on="is_primary",
380 groupby=[("charge_truth", [0.])],
381 outlier_z_score=5.0,
382 allow_discrete=True,
383 ## \endcond
384 )
385
386 ## Charge dependent histograms
387 ## Make profile of finding efficiency versus pt, tan(lambda)
388 save_finding_efficiency_by_tan_lambda_profiles_groupbyCharge = refiners.save_profiles(
389 ## \cond
390 above_expert_level=default_expert_level - 1,
391 select={
392 'is_matched': 'finding efficiency',
393 'tan_lambda_truth': 'tan #lambda'
394 },
395 y='finding efficiency',
396 y_binary=True,
397 filter_on="is_primary",
398 groupby=[("charge_truth", [0.])],
399 outlier_z_score=5.0,
400 lower_bound=-1.73,
401 upper_bound=3.27,
402 ## \endcond
403 )
404
405 ## Charge dependent histograms
406 ## Make profile of finding efficiency versus pt, tan(lambda)
407 save_hit_efficiency_by_pt_profiles_groupbyCharge = refiners.save_profiles(
408 ## \cond
409 above_expert_level=default_expert_level - 1,
410 select={
411 'hit_efficiency': 'hit efficiency',
412 'pt_truth': 'p_{t}',
413 },
414 y='hit efficiency',
415 y_binary=True,
416 filter_on="is_primary",
417 groupby=[("charge_truth", [0.])],
418 outlier_z_score=5.0,
419 allow_discrete=True,
420 ## \endcond
421 )
422
423 ## Charge dependent histograms
424 ## Make profile of finding efficiency versus pt, tan(lambda)
425 save_hit_efficiency_by_tan_lambda_profiles_groupbyCharge = refiners.save_profiles(
426 ## \cond
427 above_expert_level=default_expert_level - 1,
428 select={
429 'hit_efficiency': 'hit efficiency',
430 'tan_lambda_truth': 'tan #lambda',
431 },
432 y='hit efficiency',
433 y_binary=True,
434 filter_on="is_primary",
435 groupby=[("charge_truth", [0.])],
436 outlier_z_score=5.0,
437 lower_bound=-1.73,
438 upper_bound=3.27,
439 ## \endcond
440 )
441
442 ## This creates a histogram for all MC track displaying the ratio of hits contained in any PR track
443 ## Hence this is distinctly larger than the hit efficiency to the matched PR track
444 ## Usefulness under discussion
445 save_hit_efficiency_in_all_found_hist = refiners.save_histograms(
446 ## \cond
447 above_expert_level=default_expert_level - 1,
448 # renaming quantity to name that is more suitable for display
449 select=dict(hit_efficiency_in_all_found="total hit efficiency vs. all reconstructed tracks")
450 ## \endcond
451 )
452
453 ## This creates a histogram for *each missing* MC track displaying the ratio of hits contained in any PR tracks
454 ## High values in this hit efficiencies means that the MC track is consumed by other PR tracks but no proper
455 ## match could be established.
456 save_missing_mc_tracks_hit_efficiency_in_all_found_hist = refiners.save_histograms(
457 ## \cond
458 above_expert_level=default_expert_level - 1,
459 filter_on="is_missing", # show only the efficiencies of missing mc tracks
460 # renaming quantity to name that is more suitable for display
461 select=dict(hit_efficiency_in_all_found="total hit efficiency in all reconstructed tracks for missing mc tracks")
462 ## \endcond
463 )
__init__(self, name, contact, output_file_name=None, reco_tracks_name='RecoTracks', mc_reco_tracks_name='MCRecoTracks', expert_level=None)