Belle II Software  release-08-01-10
hit_module.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 import collections
13 import numpy as np
14 
15 from tracking.validation.fom import (
16  ValidationFiguresOfMerit,
17  ValidationManyFiguresOfMerit
18 )
19 
20 from tracking.validation.module import (
21  AlwaysPassFilter,
22  TrackingValidationModule
23 )
24 
25 from tracking.validation.utilities import getObjectList
26 
27 import basf2
28 
29 import ROOT
30 
31 from ROOT import Belle2
32 
33 ROOT.gSystem.Load('libtracking')
34 # FIXME: define hash function for TrackCand to be able to add it to set. This is
35 # not really correct, it just checks for the address and normally if a==b also
36 # hash(a) == hash(b) is required
37 ROOT.genfit.TrackCand.__hash__ = lambda x: id(x)
38 
39 
41 
42  """Module to collect more matching information about the found particles and to generate validation
43  plots and figures of merit on the performance of track finding. This module gives information on the
44  number of hits etc. """
45 
46  def __init__(
47  self,
48  name,
49  contact,
50  fit=False,
51  pulls=False,
52  resolution=False,
53  output_file_name=None,
54  track_filter_object=AlwaysPassFilter(),
55  plot_name_postfix='',
56  plot_title_postfix='',
57  exclude_profile_mc_parameter='',
58  exclude_profile_pr_parameter='',
59  use_expert_folder=True,
60  trackCandidatesColumnName='RecoTracks',
61  mcTrackCandidatesColumnName='MCRecoTracks',
62  cdcHitsColumnName='CDCHits',
63  write_tables=False):
64  """Constructor"""
65 
66  TrackingValidationModule.__init__(
67  self,
68  name,
69  contact,
70  fit,
71  pulls,
72  resolution,
73  output_file_name,
74  track_filter_object,
75  plot_name_postfix,
76  plot_title_postfix,
77  exclude_profile_mc_parameter,
78  exclude_profile_pr_parameter,
79  use_expert_folder,
80  trackCandidatesColumnName,
81  mcTrackCandidatesColumnName)
82 
83 
84  self.cdcHitsColumnnamecdcHitsColumnname = cdcHitsColumnName
85 
86  self.write_tableswrite_tables = write_tables
87 
88  def initialize(self):
89  """Receive signal at the start of event processing"""
90  TrackingValidationModule.initialize(self)
91 
92  # Use deques in favour of lists to prevent repeated memory allocation of cost O(n)
93 
94  self.number_of_total_hitsnumber_of_total_hits = collections.deque()
95 
96  self.number_of_mc_hitsnumber_of_mc_hits = collections.deque()
97 
98  self.number_of_pr_hitsnumber_of_pr_hits = collections.deque()
99 
100  self.is_hit_foundis_hit_found = collections.deque()
101 
102  self.is_hit_matchedis_hit_matched = collections.deque()
103 
104  # MC information
105 
106  self.mc_missingmc_missing = collections.deque()
107 
108  self.ratio_hits_in_mc_tracks_and_not_in_pr_tracksratio_hits_in_mc_tracks_and_not_in_pr_tracks = collections.deque()
109 
110  self.ratio_hits_in_mc_tracks_and_in_pr_tracksratio_hits_in_mc_tracks_and_in_pr_tracks = collections.deque()
111 
112  self.ratio_hits_in_missing_mc_tracks_and_in_pr_tracksratio_hits_in_missing_mc_tracks_and_in_pr_tracks = collections.deque()
113 
114  self.ratio_hits_in_mc_tracks_and_in_fake_pr_tracksratio_hits_in_mc_tracks_and_in_fake_pr_tracks = \
115  collections.deque()
116 
117  self.ratio_hits_in_mc_tracks_and_in_good_pr_tracksratio_hits_in_mc_tracks_and_in_good_pr_tracks = \
118  collections.deque()
119 
120  self.mc_is_primarymc_is_primary = collections.deque()
121 
122  self.mc_number_of_hitsmc_number_of_hits = collections.deque()
123 
124  # PT information
125 
126  self.number_of_connected_tracksnumber_of_connected_tracks = collections.deque()
127 
128  self.number_of_wrong_hitsnumber_of_wrong_hits = collections.deque()
129  # It is calculated by going through all hits of the fake track and the connected mc track cands and counting the number.
130  # These numbers are than summed up and substracted by the biggest number
131  # of hits this candidates shares with the mc track cands.
132 
133  self.pr_number_of_hitspr_number_of_hits = collections.deque()
134 
135  self.pr_number_of_matched_hitspr_number_of_matched_hits = collections.deque()
136 
137  def event(self):
138  """Event method"""
139 
140  TrackingValidationModule.event(self)
141  self.examine_hits_in_eventexamine_hits_in_event()
142 
144  """Classify all of the hits in the event according to the parent track(s)"""
145 
146  trackCands = Belle2.PyStoreArray(self.trackCandidatesColumnNametrackCandidatesColumnName)
147  mcTrackCands = Belle2.PyStoreArray(self.mcTrackCandidatesColumnNamemcTrackCandidatesColumnName)
148  if self.cdcHitsColumnnamecdcHitsColumnname not in Belle2.PyStoreArray.list():
149  # No CDC hits available, hit analysis incomplete, don't perform
150  # hit analysis
151  return
152  cdcHits = Belle2.PyStoreArray(self.cdcHitsColumnnamecdcHitsColumnname)
153 
154  # # CDC Hits in MC tracks
155  totalHitListMC = []
156  for mcTrackCand in mcTrackCands:
157  cdcHitIDs = [cdcHit.getArrayIndex() for cdcHit in getObjectList(mcTrackCand.getCDCHitList())] # Checked
158  # Working around a bug in ROOT where you should not access empty std::vectors
159  if len(cdcHitIDs) == 0:
160  cdcHitIDs = set()
161  else:
162  cdcHitIDs = set(cdcHitIDs)
163  totalHitListMC.extend(cdcHitIDs)
164 
165  # Make the ids unique
166  totalHitListMC = set(totalHitListMC)
167 
168  # # CDC Hits in PR tracks
169  totalHitListPR = []
170  totalHitListPRGood = []
171  totalHitListPRClone = []
172  totalHitListPRFake = []
173  for trackCand in trackCands:
174  if trackCand.getNumberOfTotalHits() == 0:
175  basf2.B2WARNING("Encountered a pattern recognition track with no hits")
176  continue
177 
178  cdcHitIDs = [cdcHit.getArrayIndex() for cdcHit in getObjectList(trackCand.getCDCHitList())] # Checked
179  # Working around a bug in ROOT where you should not access empty std::vectors
180  if len(cdcHitIDs) == 0:
181  cdcHitIDs = set()
182  else:
183  cdcHitIDs = set(cdcHitIDs)
184 
185  totalHitListPR.extend(cdcHitIDs)
186  if self.trackMatchLookUptrackMatchLookUp.isAnyChargeMatchedPRRecoTrack(trackCand):
187  totalHitListPRGood.extend(cdcHitIDs)
188 
189  if self.trackMatchLookUptrackMatchLookUp.isAnyChargeClonePRRecoTrack(trackCand):
190  totalHitListPRClone.extend(cdcHitIDs)
191 
192  if (self.trackMatchLookUptrackMatchLookUp.isBackgroundPRRecoTrack(trackCand) or
193  self.trackMatchLookUptrackMatchLookUp.isGhostPRRecoTrack(trackCand)):
194  totalHitListPRFake.extend(cdcHitIDs)
195 
196  # Make the ids unique
197  totalHitListPR = set(totalHitListPR)
198  totalHitListPRGood = set(totalHitListPRGood)
199  totalHitListPRClone = set(totalHitListPRClone)
200  totalHitListPRFake = set(totalHitListPRFake)
201 
202  # # All CDC Hits
203  totalHitList = set([cdcHit.getArrayIndex() for cdcHit in cdcHits])
204 
205  number_of_mc_hits = len(totalHitListMC)
206  number_of_pr_hits = len(totalHitListPR)
207  number_of_all_hits = len(totalHitList)
208 
209  is_hit_matched = 0
210  is_hit_found = len(totalHitListMC & totalHitListPR)
211 
212  for trackCand in trackCands:
213 
214  is_matched = self.trackMatchLookUptrackMatchLookUp.isAnyChargeMatchedPRRecoTrack(trackCand)
215  is_clone = self.trackMatchLookUptrackMatchLookUp.isAnyChargeClonePRRecoTrack(trackCand)
216 
217  trackCandHits = [cdcHit.getArrayIndex() for cdcHit in getObjectList(trackCand.getCDCHitList())]
218  # Working around a bug in ROOT where you should not access empty std::vectors
219  if len(trackCandHits) == 0:
220  trackCandHits = set()
221  else:
222  trackCandHits = set(trackCandHits)
223 
224  # this is not very efficient...
225  list_of_connected_mc_tracks = set()
226  list_of_numbers_of_hits_for_connected_tracks = collections.deque()
227  # number_of_connected_tracks = 0
228  # number_of_wrong_hits = 0
229 
230  for mcTrackCand in mcTrackCands:
231  mcTrackCandHits = [cdcHit.getArrayIndex() for cdcHit in getObjectList(mcTrackCand.getCDCHitList())]
232  # Working around a bug in ROOT where you should not access empty std::vectors
233  if len(mcTrackCandHits) == 0:
234  mcTrackCandHits = set()
235  else:
236  mcTrackCandHits = set(mcTrackCandHits)
237 
238  length_of_intersection = len(mcTrackCandHits & trackCandHits)
239  if length_of_intersection > 0:
240  list_of_connected_mc_tracks.add(mcTrackCand)
241  list_of_numbers_of_hits_for_connected_tracks.append(length_of_intersection)
242 
243  if len(list_of_numbers_of_hits_for_connected_tracks) == 0:
244  self.number_of_wrong_hitsnumber_of_wrong_hits.append(0)
245  self.pr_number_of_matched_hitspr_number_of_matched_hits.append(0)
246  else:
247  maximum_intersection = \
248  max(list_of_numbers_of_hits_for_connected_tracks)
249  self.pr_number_of_matched_hitspr_number_of_matched_hits.append(sum(list_of_numbers_of_hits_for_connected_tracks))
250  self.number_of_wrong_hitsnumber_of_wrong_hits.append(sum(list_of_numbers_of_hits_for_connected_tracks) -
251  maximum_intersection)
252 
253  self.number_of_connected_tracksnumber_of_connected_tracks.append(len(list_of_connected_mc_tracks))
254 
255  if is_matched or is_clone:
256  mcTrackCand = \
257  self.trackMatchLookUptrackMatchLookUp.getRelatedMCRecoTrack(trackCand)
258  mcTrackCandHits = [cdcHit.getArrayIndex() for cdcHit in getObjectList(mcTrackCand.getCDCHitList())] # Checked
259  # Working around a bug in ROOT where you should not access empty std::vectors
260  if len(mcTrackCandHits) == 0:
261  mcTrackCandHits = set()
262  else:
263  mcTrackCandHits = set(mcTrackCandHits)
264 
265  is_hit_matched += len(trackCandHits & mcTrackCandHits)
266 
267  self.pr_number_of_hitspr_number_of_hits.append(len(trackCandHits))
268 
269  for mcTrackCand in mcTrackCands:
270  is_missing = \
271  self.trackMatchLookUptrackMatchLookUp.isMissingMCRecoTrack(mcTrackCand)
272 
273  mcTrackCandHits = [cdcHit.getArrayIndex() for cdcHit in getObjectList(mcTrackCand.getCDCHitList())] # Checked
274 
275  # Working around a bug in ROOT where you should not access empty std::vectors
276  if len(mcTrackCandHits) == 0:
277  continue
278  else:
279  mcTrackCandHits = set(mcTrackCandHits)
280 
281  ratio = 1.0 * len(mcTrackCandHits & totalHitListPR) / len(mcTrackCandHits)
282 
283  self.ratio_hits_in_mc_tracks_and_not_in_pr_tracksratio_hits_in_mc_tracks_and_not_in_pr_tracks.append(1.0 - ratio)
284  self.ratio_hits_in_mc_tracks_and_in_pr_tracksratio_hits_in_mc_tracks_and_in_pr_tracks.append(ratio)
285  if is_missing:
286  self.ratio_hits_in_missing_mc_tracks_and_in_pr_tracksratio_hits_in_missing_mc_tracks_and_in_pr_tracks.append(ratio)
287  self.ratio_hits_in_mc_tracks_and_in_good_pr_tracksratio_hits_in_mc_tracks_and_in_good_pr_tracks.append(
288  1.0 * len(mcTrackCandHits & totalHitListPRGood) / len(mcTrackCandHits))
289  self.ratio_hits_in_mc_tracks_and_in_fake_pr_tracksratio_hits_in_mc_tracks_and_in_fake_pr_tracks.append(
290  1.0 * len(mcTrackCandHits & totalHitListPRFake) / len(mcTrackCandHits))
291 
292  mcParticle = \
293  self.trackMatchLookUptrackMatchLookUp.getRelatedMCParticle(mcTrackCand)
294  is_primary = \
295  mcParticle.hasStatus(Belle2.MCParticle.c_PrimaryParticle)
296  self.mc_is_primarymc_is_primary.append(is_primary)
297  self.mc_number_of_hitsmc_number_of_hits.append(len(mcTrackCandHits))
298 
299  self.mc_missingmc_missing.append(is_missing)
300 
301  self.number_of_total_hitsnumber_of_total_hits.append(number_of_all_hits)
302  self.number_of_mc_hitsnumber_of_mc_hits.append(number_of_mc_hits)
303  self.number_of_pr_hitsnumber_of_pr_hits.append(number_of_pr_hits)
304 
305  self.is_hit_foundis_hit_found.append(is_hit_found)
306  self.is_hit_matchedis_hit_matched.append(is_hit_matched)
307 
308  def terminate(self):
309  """Receive signal at the end of event processing"""
310  TrackingValidationModule.terminate(self)
311 
312  output_tfile = ROOT.TFile(self.output_file_nameoutput_file_name, 'update')
313 
314  validation_plots = []
315 
316  # Hit ratios #
317 
318  all_tracks_plot = self.profiles_by_parameters_baseprofiles_by_parameters_base(
319  xs=self.ratio_hits_in_mc_tracks_and_in_pr_tracksratio_hits_in_mc_tracks_and_in_pr_tracks,
320  quantity_name="ratio of hits in MCTracks found by the track finder",
321  make_hist=True,
322  parameter_names=[],
323  profile_parameters={},
324  unit=None)
325 
326  validation_plots.extend(all_tracks_plot)
327 
328  missing_tracks_plot = self.profiles_by_parameters_baseprofiles_by_parameters_base(
329  xs=self.ratio_hits_in_missing_mc_tracks_and_in_pr_tracksratio_hits_in_missing_mc_tracks_and_in_pr_tracks,
330  quantity_name="ratio of hits in missing MCTracks found by the track finder",
331  make_hist=True,
332  parameter_names=[],
333  profile_parameters={},
334  unit=None)
335 
336  validation_plots.extend(missing_tracks_plot)
337 
338  for validation_plot in validation_plots:
339  validation_plot.write()
340 
341  if self.write_tableswrite_tables:
342  # MC Figures of merit
343  mc_figures_of_merit = \
344  ValidationManyFiguresOfMerit('%s_mc_figures_of_merit' % self.validation_namevalidation_name)
345 
346  mc_figures_of_merit['mc_pts'] = self.mc_ptsmc_pts
347  mc_figures_of_merit['mc_d0s'] = self.mc_d0smc_d0s
348  mc_figures_of_merit['mc_matches'] = self.mc_matchesmc_matches
349  mc_figures_of_merit['mc_hit_efficiencies'] = self.mc_hit_efficienciesmc_hit_efficiencies
350  mc_figures_of_merit['mc_multiplicities'] = self.mc_multiplicitiesmc_multiplicities
351  mc_figures_of_merit['mc_phis'] = self.mc_phimc_phi
352  mc_figures_of_merit['mc_tan_lambdas'] = self.mc_tan_lambdasmc_tan_lambdas
353  mc_figures_of_merit['mc_missing'] = self.mc_missingmc_missing
354  mc_figures_of_merit['mc_is_primary'] = self.mc_is_primarymc_is_primary
355  mc_figures_of_merit['mc_number_of_hits'] = self.mc_number_of_hitsmc_number_of_hits
356  mc_figures_of_merit['ratio_hits_in_mc_tracks_and_in_good_pr_tracks'] = \
357  self.ratio_hits_in_mc_tracks_and_in_good_pr_tracksratio_hits_in_mc_tracks_and_in_good_pr_tracks
358  mc_figures_of_merit['ratio_hits_in_mc_tracks_and_in_fake_pr_tracks'] = \
359  self.ratio_hits_in_mc_tracks_and_in_fake_pr_tracksratio_hits_in_mc_tracks_and_in_fake_pr_tracks
360  mc_figures_of_merit['ratio_hits_in_mc_tracks_and_not_in_pr_tracks'] = \
361  self.ratio_hits_in_mc_tracks_and_not_in_pr_tracksratio_hits_in_mc_tracks_and_not_in_pr_tracks
362 
363  mc_figures_of_merit.write()
364 
365  # PR Figures of merit
366  pr_figures_of_merit = \
367  ValidationManyFiguresOfMerit('%s_pr_figures_of_merit' % self.validation_namevalidation_name)
368 
369  pr_figures_of_merit['pr_clones_and_matches'] = \
370  self.pr_clones_and_matchespr_clones_and_matches
371  pr_figures_of_merit['pr_matches'] = self.pr_matchespr_matches
372  pr_figures_of_merit['pr_fakes'] = self.pr_fakespr_fakes
373  pr_figures_of_merit['pr_number_of_hits'] = self.pr_number_of_hitspr_number_of_hits
374  pr_figures_of_merit['pr_number_of_matched_hits'] = \
375  self.pr_number_of_matched_hitspr_number_of_matched_hits
376  pr_figures_of_merit['pr_seed_tan_lambdas'] = self.pr_seed_tan_lambdaspr_seed_tan_lambdas
377  pr_figures_of_merit['pr_seed_phi'] = self.pr_seed_phipr_seed_phi
378 
379  pr_figures_of_merit['number_of_connected_tracks'] = \
380  self.number_of_connected_tracksnumber_of_connected_tracks
381  pr_figures_of_merit['number_of_wrong_hits'] = self.number_of_wrong_hitsnumber_of_wrong_hits
382 
383  pr_figures_of_merit.write()
384 
385  # Hit Figures of merit
386  hit_figures_of_merit = \
387  ValidationFiguresOfMerit('%s_hit_figures_of_merit' % self.validation_namevalidation_name)
388 
389  hit_figures_of_merit['number_of_total_hits'] = \
390  np.sum(self.number_of_total_hitsnumber_of_total_hits)
391  hit_figures_of_merit['number_of_mc_hits'] = \
392  np.sum(self.number_of_mc_hitsnumber_of_mc_hits)
393  hit_figures_of_merit['number_of_pr_hits'] = \
394  np.sum(self.number_of_pr_hitsnumber_of_pr_hits)
395  hit_figures_of_merit['is_hit_found'] = np.sum(self.is_hit_foundis_hit_found)
396  hit_figures_of_merit['is_hit_matched'] = np.sum(self.is_hit_matchedis_hit_matched)
397 
398  print(hit_figures_of_merit)
399  hit_figures_of_merit.write()
400 
401  output_tfile.Close()
A (simplified) python wrapper for StoreArray.
Definition: PyStoreArray.h:72
static std::vector< std::string > list(DataStore::EDurability durability=DataStore::EDurability::c_Event)
Return list of available arrays for given durability.
Definition: PyStoreArray.cc:28
ratio_hits_in_mc_tracks_and_in_pr_tracks
list of fraction of number of hits in MC track and in PR track
Definition: hit_module.py:110
pr_number_of_hits
list of the number of pattern-reconstructed hits
Definition: hit_module.py:133
mc_is_primary
list of flags indicating that the MC track is [not] a primary MCParticle
Definition: hit_module.py:120
ratio_hits_in_missing_mc_tracks_and_in_pr_tracks
list of fraction of number of hits in missing MC track and in PR track
Definition: hit_module.py:112
ratio_hits_in_mc_tracks_and_in_good_pr_tracks
list of fraction of number of hits in MC track and in good PR track
Definition: hit_module.py:117
def __init__(self, name, contact, fit=False, pulls=False, resolution=False, output_file_name=None, track_filter_object=AlwaysPassFilter(), plot_name_postfix='', plot_title_postfix='', exclude_profile_mc_parameter='', exclude_profile_pr_parameter='', use_expert_folder=True, trackCandidatesColumnName='RecoTracks', mcTrackCandidatesColumnName='MCRecoTracks', cdcHitsColumnName='CDCHits', write_tables=False)
Definition: hit_module.py:63
mc_number_of_hits
list of the number of MCTrackCandHits on the MC track
Definition: hit_module.py:122
number_of_connected_tracks
This is the number of mcTrackCands sharing a hit with the track cand.
Definition: hit_module.py:126
ratio_hits_in_mc_tracks_and_not_in_pr_tracks
list of fraction of number of hits in MC track but not in PR track
Definition: hit_module.py:108
write_tables
cached value of the flag to write the validation figures of merit
Definition: hit_module.py:86
number_of_wrong_hits
This number gives information about the "badness" of the fake.
Definition: hit_module.py:128
number_of_pr_hits
number of hits on pattern reconstructed tracks
Definition: hit_module.py:98
cdcHitsColumnname
cached name of the CDCHits StoreArray
Definition: hit_module.py:84
pr_number_of_matched_hits
list of the number of pattern-reconstructed hits matched to MC track
Definition: hit_module.py:135
ratio_hits_in_mc_tracks_and_in_fake_pr_tracks
list of fraction of number of hits in MC track and in fake PR track
Definition: hit_module.py:114
is_hit_matched
list of flags for [not-]matched hits
Definition: hit_module.py:102
mc_missing
list of flags where MCRecoTrack is [not] missing MCTrackCand
Definition: hit_module.py:106
pr_seed_tan_lambdas
list of PR-track seed tan(lambda) values
Definition: module.py:196
trackCandidatesColumnName
cached name of the RecoTracks StoreArray
Definition: module.py:156
pr_seed_phi
list of PR-track seed phi values
Definition: module.py:198
output_file_name
cached value of the output ROOT TFile
Definition: module.py:141
trackMatchLookUp
Track-match object that examines relation information from MCMatcherTracksModule.
Definition: module.py:182
mc_multiplicities
list of MC-track multiplicities
Definition: module.py:255
pr_clones_and_matches
Use deques in favour of lists to prevent repeated memory allocation of cost O(n)
Definition: module.py:187
mcTrackCandidatesColumnName
cached name of the MCRecoTracks StoreArray
Definition: module.py:158
validation_name
cached value of the tracking-validation name
Definition: module.py:131
mc_tan_lambdas
list of MC-track tan(lambda) values
Definition: module.py:247
mc_hit_efficiencies
list of MC-track hit efficiencies
Definition: module.py:253
def profiles_by_parameters_base(self, xs, quantity_name, parameter_names, profile_parameters, unit, make_hist, weights=None, is_asymmetry=False)
Definition: module.py:792