Belle II Software  release-05-01-25
skimRetentionCheck.py
1 # !/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 """\
5 Provides class for tracking retention rate of each cut in a skim.
6 """
7 
8 __author__ = "Cyrille Praz, Slavomira Stefkova"
9 
10 import os
11 import matplotlib.pyplot as plt
12 
13 from ROOT import Belle2
14 
15 import basf2 as b2
16 
17 
class RetentionCheck(b2.Module):
    """Check the retention rate and the number of candidates for a given set of particle lists.

    The module stores its results in the static variable "summary".

    To monitor the effect of every module of an initial path, this module should be added after
    each module of the path. A function was written (`pathWithRetentionCheck`) to do it:

    >>> path = pathWithRetentionCheck(particle_lists, path)

    After the path processing, the result of the RetentionCheck can be printed with

    >>> RetentionCheck.print_results()

    or plotted with (check the corresponding documentation)

    >>> RetentionCheck.plot_retention(...)

    and the summary dictionary can be accessed through

    >>> RetentionCheck.summary

    Authors:

        Cyrille Praz, Slavomira Stefkova

    Parameters:

        module_name (str): name of the module after which the retention rate is measured
        module_number (int): index of the module after which the retention rate is measured
        particle_lists (list(str)): list of particle list names which will be tracked by the module
            (defaults to an empty list)
    """

    # static dictionary containing the results (retention rates, number of candidates, ...);
    # layout: summary["<number>. <module name>"][particle_list] = {...}, filled in terminate()
    summary = {}
    # if the -o option is provided to basf2, this variable stores the output for the plotting
    output_override = None

    def __init__(self, module_name='', module_number=0, particle_lists=None):
        """Register the module in the shared summary and reset its counters.

        Parameters:

            module_name (str): name of the module after which the retention rate is measured
            module_number (int): index of the module after which the retention rate is measured
            particle_lists (list(str)): particle list names to track (None means no list)
        """
        # A list literal as default value would be shared between all instances; take a
        # private copy so that later changes to the caller's list cannot desynchronise
        # the counters below.
        particle_lists = [] if particle_lists is None else list(particle_lists)

        self.module_name = str(module_name)
        self.module_number = int(module_number)

        # total number of candidates seen, per particle list
        self.candidate_count = {pl: 0 for pl in particle_lists}
        # number of events with at least one candidate, per particle list
        self.event_with_candidate_count = {pl: 0 for pl in particle_lists}

        self.particle_lists = particle_lists

        # key of this module in the summary; the zero-padded number keeps the path order visible
        self._key = "{:04}. {}".format(self.module_number, self.module_name)
        type(self).summary[self._key] = {}

        # the output override is read only once and then shared by all instances
        if type(self).output_override is None:
            type(self).output_override = Belle2.Environment.Instance().getOutputFileOverride()

        super().__init__()

    def event(self):
        """Update the candidate and event counters of every tracked particle list."""
        for particle_list in self.particle_lists:

            pl = Belle2.PyStoreObj(Belle2.ParticleList.Class(), particle_list)

            # lists which are not valid in this event are simply not counted
            if not pl.isValid():
                continue

            list_size = pl.getListSize()
            self.candidate_count[particle_list] += list_size

            if list_size != 0:
                self.event_with_candidate_count[particle_list] += 1

    def terminate(self):
        """Compute the retention rates and store the results in the static summary."""
        N = Belle2.Environment.Instance().getNumberOfEvents()

        # the message does not depend on the particle list: emit it once per module
        if N <= 0 and self.particle_lists:
            b2.B2WARNING("Belle2.Environment.Instance().getNumberOfEvents() gives 0 or less.")

        for particle_list in self.particle_lists:

            if N > 0:
                retention_rate = float(self.event_with_candidate_count[particle_list]) / N
            else:
                # no meaningful rate can be computed without a positive number of events
                retention_rate = 0

            type(self).summary[self._key][particle_list] = {
                "retention_rate": retention_rate,
                "#candidates": self.candidate_count[particle_list],
                "#evts_with_candidates": self.event_with_candidate_count[particle_list],
                "total_#events": N}

    @classmethod
    def print_results(cls):
        """ Print the results, should be called after the path processing.

        One table per particle list is sent to B2INFO. For a given particle list, the
        modules are listed starting from the first one with a non-zero retention rate.
        """
        summary_tables = {}  # one summary table per particle list
        table_headline = "{:<100}|{:>9}|{:>12}|{:>22}|{:>12}|\n"
        table_line = "{:<100}|{:>9.3f}|{:>12}|{:>22}|{:>12}|\n"
        separator = "=" * 160 + "\n"

        at_least_one_entry = {}  # check if there is at least one non-zero retention for a given particle list

        for module, module_results in cls.summary.items():

            if len(module) > 100:  # module name too long
                module = module[:96] + "..."

            for particle_list, list_results in module_results.items():

                if particle_list not in summary_tables:
                    at_least_one_entry[particle_list] = False
                    summary_tables[particle_list] = table_headline.format(
                        "Module", "Retention", "# Candidates", "# Evts with candidates", "Total # evts")
                    summary_tables[particle_list] += separator

                # No "else" here: the module which introduces the particle list in the
                # table must get its own row as well, as it does in plot_retention.
                if list_results["retention_rate"] > 0 or at_least_one_entry[particle_list]:
                    at_least_one_entry[particle_list] = True
                    # name the columns explicitly instead of relying on the dictionary order
                    summary_tables[particle_list] += table_line.format(
                        module,
                        list_results["retention_rate"],
                        list_results["#candidates"],
                        list_results["#evts_with_candidates"],
                        list_results["total_#events"])

        for particle_list, summary_table in summary_tables.items():
            b2.B2INFO("\n" + "=" * 160 + "\n" +
                      "Results of the modules RetentionCheck for the list " + particle_list + ".\n" +
                      "=" * 160 + "\n" +
                      "Note: the module RetentionCheck is defined in skim/scripts/skimExpertFunctions.py\n" +
                      "=" * 160 + "\n" +
                      summary_table +
                      "=" * 160 + "\n" +
                      "End of the results of the modules RetentionCheck for the list " + particle_list + ".\n" +
                      "=" * 160 + "\n"
                      )

    @classmethod
    def plot_retention(cls, particle_list, plot_title="", save_as=None, module_name_max_length=80):
        """ Plot the result of the RetentionCheck for a given particle list.

        Example of use (to be put after process(path)):

        >>> RetentionCheck.plot_retention('B+:semileptonic','skim:feiSLBplus','retention_plots/plot.pdf')

        Nothing is plotted (a warning is issued instead) if the particle list is missing from
        the results of one of the modules, or if its retention rate is zero for every module.

        Parameters:

            particle_list (str): particle list name
            plot_title (str): plot title (overwritten by the -o argument in basf2)
            save_as (str): output filename (overwritten by the -o argument in basf2)
            module_name_max_length (int): if the module name length is higher than this value, do not display the full name
        """
        module_name = []
        retention = []

        at_least_one_entry = False
        for module, results in cls.summary.items():

            if particle_list not in results:
                b2.B2WARNING(particle_list + " is not present in the results of the RetentionCheck for the module {}."
                             .format(module))
                return

            # skip the modules preceding the first one with a non-zero retention rate
            if results[particle_list]['retention_rate'] > 0 or at_least_one_entry:
                at_least_one_entry = True
                if len(module) > module_name_max_length and module_name_max_length > 3:  # module name too long
                    module = module[:module_name_max_length - 3] + "..."
                module_name.append(module)
                retention.append(100 * (results[particle_list]['retention_rate']))

        if not at_least_one_entry:
            b2.B2WARNING(particle_list + " seems to have a zero retention rate when created (if created).")
            return

        plt.figure()
        bars = plt.barh(module_name, retention, label=particle_list, color=(0.67, 0.15, 0.31, 0.6))

        # write the retention rate (in percent) on each bar
        for bar in bars:
            yval = bar.get_width()
            plt.text(0.5, bar.get_y() + bar.get_height() / 2.0 + 0.1, str(round(yval, 3)))

        plt.gca().invert_yaxis()  # first module of the path at the top
        plt.xticks(rotation=45)
        plt.xlim(0, 100)
        plt.axvline(x=10.0, linewidth=1, linestyle="--", color='k', alpha=0.5)
        plt.xlabel('Retention Rate [%]')
        plt.legend(loc='lower right')

        if save_as or cls.output_override:
            if cls.output_override:
                # Strip only the file extension: splitting on the first "." would give an
                # empty name for "./out.root" and truncate any path containing another dot.
                plot_title = os.path.splitext(cls.output_override)[0]
                save_as = plot_title + '.pdf'
            output_directory = os.path.dirname(save_as)
            if output_directory:
                os.makedirs(output_directory, exist_ok=True)
            plt.title(plot_title)
            plt.savefig(save_as, bbox_inches="tight")
            # abspath gives the right location for both relative and absolute output names
            b2.B2RESULT("Retention rate results for list {} saved in {}."
                        .format(particle_list, os.path.abspath(save_as)))
209 
210 
def pathWithRetentionCheck(particle_lists, path):
    """ Build a copy of a path in which every module is followed by a RetentionCheck module.

    This allows for checking how the retention rate is modified by each module of the path.

    Example of use (to be put just before process(path)):

    >>> path = pathWithRetentionCheck(['B+:semileptonic'], path)

    Warning: pathWithRetentionCheck(['B+:semileptonic'], path) does not modify path,
    it only returns a new one.

    After the path processing, the result of the RetentionCheck can be printed with

    >>> RetentionCheck.print_results()

    or plotted with (check the corresponding documentation)

    >>> RetentionCheck.plot_retention(...)

    and the summary dictionary can be accessed through

    >>> RetentionCheck.summary

    Parameters:

        particle_lists (list(str)): list of particle list names which will be tracked by RetentionCheck
        path (basf2.Path): initial path (it is not modified, see warning above and example of use)

    Returns:

        the new path, made of the modules of the initial path, each one followed by a RetentionCheck
    """
    checked_path = b2.Path()

    for index, original_module in enumerate(path.modules()):
        checked_path.add_module(original_module)

        label = original_module.name()
        if 'ParticleSelector' in label:
            # Append the value of the first parameter of the module to its name.
            # NOTE(review): presumably this value is the cut string - confirm.
            label = label + '(' + original_module.available_params()[0].values + ')'

        checked_path.add_module(RetentionCheck(label, index, particle_lists))

    return checked_path
skimRetentionCheck.RetentionCheck.output_override
output_override
Definition: skimRetentionCheck.py:52
skimRetentionCheck.RetentionCheck.module_name
module_name
Definition: skimRetentionCheck.py:56
Belle2::PyStoreObj
a (simplified) python wrapper for StoreObjPtr.
Definition: PyStoreObj.h:69
skimRetentionCheck.RetentionCheck.module_number
module_number
Definition: skimRetentionCheck.py:57
skimRetentionCheck.RetentionCheck.candidate_count
candidate_count
Definition: skimRetentionCheck.py:59
skimRetentionCheck.RetentionCheck.event_with_candidate_count
event_with_candidate_count
Definition: skimRetentionCheck.py:60
skimRetentionCheck.RetentionCheck.print_results
def print_results(cls)
Definition: skimRetentionCheck.py:107
skimRetentionCheck.RetentionCheck._key
_key
Definition: skimRetentionCheck.py:64
skimRetentionCheck.RetentionCheck.summary
dictionary summary
Definition: skimRetentionCheck.py:51
skimRetentionCheck.RetentionCheck.plot_retention
def plot_retention(cls, particle_list, plot_title="", save_as=None, module_name_max_length=80)
Definition: skimRetentionCheck.py:149
skimRetentionCheck.RetentionCheck.particle_lists
particle_lists
Definition: skimRetentionCheck.py:62
skimRetentionCheck.RetentionCheck
Definition: skimRetentionCheck.py:18
Belle2::Environment::Instance
static Environment & Instance()
Static method to get a reference to the Environment instance.
Definition: Environment.cc:31