Belle II Software  release-06-02-00
core.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 
4 
11 
12 """
13 The core classes of the skim package are defined in ``skim.core``: ``BaseSkim`` and
14 ``CombinedSkim``.
15 
16 * ``BaseSkim`` is an abstract base class from which all skims inherit. It defines
17  template functions for a skim, and includes attributes describing the skim metadata.
18 
19 * ``CombinedSkim`` is a class for combining ``BaseSkim`` objects into a single steering
20  file.
21 """
22 
23 from abc import ABC, abstractmethod
24 import warnings
25 
26 import basf2 as b2
27 from modularAnalysis import applyCuts, summaryOfLists
28 from skim.registry import Registry
29 from skim.utils.flags import InitialiseSkimFlag, UpdateSkimFlag
30 from skim.utils.testfiles import get_test_file
31 from skim.utils.misc import _hashable_list
32 
33 
class BaseSkim(ABC):
    """Base class for skims. Initialises a skim name, and creates template functions
    required for each skim.

    See `writing-skims` for information on how to use this to define a new skim.
    """

    NoisyModules = None
    """List of module types to be silenced. This may be necessary in certain skims in
    order to keep log file sizes small.

    .. tip::
        The elements of this list should be the module *type*, which is not necessarily
        the same as the module name. The module type can be inspected in Python via
        ``module.type()``.

    .. seealso::
        This attribute is used by `BaseSkim.set_skim_logging`.
    """

    TestSampleProcess = "mixed"
    """MC process of test file. `BaseSkim.TestFiles` passes this property to
    `skim.utils.testfiles.get_test_file` to retrieve an appropriate file location.
    Defaults to a :math:`B^{0}\\overline{B^{0}}` sample.
    """

    MergeDataStructures = {}
    """Dict of ``str -> function`` pairs to determine if any special data structures
    should be merged when combining skims. Currently, this is only used to merge FEI
    config parameters when running multiple FEI skims at once, so that it can be run
    just once with all the necessary arguments."""

    ApplyHLTHadronCut = False
    """If this property is set to True, then the HLT selection for ``hlt_hadron`` will
    be applied to the skim lists when the skim is added to the path.
    """

    produce_on_tau_samples = True
    """If this property is set to False, then ``b2skim-prod`` will not produce data
    production requests for this skim on taupair MC samples. This decision may be made
    for one of two reasons:

    * The retention rate of the skim on taupair samples is basically zero, so there is
      no point producing the skim for these samples.

    * The retention rate of the skim on taupair samples is too high (>20%), so the
      production system may struggle to handle the jobs.
    """

    produces_mdst_by_default = False
    """Special property for combined systematics skims, which produce MDST output instead of
    uDST. This property is used by ``b2skim-prod`` to set the ``DataLevel`` parameter in
    the ``DataDescription`` block for this skim to ``mdst`` instead of ``udst``.
    """

    validation_sample = None
    """
    MDST sample to use for validation histograms. Must be a valid location of a
    validation dataset (see documentation for `basf2.find_file`).
    """

    mc = True
    """
    Include Monte Carlo quantities in skim output.
    """

    analysisGlobaltag = None
    """
    Analysis globaltag.
    """

    @property
    @abstractmethod
    def __description__(self):
        pass

    @property
    @abstractmethod
    def __category__(self):
        pass

    @property
    @abstractmethod
    def __authors__(self):
        pass

    @property
    @abstractmethod
    def __contact__(self):
        pass

    @property
    def code(self):
        """Eight-digit code assigned to this skim in the registry."""
        return Registry.encode_skim_name(self.name)

    def __init__(
        self,
        *,
        OutputFileName=None,
        additionalDataDescription=None,
        udstOutput=True,
        validation=False,
        mc=True,
        analysisGlobaltag=None,
    ):
        """Initialise the BaseSkim class.

        Parameters:
            OutputFileName (str): Name to give output uDST files. If none given, then
                defaults to eight-number skim code.
            additionalDataDescription (dict): additional data description to be added to the output file metadata.
            udstOutput (bool): If True, add uDST output to the path.
            validation (bool): If True, build lists and write validation histograms
                instead of writing uDSTs.
            mc (bool): If True, include MC quantities in output.
            analysisGlobaltag (str): Analysis globaltag.
        """
        self.name = self.__class__.__name__
        self.OutputFileName = OutputFileName
        self.additionalDataDescription = additionalDataDescription
        self._udstOutput = udstOutput
        self._validation = validation
        self.mc = mc
        self.analysisGlobaltag = analysisGlobaltag

        # Always give the instance its own list. A subclass may declare
        # ``NoisyModules`` as a class-level list; without the copy, extending it on
        # one instance (as `CombinedSkim` does) would leak into every other instance.
        self.NoisyModules = list(self.NoisyModules or [])

    def load_standard_lists(self, path):
        """
        Load any standard lists. This code will be run before any
        `BaseSkim.additional_setup` and `BaseSkim.build_lists`.

        Note:
            This is separated into its own function so that when skims are combined, any
            standard lists used by two skims can be loaded just once.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """

    def additional_setup(self, path):
        """
        Perform any setup steps necessary before running the skim.

        Warning:
            Standard particle lists should *not* be loaded in here. This should be done
            by overriding the method `BaseSkim.load_standard_lists`. This is crucial for
            avoiding loading lists twice when combining skims for production.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """

    # Abstract method to ensure that it is overridden whenever `BaseSkim` is inherited
    @abstractmethod
    def build_lists(self, path):
        """Create the skim lists to be saved in the output uDST. This function is where
        the main skim cuts should be applied. This function should return a list of
        particle list names.

        Parameters:
            path (basf2.Path): Skim path to be processed.

        .. versionchanged:: release-06-00-00

            Previously, this function was expected to set the attribute
            `BaseSkim.SkimLists`. Now this is handled by `BaseSkim`, and this function is
            expected to return the list of particle list names.
        """

    def validation_histograms(self, path):
        """Create validation histograms for the skim.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """

    # Everything beyond this point can remain as-is when defining a skim
    def __call__(self, path):
        """Produce the skim particle lists and write uDST file.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        self._MainPath = path

        self.initialise_skim_flag(path)
        self.load_standard_lists(path)
        self.additional_setup(path)
        # At this point, BaseSkim.skim_event_cuts may have been run, so pass
        # self._ConditionalPath for the path if it is not None (otherwise just pass the
        # regular path)
        self.SkimLists = self.build_lists(self._ConditionalPath or path)
        self.apply_hlt_hadron_cut_if_required(self._ConditionalPath or path)

        self.update_skim_flag(self._ConditionalPath or path)

        if self._udstOutput:
            self.output_udst(self._ConditionalPath or path)

        if self._validation:
            if self._method_unchanged("validation_histograms"):
                b2.B2FATAL(f"No validation histograms defined for {self} skim.")
            self.validation_histograms(self._ConditionalPath or path)

        self.set_skim_logging()

    @property
    def postskim_path(self):
        """
        Return the skim path.

        * If `BaseSkim.skim_event_cuts` has been run, then the skim lists will only be
          created on a conditional path, so subsequent modules should be added to the
          conditional path.

        * If `BaseSkim.skim_event_cuts` has not been run, then the main analysis path is
          returned.

        Raises:
            ValueError: If the skim has not been added to a path yet.
        """

        if not self._MainPath:
            raise ValueError("Skim has not been added to the path yet!")
        return self._ConditionalPath or self._MainPath

    SkimLists = []
    """
    List of particle lists reconstructed by the skim. This attribute should only be
    accessed after running the ``__call__`` method.
    """

    _MainPath = None
    """Main analysis path."""

    _ConditionalPath = None
    """Conditional path to be set by `BaseSkim.skim_event_cuts` if event-level cuts are applied."""

    def skim_event_cuts(self, cut, *, path):
        """Apply event-level cuts in a skim-safe way.

        Parameters:
            cut (str): Event-level cut to be applied.
            path (basf2.Path): Skim path to be processed.

        Returns:
            Path on which the rest of this skim should be processed.
            On this path, only events which passed the event-level cut will
            be processed further.

        .. Tip::
            If running this function in `BaseSkim.additional_setup` or
            `BaseSkim.build_lists`, redefine the ``path`` to the path returned by
            `BaseSkim.skim_event_cuts`, *e.g.*

            .. code-block:: python

                def build_lists(self, path):
                    path = self.skim_event_cuts("nTracks>4", path=path)
                    # rest of skim list building...

        .. Note::
            The motivation for using this function over `applyEventCuts` is that
            `applyEventCuts` completely removes events from processing. If we combine
            multiple skims in a single steering file (which is done in production), and
            the first has a set of event-level cuts, then all the remaining skims will
            never even see those events.

            Internally, this function creates a new path, which is only processed for
            events passing the event-level cut. To avoid issues around particles not
            being available on the main path (leading to noisy error logs), we need to
            add the rest of the skim to this path. So this new path is assigned to the
            attribute ``BaseSkim._ConditionalPath``, and ``BaseSkim.__call__`` will run
            all remaining methods on this path.
        """
        if self._ConditionalPath is not None:
            b2.B2FATAL(
                "BaseSkim.skim_event_cuts cannot be run twice in one skim. "
                "Please join your event-level cut strings into a single string."
            )

        ConditionalPath = b2.Path()
        self._ConditionalPath = ConditionalPath

        eselect = path.add_module("VariableToReturnValue", variable=f"passesEventCut({cut})")
        eselect.if_value('=1', ConditionalPath, b2.AfterConditionPath.CONTINUE)

        return ConditionalPath

    @property
    def TestFiles(self):
        """
        Location of test MDST sample. To modify this, set the property
        `BaseSkim.TestSampleProcess`, and this function will find an appropriate test
        sample from the list in
        ``/group/belle2/dataprod/MC/SkimTraining/SampleLists/TestFiles.yaml``

        If no sample can be found, an empty list is returned.
        """
        try:
            return [str(get_test_file(process=self.TestSampleProcess))]
        except FileNotFoundError:
            # Could not find TestFiles.yaml
            # (Don't issue a warning, since this will just show up as noise during grid processing)
            return []
        except ValueError:
            b2.B2WARNING(
                f"Could not find '{self.TestSampleProcess}' sample in TestFiles.yaml"
            )
            # Could not find sample in YAML file
            return []

    @property
    def flag(self):
        """
        Event-level variable indicating whether an event passes the skim or not. To use
        the skim flag without writing uDST output, use the argument ``udstOutput=False``
        when instantiating the skim class.
        """
        return f"passes_{self}"

    def initialise_skim_flag(self, path):
        """
        Add the module `skim.utils.flags.InitialiseSkimFlag` to the path, which
        initialises flag for this skim to zero.
        """
        path.add_module(InitialiseSkimFlag(self))

    def update_skim_flag(self, path):
        """
        Add the module `skim.utils.flags.UpdateSkimFlag` to the path, which
        updates flag for this skim.

        .. Warning::

            If a conditional path has been created before this, then this function
            *must* run on the conditional path, since the skim lists are not guaranteed
            to exist for all events on the main path.
        """
        path.add_module(UpdateSkimFlag(self))

    def _method_unchanged(self, method):
        """Check if the method of the class is the same as in its parent class, or if it has
        been overridden.

        Useful for determining if *e.g.* `validation_histograms` has been defined for a
        particular skim.

        Parameters:
            method (str): Name of the attribute to look up.

        Returns:
            bool: True if the class still uses the implementation of its oldest
            ancestor defining ``method``; False if it was overridden or if no ancestor
            defines it.
        """
        cls = self.__class__
        ParentsWithAttr = [parent for parent in cls.__mro__[1:] if hasattr(parent, method)]

        if not ParentsWithAttr:
            return False

        # Look for oldest ancestor which has that attribute, to handle inheritance.
        # In the case of `validation_histograms`, this will be `BaseSkim`.
        OldestParentWithAttr = ParentsWithAttr[-1]
        return getattr(cls, method) == getattr(OldestParentWithAttr, method)

    def __str__(self):
        return self.name

    def __name__(self):
        return self.name

    def set_skim_logging(self):
        """Turns the log level to ERROR for selected modules to decrease the total size
        of the skim log files. Additional modules can be silenced by setting the attribute
        `NoisyModules` for an individual skim.

        .. warning::

            This method works by inspecting the modules added to the path, and setting
            the log level to ERROR. This method should be called *after* all
            skim-related modules are added to the path.
        """
        b2.set_log_level(b2.LogLevel.INFO)

        # Build the lookup set once instead of once per inspected module
        NoisyModules = {"ParticleLoader", "ParticleVertexFitter", *self.NoisyModules}

        # Set log level of modules on both main path and conditional path
        paths = filter(None, (self._MainPath, self._ConditionalPath))
        modules = [module for path in paths for module in path.modules()]

        for module in modules:
            if module.type() in NoisyModules:
                module.set_log_level(b2.LogLevel.ERROR)

    def output_udst(self, path):
        """Write the skim particle lists to an output uDST and print a summary of the
        skim list statistics.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """

        # Keep this import here to avoid ROOT hijacking the argument parser
        import udst  # noqa

        # Make a fuss if self.SkimLists is empty. A truthiness test is used so that a
        # `build_lists` which forgot its return statement (SkimLists is None) ends up
        # here too, rather than failing with an unrelated TypeError from len().
        if not self.SkimLists:
            b2.B2FATAL(
                f"No skim list names defined in self.SkimLists for {self} skim!"
            )

        udst.add_skimmed_udst_output(
            skimDecayMode=self.code,
            skimParticleLists=self.SkimLists,
            outputFile=self.OutputFileName,
            dataDescription=self.additionalDataDescription,
            mc=self.mc,
            path=path,
        )
        summaryOfLists(self.SkimLists, path=path)

    def apply_hlt_hadron_cut_if_required(self, path):
        """Apply the ``hlt_hadron`` selection if the property ``ApplyHLTHadronCut`` is True.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        hlt_hadron = "SoftwareTriggerResult(software_trigger_cut&skim&accept_hadron)"
        if self.ApplyHLTHadronCut:
            for SkimList in self.SkimLists:
                applyCuts(SkimList, f"{hlt_hadron}==1", path=path)
462 
463 
class CombinedSkim(BaseSkim):
    """Class for creating combined skims which can be run using similar-looking methods
    to `BaseSkim` objects.

    A steering file which combines skims can be as simple as the following:

    .. code-block:: python

        import basf2 as b2
        import modularAnalysis as ma
        from skim.WGs.foo import OneSkim, TwoSkim, RedSkim, BlueSkim

        path = b2.Path()
        ma.inputMdstList("default", [], path=path)
        skims = CombinedSkim(OneSkim(), TwoSkim(), RedSkim(), BlueSkim())
        skims(path)  # load standard lists, create skim lists, and save to uDST
        path.process()

    When skims are combined using this class, the `BaseSkim.NoisyModules` lists of each
    skim are combined and all silenced.

    The heavy-lifting functions `BaseSkim.additional_setup`, `BaseSkim.build_lists` and
    `BaseSkim.output_udst` are modified to loop over the corresponding functions of each
    individual skim. The `load_standard_lists` method is also modified to load all
    required lists, without accidentally loading a list twice.

    Calling an instance of the `CombinedSkim` class will load all the required particle
    lists, then run all the setup steps, then the list building functions, and then all
    the output steps.
    """

    __authors__ = ["Phil Grace"]
    __description__ = None
    __category__ = "combined"
    __contact__ = None

    def __init__(
        self,
        *skims,
        NoisyModules=None,
        additionalDataDescription=None,
        udstOutput=None,
        mdstOutput=False,
        mdst_kwargs=None,
        CombinedSkimName="CombinedSkim",
        OutputFileName=None,
        mc=None,
        analysisGlobaltag=None,
    ):
        """Initialise the CombinedSkim class.

        Parameters:
            *skims (BaseSkim): One or more (instantiated) skim objects.
            NoisyModules (list(str)): Additional modules to silence.
            additionalDataDescription (dict): Overrides corresponding setting of all individual skims.
            udstOutput (bool): Overrides corresponding setting of all individual skims.
            mdstOutput (bool): Write a single MDST output file containing events which
                pass any of the skims in this combined skim.
            mdst_kwargs (dict): kwargs to be passed to `mdst.add_mdst_output`. Only used
                if ``mdstOutput`` is True.
            CombinedSkimName (str): Sets output of ``__str__`` method of this combined skim.
            OutputFileName (str): If mdstOutput=True, this option sets the name of the combined output file.
                If mdstOutput=False, this option does nothing.
            mc (bool): If True, include MC quantities in output.
            analysisGlobaltag (str): Analysis globaltag.

        Raises:
            NotImplementedError: If any positional argument is not a `BaseSkim` instance.
        """

        if NoisyModules is None:
            NoisyModules = []
        # Check that we were passed only BaseSkim objects
        if not all(isinstance(skim, BaseSkim) for skim in skims):
            raise NotImplementedError(
                "Must pass only `BaseSkim` type objects to `CombinedSkim`."
            )

        self.Skims = skims
        self.name = CombinedSkimName
        for skim in self:
            # Rebind to a new list instead of using ``+=``: the in-place form would
            # also extend a list shared at class level (or with another instance).
            skim.NoisyModules = list(skim.NoisyModules or []) + list(NoisyModules)

        # empty but needed for functions inherited from baseSkim to work
        self.SkimLists = []

        if additionalDataDescription is not None:
            for skim in self:
                skim.additionalDataDescription = additionalDataDescription

        self._udstOutput = udstOutput
        if udstOutput is not None:
            for skim in self:
                skim._udstOutput = udstOutput

        self.mc = mc
        if mc is not None:
            for skim in self:
                skim.mc = mc

        self.analysisGlobaltag = analysisGlobaltag
        if analysisGlobaltag is not None:
            for skim in self:
                skim.analysisGlobaltag = analysisGlobaltag

        self._mdstOutput = mdstOutput
        # Work on a copy so the dict handed in by the caller is never modified
        self.mdst_kwargs = dict(mdst_kwargs or {})
        self.mdst_kwargs.update(OutputFileName=OutputFileName)

        if mc is not None:
            self.mdst_kwargs.update(mc=mc)

        self.merge_data_structures()

    def __str__(self):
        return self.name

    def __name__(self):
        return self.name

    def __call__(self, path):
        """Add every step of every individual skim to the path.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        for skim in self:
            skim._MainPath = path

        self.initialise_skim_flag(path)
        self.load_standard_lists(path)
        self.additional_setup(path)
        self.build_lists(path)
        self.apply_hlt_hadron_cut_if_required(path)
        self.update_skim_flag(path)
        self._check_duplicate_list_names()
        self.output_udst(path)
        self.output_mdst_if_any_flag_passes(path=path, **self.mdst_kwargs)
        self.set_skim_logging()

    def __iter__(self):
        yield from self.Skims

    def load_standard_lists(self, path):
        """Add all required standard list loading to the path.

        Note:
            To avoid loading standard lists twice, this function creates dummy paths
            that are passed through ``load_standard_lists`` for each skim. These dummy
            paths are then inspected, and a list of unique module-parameter combinations
            is added to the main skim path.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        ModulesAndParams = []
        for skim in self:
            DummyPath = b2.Path()
            skim.load_standard_lists(DummyPath)

            # Create a hashable data object to store the information about which
            # standard lists have been added to the path.
            ModulesAndParams.extend(
                (
                    module.type(),
                    tuple(sorted(
                        (param.name, _hashable_list(param.values) if isinstance(param.values, list) else param.values)
                        for param in module.available_params()
                        if param.values != param.default
                    )),
                )
                for module in DummyPath.modules()
            )

        # Take this data structure and convert it to a dict. This removes any duplicate
        # entries while keeping the order in which they were first seen.
        ModulesAndParams = dict.fromkeys(ModulesAndParams)

        # Add the (now unique) module+param combinations to the main path
        for module, params in ModulesAndParams:
            path.add_module(module, **dict(params))

    def additional_setup(self, path):
        """Run the `BaseSkim.additional_setup` function of each skim.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        for skim in self:
            skim.additional_setup(path)

    def build_lists(self, path):
        """Run the `BaseSkim.build_lists` function of each skim.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        for skim in self:
            skim.SkimLists = skim.build_lists(skim._ConditionalPath or path)

    def output_udst(self, path):
        """Run the `BaseSkim.output_udst` function of each skim.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        for skim in self:
            if skim._udstOutput:
                skim.output_udst(skim._ConditionalPath or path)

    def output_mdst_if_any_flag_passes(self, *, path, **kwargs):
        """
        Add MDST output to the path if the event passes any of the skim flags.
        EventExtraInfo is included in the MDST output so that the flags are available in
        the output.

        The output filename is taken from ``filename`` in kwargs, else from
        ``OutputFileName`` in kwargs. If neither is given, the registry code of the
        combined skim is used, falling back to the ``CombinedSkimName`` given at
        initialisation when the combined skim is not registered.

        Parameters:
            path (basf2.Path): Skim path to be processed.
            **kwargs: Passed on to `mdst.add_mdst_output`.
        """
        if not self._mdstOutput:
            return

        from mdst import add_mdst_output

        sum_flags = " + ".join(f"eventExtraInfo({f})" for f in self.flags)
        variable = f"formula({sum_flags})"

        passes_flag_path = b2.Path()
        passes_flag = path.add_module("VariableToReturnValue", variable=variable)
        passes_flag.if_value(">0", passes_flag_path, b2.AfterConditionPath.CONTINUE)

        # If the combinedSkim is not in the registry getting the code will throw a LookupError.
        # There is no requirement that a combinedSkim with single MDST output is
        # registered so set the skimDecayMode to ``None`` if no code is defined.
        # The lookup is done exactly once, and only inside this guarded block, so that
        # an unregistered combined skim with an explicit filename does not crash.
        try:
            skim_code = self.code
        except LookupError:
            skim_code = None

        # ``OutputFileName`` is not an argument of `mdst.add_mdst_output`, so it is
        # removed from kwargs and only used as a fallback for ``filename``.
        output_file_name = kwargs.pop("OutputFileName", None)
        filename = kwargs.get("filename", output_file_name)

        if filename is None:
            filename = skim_code if skim_code is not None else self.name

        if not filename.endswith(".mdst.root"):
            filename += ".mdst.root"

        kwargs["filename"] = filename

        # Copy the containers before extending them: kwargs holds references to the
        # objects stored in ``self.mdst_kwargs`` (i.e. possibly supplied by the user).
        dataDescription = dict(kwargs.get("dataDescription") or {})
        dataDescription.setdefault("skimDecayMode", skim_code)
        kwargs["dataDescription"] = dataDescription

        kwargs["additionalBranches"] = [
            *(kwargs.get("additionalBranches") or []),
            "EventExtraInfo",
        ]

        add_mdst_output(path=passes_flag_path, **kwargs)

    def apply_hlt_hadron_cut_if_required(self, path):
        """Run the `BaseSkim.apply_hlt_hadron_cut_if_required` function for each skim.

        Parameters:
            path (basf2.Path): Skim path to be processed.
        """
        for skim in self:
            skim.apply_hlt_hadron_cut_if_required(skim._ConditionalPath or path)

    def set_skim_logging(self):
        """Run `BaseSkim.set_skim_logging` for each skim."""
        for skim in self:
            skim.set_skim_logging()

    @property
    def TestFiles(self):
        """Unique test files of all individual skims, in first-seen order."""
        return list(dict.fromkeys(f for skim in self for f in skim.TestFiles))

    @property
    def flags(self):
        """
        List of flags for each skim in combined skim.
        """
        return [skim.flag for skim in self]

    @property
    def flag(self):
        """
        Event-level variable indicating whether an event passes the combinedSkim or not.
        """
        return f"passes_{self}"

    def initialise_skim_flag(self, path):
        """
        Add the module `skim.utils.flags.InitialiseSkimFlag` to the path, to
        initialise flags for each skim.
        """
        path.add_module(InitialiseSkimFlag(*self))

    def update_skim_flag(self, path):
        """
        Add the module `skim.utils.flags.UpdateSkimFlag` to the conditional path
        of each skims.
        """
        # ``path`` is unused: each skim's own post-skim path is the right place for
        # its flag update, since its lists may only exist on a conditional path.
        for skim in self:
            skim.postskim_path.add_module(UpdateSkimFlag(skim))

    @property
    def produce_on_tau_samples(self):
        """
        Corresponding value of this attribute for each individual skim.

        A warning is issued if the individual skims in combined skim contain a mix of
        True and False for this property.
        """
        produce_on_tau = [skim.produce_on_tau_samples for skim in self]
        if all(produce_on_tau):
            return True
        if not any(produce_on_tau):
            return False

        warnings.warn(
            (
                "The individual skims in the combined skim contain a mix of True and "
                "False for the attribute `produce_on_tau_samples`.\n    The default in "
                "this case is to allow the combined skim to be produced on tau samples.\n"
                "    Skims included in the problematic combined skim: "
                f"{', '.join(skim.name for skim in self)}"
            ),
            RuntimeWarning,
        )
        return True

    def merge_data_structures(self):
        """Read the values of `BaseSkim.MergeDataStructures` and merge data structures
        accordingly.

        For example, if ``MergeDataStructures`` has the value ``{"FEIChannelArgs":
        _merge_boolean_dicts.__func__}``, then ``_merge_boolean_dicts`` is run on all
        input skims with the attribute ``FEIChannelArgs``, and the value of
        ``FEIChannelArgs`` for that skim is set to the result.

        In the FEI skims, this is used to merge configs which are passed to a cached
        function, thus allowing us to apply the FEI once with all the required particles
        available.
        """
        for skim in self:
            for attribute, MergingFunction in skim.MergeDataStructures.items():
                values = [
                    getattr(other, attribute)
                    for other in self
                    if hasattr(other, attribute)
                ]
                setattr(skim, attribute, MergingFunction(*values))

    def _check_duplicate_list_names(self):
        """Check for duplicate particle list names.

        .. Note::

            Skims cannot be relied on to define their particle list names in advance, so
            this function can only be run after `build_lists` is run.

        Raises:
            ValueError: If a particle list name occurs more than once across the skims.
        """
        seen = set()
        # dict used as an ordered set, so the error message is deterministic
        DuplicatedParticleLists = {}
        for skim in self:
            for ParticleList in skim.SkimLists:
                if ParticleList in seen:
                    DuplicatedParticleLists[ParticleList] = None
                seen.add(ParticleList)

        if DuplicatedParticleLists:
            raise ValueError(
                f"Non-unique output particle list names in combined skim! "
                f"{', '.join(DuplicatedParticleLists)}"
            )
def __init__(self, *, OutputFileName=None, additionalDataDescription=None, udstOutput=True, validation=False, mc=True, analysisGlobaltag=None)
Definition: core.py:139
def initialise_skim_flag(self, path)
Definition: core.py:355
def __call__(self, path)
Definition: core.py:214
def additional_setup(self, path)
Definition: core.py:176
def output_udst(self, path)
Definition: core.py:425
def code(self)
Definition: core.py:126
def set_skim_logging(self)
Definition: core.py:399
def apply_hlt_hadron_cut_if_required(self, path)
Definition: core.py:452
def update_skim_flag(self, path)
Definition: core.py:362
def build_lists(self, path)
Definition: core.py:191
def _method_unchanged(self, method)
Definition: core.py:375
bool ApplyHLTHadronCut
Definition: core.py:66
string TestSampleProcess
Definition: core.py:54
list SkimLists
Definition: core.py:260
def skim_event_cuts(self, cut, *, path)
Definition: core.py:272
additionalDataDescription
Definition: core.py:154
def flag(self)
Definition: core.py:347
def TestFiles(self)
Definition: core.py:324
def validation_histograms(self, path)
Definition: core.py:206
def postskim_path(self)
Definition: core.py:244
def load_standard_lists(self, path)
Definition: core.py:163
def initialise_skim_flag(self, path)
Definition: core.py:753
def __call__(self, path)
Definition: core.py:581
def additional_setup(self, path)
Definition: core.py:637
def output_udst(self, path)
Definition: core.py:655
def set_skim_logging(self)
Definition: core.py:730
def apply_hlt_hadron_cut_if_required(self, path)
Definition: core.py:721
def update_skim_flag(self, path)
Definition: core.py:760
def build_lists(self, path)
Definition: core.py:646
def __init__(self, *skims, NoisyModules=None, additionalDataDescription=None, udstOutput=None, mdstOutput=False, mdst_kwargs=None, CombinedSkimName="CombinedSkim", OutputFileName=None, mc=None, analysisGlobaltag=None)
Definition: core.py:512
def merge_data_structures(self)
Definition: core.py:794
def flag(self)
Definition: core.py:747
def TestFiles(self)
Definition: core.py:736
def _check_duplicate_list_names(self)
Definition: core.py:816
def output_mdst_if_any_flag_passes(self, *, path, **kwargs)
Definition: core.py:665
def flags(self)
Definition: core.py:740
def load_standard_lists(self, path)
Definition: core.py:599
std::map< ExpRun, std::pair< double, double > > filter(const std::map< ExpRun, std::pair< double, double >> &runs, double cut, std::map< ExpRun, std::pair< double, double >> &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
Definition: Splitter.cc:40
def add_skimmed_udst_output(path, skimDecayMode, skimParticleLists=None, outputParticleLists=None, additionalBranches=None, outputFile=None, dataDescription=None, mc=True)
Definition: udst.py:99