13 The core classes of the skim package are defined in ``skim.core``: ``BaseSkim`` and ``CombinedSkim``.
16 * ``BaseSkim`` is an abstract base class from which all skims inherit. It defines
17 template functions for a skim, and includes attributes describing the skim metadata.
19 * ``CombinedSkim`` is a class for combining ``BaseSkim`` objects into a single steering file.
23 from abc
import ABC, abstractmethod
27 from modularAnalysis
import applyCuts, summaryOfLists
29 from skim.utils.flags
import InitialiseSkimFlag, UpdateSkimFlag
30 from skim.utils.testfiles
import get_test_file
31 from skim.utils.misc
import _hashable_list
35 """Base class for skims. Initialises a skim name, and creates template functions
36 required for each skim.
38 See `writing-skims` for information on how to use this to define a new skim.
42 """List of module types to be silenced. This may be necessary in certain skims in
43 order to keep log file sizes small.
46 The elements of this list should be the module *type*, which is not necessarily
47 the same as the module name. The module type can be inspected in Python via ``module.type()``.
51 This attribute is used by `BaseSkim.set_skim_logging`.
54 TestSampleProcess =
"mixed"
55 """MC process of test file. `BaseSkim.TestFiles` passes this property to
56 `skim.utils.testfiles.get_test_file` to retrieve an appropriate file location.
57 Defaults to a :math:`B^{0}\\overline{B^{0}}` sample.
60 MergeDataStructures = {}
61 """Dict of ``str -> function`` pairs to determine if any special data structures
62 should be merged when combining skims. Currently, this is only used to merge FEI
63 config parameters when running multiple FEI skims at once, so that it can be run
64 just once with all the necessary arguments."""
66 ApplyHLTHadronCut =
False
67 """If this property is set to True, then the HLT selection for ``hlt_hadron`` will
68 be applied to the skim lists when the skim is added to the path.
71 produce_on_tau_samples =
True
72 """If this property is set to False, then ``b2skim-prod`` will not produce data
73 production requests for this skim on taupair MC samples. This decision may be made
74 for one of two reasons:
76 * The retention rate of the skim on taupair samples is basically zero, so there is
77 no point producing the skim for these samples.
79 * The retention rate of the skim on taupair samples is too high (>20%), so the
80 production system may struggle to handle the jobs.
83 produces_mdst_by_default =
False
84 """Special property for combined systematics skims, which produce MDST output instead of
85 uDST. This property is used by ``b2skim-prod`` to set the ``DataLevel`` parameter in
86 the ``DataDescription`` block for this skim to ``mdst`` instead of ``udst``.
89 validation_sample =
None
91 MDST sample to use for validation histograms. Must be a valid location of a
92 validation dataset (see documentation for `basf2.find_file`).
97 Include Monte Carlo quantities in skim output.
100 analysisGlobaltag =
None
107 def __description__(self):
112 def __category__(self):
117 def __authors__(self):
122 def __contact__(self):
127 """Eight-digit code assigned to this skim in the registry."""
128 return Registry.encode_skim_name(self.
namename)
134 additionalDataDescription=None,
138 analysisGlobaltag=None,
140 """Initialise the BaseSkim class.
143 OutputFileName (str): Name to give output uDST files. If none is given, this
144 defaults to the eight-digit skim code.
145 additionalDataDescription (dict): additional data description to be added to the output file metadata.
146 udstOutput (bool): If True, add uDST output to the path.
147 validation (bool): If True, build lists and write validation histograms
148 instead of writing uDSTs.
149 mc (bool): If True, include MC quantities in output.
150 analysisGlobaltag (str): Analysis globaltag.
152 self.
namename = self.__class__.__name__
165 Load any standard lists. This code will be run before any
166 `BaseSkim.additional_setup` and `BaseSkim.build_lists`.
169 This is separated into its own function so that when skims are combined, any
170 standard lists used by two skims can be loaded just once.
173 path (basf2.Path): Skim path to be processed.
178 Perform any setup steps necessary before running the skim.
181 Standard particle lists should *not* be loaded in here. This should be done
182 by overriding the method `BaseSkim.load_standard_lists`. This is crucial for
183 avoiding loading lists twice when combining skims for production.
186 path (basf2.Path): Skim path to be processed.
192 """Create the skim lists to be saved in the output uDST. This function is where
193 the main skim cuts should be applied. This function should return a list of particle list names.
197 path (basf2.Path): Skim path to be processed.
199 .. versionchanged:: release-06-00-00
201 Previously, this function was expected to set the attribute
202 `BaseSkim.SkimLists`. Now this is handled by `BaseSkim`, and this function is
203 expected to return the list of particle list names.
207 """Create validation histograms for the skim.
210 path (basf2.Path): Skim path to be processed.
215 """Produce the skim particle lists and write uDST file.
218 path (basf2.Path): Skim path to be processed.
238 b2.B2FATAL(f
"No validation histograms defined for {self} skim.")
246 Return the skim path.
248 * If `BaseSkim.skim_event_cuts` has been run, then the skim lists will only be
249 created on a conditional path, so subsequent modules should be added to the conditional path.
252 * If `BaseSkim.skim_event_cuts` has not been run, then the main analysis path is returned.
257 raise ValueError(
"Skim has not been added to the path yet!")
262 List of particle lists reconstructed by the skim. This attribute should only be
263 accessed after running the ``__call__`` method.
267 """Main analysis path."""
269 _ConditionalPath =
None
270 """Conditional path to be set by `BaseSkim.skim_event_cuts` if event-level cuts are applied."""
273 """Apply event-level cuts in a skim-safe way.
276 cut (str): Event-level cut to be applied.
277 path (basf2.Path): Skim path to be processed.
280 Path on which the rest of this skim should be processed.
281 On this path, only events which passed the event-level cut will
282 be processed further.
285 If running this function in `BaseSkim.additional_setup` or
286 `BaseSkim.build_lists`, redefine the ``path`` to the path returned by
287 `BaseSkim.skim_event_cuts`, *e.g.*
289 .. code-block:: python
291 def build_lists(self, path):
292 path = self.skim_event_cuts("nTracks>4", path=path)
293 # rest of skim list building...
296 The motivation for using this function over `applyEventCuts` is that
297 `applyEventCuts` completely removes events from processing. If we combine
298 multiple skims in a single steering file (which is done in production), and
299 the first has a set of event-level cuts, then all the remaining skims will
300 never even see those events.
302 Internally, this function creates a new path, which is only processed for
303 events passing the event-level cut. To avoid issues around particles not
304 being available on the main path (leading to noisy error logs), we need to
305 add the rest of the skim to this path. So this new path is assigned to the
306 attribute ``BaseSkim._ConditionalPath``, and ``BaseSkim.__call__`` will run
307 all remaining methods on this path.
311 "BaseSkim.skim_event_cuts cannot be run twice in one skim. "
312 "Please join your event-level cut strings into a single string."
315 ConditionalPath = b2.Path()
318 eselect = path.add_module(
"VariableToReturnValue", variable=f
"passesEventCut({cut})")
319 eselect.if_value(
'=1', ConditionalPath, b2.AfterConditionPath.CONTINUE)
321 return ConditionalPath
326 Location of test MDST sample. To modify this, set the property
327 `BaseSkim.TestSampleProcess`, and this function will find an appropriate test
328 sample from the list in
329 ``/group/belle2/dataprod/MC/SkimTraining/SampleLists/TestFiles.yaml``
331 If no sample can be found, an empty list is returned.
335 except FileNotFoundError:
341 f
"Could not find '{self.TestSampleProcess}' sample in TestFiles.yaml"
349 Event-level variable indicating whether an event passes the skim or not. To use
350 the skim flag without writing uDST output, use the argument ``udstOutput=False``
351 when instantiating the skim class.
353 return f
"passes_{self}"
357 Add the module `skim.utils.flags.InitialiseSkimFlag` to the path, which
358 initialises the flag for this skim to zero.
360 path.add_module(InitialiseSkimFlag(self))
364 Add the module `skim.utils.flags.UpdateSkimFlag` to the path, which
365 updates the flag for this skim.
369 If a conditional path has been created before this, then this function
370 *must* run on the conditional path, since the skim lists are not guaranteed
371 to exist for all events on the main path.
373 path.add_module(UpdateSkimFlag(self))
376 """Check if the method of the class is the same as in its parent class, or if it has been overridden.
379 Useful for determining if *e.g.* `validation_histograms` has been defined for a
383 ParentsWithAttr = [parent
for parent
in cls.__mro__[1:]
if hasattr(parent, method)]
388 OldestParentWithAttr = ParentsWithAttr[-1]
389 return getattr(cls, method) == getattr(OldestParentWithAttr, method)
400 """Turns the log level to ERROR for selected modules to decrease the total size
401 of the skim log files. Additional modules can be silenced by setting the attribute
402 `NoisyModules` for an individual skim.
405 path (basf2.Path): Skim path to be processed.
409 This method works by inspecting the modules added to the path, and setting
410 the log level to ERROR. This method should be called *after* all
411 skim-related modules are added to the path.
413 b2.set_log_level(b2.LogLevel.INFO)
415 NoisyModules = [
"ParticleLoader",
"ParticleVertexFitter"] + self.
NoisyModulesNoisyModules
419 modules = [module
for path
in paths
for module
in path.modules()]
421 for module
in modules:
422 if module.type()
in set(NoisyModules):
423 module.set_log_level(b2.LogLevel.ERROR)
426 """Write the skim particle lists to an output uDST and print a summary of the
427 skim list statistics.
430 path (basf2.Path): Skim path to be processed.
439 f
"No skim list names defined in self.SkimLists for {self} skim!"
443 skimDecayMode=self.
codecode,
453 """Apply the ``hlt_hadron`` selection if the property ``ApplyHLTHadronCut`` is True.
456 path (basf2.Path): Skim path to be processed.
458 hlt_hadron =
"SoftwareTriggerResult(software_trigger_cut&skim&accept_hadron)"
461 applyCuts(SkimList, f
"{hlt_hadron}==1", path=path)
465 """Class for creating combined skims which can be run using similar-looking methods
466 to `BaseSkim` objects.
468 A steering file which combines skims can be as simple as the following:
470 .. code-block:: python
473 import modularAnalysis as ma
474 from skim.WGs.foo import OneSkim, TwoSkim, RedSkim, BlueSkim
477 ma.inputMdstList("default", [], path=path)
478 skims = CombinedSkim(OneSkim(), TwoSkim(), RedSkim(), BlueSkim())
479 skims(path) # load standard lists, create skim lists, and save to uDST
482 When skims are combined using this class, the `BaseSkim.NoisyModules` lists of each
483 skim are combined and all silenced.
485 The heavy-lifting functions `BaseSkim.additional_setup`, `BaseSkim.build_lists` and
486 `BaseSkim.output_udst` are modified to loop over the corresponding functions of each
487 individual skim. The `load_standard_lists` method is also modified to load all
488 required lists, without accidentally loading a list twice.
490 Calling an instance of the `CombinedSkim` class will load all the required particle
491 lists, then run all the setup steps, then the list building functions, and then all the output steps.
495 __authors__ = [
"Phil Grace"]
496 __description__ =
None
497 __category__ =
"combined"
504 additionalDataDescription=None,
508 CombinedSkimName="CombinedSkim",
511 analysisGlobaltag=None,
513 """Initialise the CombinedSkim class.
516 *skims (BaseSkim): One or more (instantiated) skim objects.
517 NoisyModules (list(str)): Additional modules to silence.
518 additionalDataDescription (dict): Overrides corresponding setting of all individual skims.
519 udstOutput (bool): Overrides corresponding setting of all individual skims.
520 mdstOutput (bool): Write a single MDST output file containing events which
521 pass any of the skims in this combined skim.
522 mdst_kwargs (dict): kwargs to be passed to `mdst.add_mdst_output`. Only used
523 if ``mdstOutput`` is True.
524 CombinedSkimName (str): Sets output of ``__str__`` method of this combined skim.
525 OutputFileName (str): If mdstOutput=True, this option sets the name of the combined output file.
526 If mdstOutput=False, this option does nothing.
527 mc (bool): If True, include MC quantities in output.
528 analysisGlobaltag (str): Analysis globaltag.
531 if NoisyModules
is None:
534 if not all([isinstance(skim, BaseSkim)
for skim
in skims]):
535 raise NotImplementedError(
536 "Must pass only `BaseSkim` type objects to `CombinedSkim`."
539 self.
SkimsSkims = skims
540 self.
namenamename = CombinedSkimName
542 skim.NoisyModules += NoisyModules
547 if additionalDataDescription
is not None:
549 skim.additionalDataDescription = additionalDataDescription
552 if udstOutput
is not None:
554 skim._udstOutput = udstOutput
562 if analysisGlobaltag
is not None:
564 skim.analysisGlobaltag = analysisGlobaltag
568 self.
mdst_kwargsmdst_kwargs.update(OutputFileName=OutputFileName)
583 skim._MainPath = path
597 yield from self.
SkimsSkims
600 """Add all required standard list loading to the path.
603 To avoid loading standard lists twice, this function creates dummy paths
604 that are passed through ``load_standard_lists`` for each skim. These dummy
605 paths are then inspected, and a list of unique module-parameter combinations
606 is added to the main skim path.
609 path (basf2.Path): Skim path to be processed.
611 ModulesAndParams = []
613 DummyPath = b2.Path()
614 skim.load_standard_lists(DummyPath)
618 ModulesAndParams.extend(tuple([
622 (param.name, _hashable_list(param.values)
if isinstance(param.values, list)
else param.values)
623 for param
in module.available_params()
624 if param.values != param.default
627 for module
in DummyPath.modules()
631 ModulesAndParams = dict.fromkeys(ModulesAndParams)
634 for module, params
in ModulesAndParams:
635 path.add_module(module, **dict(params))
638 """Run the `BaseSkim.additional_setup` function of each skim.
641 path (basf2.Path): Skim path to be processed.
644 skim.additional_setup(path)
647 """Run the `BaseSkim.build_lists` function of each skim.
650 path (basf2.Path): Skim path to be processed.
653 skim.SkimLists = skim.build_lists(skim._ConditionalPath
or path)
656 """Run the `BaseSkim.output_udst` function of each skim.
659 path (basf2.Path): Skim path to be processed.
663 skim.output_udst(skim._ConditionalPath
or path)
667 Add MDST output to the path if the event passes any of the skim flags.
668 EventExtraInfo is included in the MDST output so that the flags are available in
671 The ``CombinedSkimName`` parameter in the `CombinedSkim` initialisation is used
672 for the output filename if ``filename`` is not included in kwargs.
675 path (basf2.Path): Skim path to be processed.
676 **kwargs: Passed on to `mdst.add_mdst_output`.
678 from mdst
import add_mdst_output
683 sum_flags =
" + ".join(f
"eventExtraInfo({f})" for f
in self.
flagsflags)
684 variable = f
"formula({sum_flags})"
686 passes_flag_path = b2.Path()
687 passes_flag = path.add_module(
"VariableToReturnValue", variable=variable)
688 passes_flag.if_value(
">0", passes_flag_path, b2.AfterConditionPath.CONTINUE)
690 filename = kwargs.get(
"filename", kwargs.get(
"OutputFileName", self.
codecode))
693 filename = self.
codecode
695 if not filename.endswith(
".mdst.root"):
696 filename +=
".mdst.root"
698 kwargs[
"filename"] = filename
700 if "OutputFileName" in kwargs.keys():
701 del kwargs[
"OutputFileName"]
703 kwargs.setdefault(
"dataDescription", {})
709 skim_code = self.
codecode
712 kwargs[
"dataDescription"].setdefault(
"skimDecayMode", skim_code)
715 kwargs[
"additionalBranches"] += [
"EventExtraInfo"]
717 kwargs[
"additionalBranches"] = [
"EventExtraInfo"]
719 add_mdst_output(path=passes_flag_path, **kwargs)
722 """Run the `BaseSkim.apply_hlt_hadron_cut_if_required` function for each skim.
725 path (basf2.Path): Skim path to be processed.
728 skim.apply_hlt_hadron_cut_if_required(skim._ConditionalPath
or path)
731 """Run `BaseSkim.set_skim_logging` for each skim."""
733 skim.set_skim_logging()
737 return list({f
for skim
in self
for f
in skim.TestFiles})
742 List of flags for each skim in combined skim.
744 return [skim.flag
for skim
in self]
749 Event-level variable indicating whether an event passes the combined skim or not.
751 return f
"passes_{self}"
755 Add the module `skim.utils.flags.InitialiseSkimFlag` to the path, to
756 initialise flags for each skim.
758 path.add_module(InitialiseSkimFlag(*self))
762 Add the module `skim.utils.flags.UpdateSkimFlag` to the conditional path of each skim.
766 skim.postskim_path.add_module(UpdateSkimFlag(skim))
769 def produce_on_tau_samples(self):
771 Corresponding value of this attribute for each individual skim.
773 A warning is issued if the individual skims in combined skim contain a mix of
774 True and False for this property.
776 produce_on_tau = [skim.produce_on_tau_samples
for skim
in self]
777 if all(produce_on_tau):
779 elif all(
not TauBool
for TauBool
in produce_on_tau):
784 "The individual skims in the combined skim contain a mix of True and "
785 "False for the attribute `produce_on_tau_samples`.\n The default in "
786 "this case is to allow the combined skim to be produced on tau samples.\n"
787 " Skims included in the problematic combined skim: "
788 f
"{', '.join(skim.name for skim in self)}"
795 """Read the values of `BaseSkim.MergeDataStructures` and merge data structures
798 For example, if ``MergeDataStructures`` has the value ``{"FEIChannelArgs":
799 _merge_boolean_dicts.__func__}``, then ``_merge_boolean_dicts`` is run on all
800 input skims with the attribute ``FEIChannelArgs``, and the value of
801 ``FEIChannelArgs`` for that skim is set to the result.
803 In the FEI skims, this is used to merge configs which are passed to a cached
804 function, thus allowing us to apply the FEI once with all the required particles
807 for iSkim, skim
in enumerate(self.
SkimsSkims):
808 for attribute, MergingFunction
in skim.MergeDataStructures.items():
809 SkimsWithAttribute = [skim
for skim
in self
if hasattr(skim, attribute)]
811 self.
SkimsSkims[iSkim],
813 MergingFunction(*[getattr(skim, attribute)
for skim
in SkimsWithAttribute])
817 """Check for duplicate particle list names.
821 Skims cannot be relied on to define their particle list names in advance, so
822 this function can only be run after `build_lists` is run.
824 ParticleListLists = [skim.SkimLists
for skim
in self]
825 ParticleLists = [lst
for L
in ParticleListLists
for lst
in L]
826 DuplicatedParticleLists = {
828 for ParticleList
in ParticleLists
829 if ParticleLists.count(ParticleList) > 1
831 if DuplicatedParticleLists:
833 f
"Non-unique output particle list names in combined skim! "
834 f
"{', '.join(DuplicatedParticleLists)}"
def __init__(self, *OutputFileName=None, additionalDataDescription=None, udstOutput=True, validation=False, mc=True, analysisGlobaltag=None)
def initialise_skim_flag(self, path)
def additional_setup(self, path)
def output_udst(self, path)
def set_skim_logging(self)
def apply_hlt_hadron_cut_if_required(self, path)
def update_skim_flag(self, path)
def build_lists(self, path)
def _method_unchanged(self, method)
def skim_event_cuts(self, cut, *path)
additionalDataDescription
def validation_histograms(self, path)
def load_standard_lists(self, path)
def initialise_skim_flag(self, path)
def additional_setup(self, path)
def output_udst(self, path)
def set_skim_logging(self)
def apply_hlt_hadron_cut_if_required(self, path)
def update_skim_flag(self, path)
def build_lists(self, path)
def __init__(self, *skims, NoisyModules=None, additionalDataDescription=None, udstOutput=None, mdstOutput=False, mdst_kwargs=None, CombinedSkimName="CombinedSkim", OutputFileName=None, mc=None, analysisGlobaltag=None)
def merge_data_structures(self)
def _check_duplicate_list_names(self)
def output_mdst_if_any_flag_passes(self, *path, **kwargs)
def load_standard_lists(self, path)
std::map< ExpRun, std::pair< double, double > > filter(const std::map< ExpRun, std::pair< double, double >> &runs, double cut, std::map< ExpRun, std::pair< double, double >> &runsRemoved)
filter events to remove runs shorter than cut, it stores removed runs in runsRemoved
def add_skimmed_udst_output(path, skimDecayMode, skimParticleLists=None, outputParticleLists=None, additionalBranches=None, outputFile=None, dataDescription=None, mc=True)