15 from pathlib
import Path
21 from basf2
import find_file
25 """Base class for skim test samples."""
29 Initialise Sample. Passing any unrecognised keywords will raise an error.
32 keys =
", ".join(kwargs.keys())
34 f
"Unrecognised arguments in test sample initialisation: {keys}"
37 location = NotImplemented
38 """Path of the test file."""
43 Identifying string which is safe to be included as a filename component or as a
44 key in the skim stats JSON file.
46 As a rough naming convention, data samples should start with 'Data-', MC samples
47 with 'MC-', and custom samples with 'Custom-'.
54 Human-readable name for displaying in printed tables.
61 Replace ``'${SampleDirectory}'`` with ``Sample.SampleDirectory``, and resolve
65 location (str, pathlib.Path): Filename to be resolved.
68 pathlib.Path: Resolved path.
70 SampleDirectory =
"/group/belle2/dataprod/MC/SkimTraining"
71 location = str(location).replace(
"${SampleDirectory}", SampleDirectory)
72 return Path(location).expanduser().resolve()
77 Sample serialised as a dictionary.
97 super().__init__(**kwargs)
101 if isinstance(experiment, int)
or not experiment.startswith(
"exp"):
102 experiment = f
"exp{experiment}"
109 f
"{self.__class__.__name__}("
110 f
"location={repr(self.location)}, "
111 f
"processing={repr(self.processing)}, "
112 f
"experiment={repr(self.experiment)}, "
113 f
"beam_energy={repr(self.beam_energy)}, "
114 f
"general_skim={repr(self.general_skim)})"
141 name = f
"{self.processing} {self.experiment}"
144 name += f
", {self.beam_energy}"
146 name += f
", ({self.general_skim})"
158 beam_background="BGx1",
162 super().__init__(**kwargs)
168 if isinstance(campaign, int)
or not campaign.startswith(
"MC"):
169 campaign = f
"MC{campaign}"
172 if isinstance(beam_background, int)
or not beam_background.startswith(
"BGx"):
173 beam_background = f
"BGx{beam_background}"
178 f
"{self.__class__.__name__}("
179 f
"location={repr(self.location)}, "
180 f
"process={repr(self.process)}, "
181 f
"campaign={repr(self.campaign)}, "
182 f
"beam_energy={repr(self.beam_energy)}, "
183 f
"beam_background={repr(self.beam_background)})"
190 "process": self.
processprocess,
204 name = f
"{self.campaign} {self.process}"
207 name += f
" {self.beam_background}"
209 name += f
", {self.beam_energy}"
214 def __init__(self, *, location, label=None, **kwargs):
216 super().__init__(**kwargs)
220 self.
labellabel = str(location)
222 self.
labellabel = label
228 f
"{self.__class__.__name__}("
229 f
"location={repr(self.location)}, "
230 f
"label={repr(self.label)})"
239 return f
"Custom-{self.sanitised_label}"
243 return self.
labellabel
247 """Container class for lists of MC, data, and custom samples."""
249 DefaultSampleYAML = (
250 "/group/belle2/dataprod/MC/SkimTraining/SampleLists/TestFiles.yaml"
253 def __init__(self, *, SampleYAML=None, SampleDict=None, SampleList=None):
255 Initialise a list of test samples. Three methods of initialisation are
256 allowed. If no arguments are given this function will default to a standard list
257 of samples defined in
258 ``/group/belle2/dataprod/MC/SkimTraining/SampleLists/TestFiles.yaml``.
261 SampleYAML (str, pathlib.Path): Path to a YAML file containing sample
263 SampleDict (dict): Dict containing sample specifications.
264 SampleList (list(Sample)): List of Sample objects.
266 if sum(p
is not None for p
in (SampleYAML, SampleDict, SampleList)) > 1:
268 "Only one out of SampleYAML, SampleDict, or SampleList can be passed."
271 if SampleList
is not None:
273 self.
mc_samplesmc_samples = [s
for s
in SampleList
if isinstance(s, MCSample)]
274 self.
data_samplesdata_samples = [s
for s
in SampleList
if isinstance(s, DataSample)]
275 self.
custom_samplescustom_samples = [s
for s
in SampleList
if isinstance(s, CustomSample)]
278 if SampleDict
is None:
279 if SampleYAML
is None:
282 with open(SampleYAML)
as f:
283 SampleDict = yaml.safe_load(f)
290 def _all_samples(self):
296 def __getitem__(self, i):
303 return f
"{self.__class__.__name__}(" f
"SampleList={repr(list(self))})"
306 def SampleDict(self):
308 "MC": [s.as_dict
for s
in self.
mc_samplesmc_samples],
309 "Data": [s.as_dict
for s
in self.
data_samplesdata_samples],
315 Validate YAML input against JSON schema defined in
316 ``skim/tools/resources/test_samples_schema.json``.
318 schema_file = find_file(
"skim/tools/resources/test_samples_schema.json")
319 with open(schema_file)
as f:
320 schema = json.load(f)
323 jsonschema.validate(SampleDict, schema)
324 except jsonschema.exceptions.ValidationError
as e:
327 f
"Error in sample list configuration file {InputYAML}"
332 def _parse_samples(SampleDict, BlockName, SampleClass):
333 if SampleDict
is None:
337 InputSampleList = SampleDict[BlockName]
341 if InputSampleList
is None:
345 for sample
in InputSampleList:
346 samples.append(SampleClass(**sample))
350 """Read in each block of the YAML and create lists of sample objects."""
352 "Error in '{block}' block of test sample yaml file.\n"
353 "The following must all have defined values: {params}"
358 except TypeError
as e:
359 required =
", ".join(
361 for p
in (
"location",
"processing",
"beam_energy",
"experiment")
363 raise ValueError(MissingParams.format(block=
"Data", params=required))
from e
367 except TypeError
as e:
368 required =
", ".join(f
"'{p}'" for p
in (
"location",
"process",
"campaign"))
369 raise ValueError(MissingParams.format(block=
"MC", params=required))
from e
373 SampleDict,
"Custom", CustomSample
375 except TypeError
as e:
376 required =
", ".join(f
"'{p}'" for p
in (
"location",))
378 MissingParams.format(block=
"Custom", params=required)
387 beam_background=None,
392 Find all MC samples matching query.
395 process (str): Simulated MC process to query.
396 campaign (str, int): MC campaign number to query.
397 beam_energy (str): Beam energy to query.
398 beam_background (str, int): Nominal beam background to query.
399 exact_match (bool): If passed, an error is raised if there is not exactly
400 one matching sample. If there is exactly one matching sample, then the
401 single sample is returned, rather than a list.
402 inplace (bool): Replace MC samples with the list obtained from query.
404 if inplace
and exact_match:
406 "Incompatible arguments passed: `inplace` and `exact_match`"
412 if (process
is None or s.process == process)
413 and (campaign
is None or s.campaign == campaign)
414 and (beam_energy
is None or s.beam_energy == beam_energy)
415 and (beam_background
is None or s.beam_background == beam_background)
418 if len(samples) == 1:
422 "`exact_match=True` was specified, but did not find exactly one match."
441 Find all data samples matching query.
444 processing (str): Data processing campaign number to query.
445 experiment (str, int): Experiment number to query.
446 beam_energy (str): Beam energy to query.
447 general_skim (skim): ``GeneralSkimName`` to query.
448 exact_match (bool): If passed, an error is raised if there is not exactly
449 one matching sample. If there is exactly one matching sample, then the
450 single sample is returned, rather than a list.
451 inplace (bool): Replace data samples with the list obtained from query.
453 if inplace
and exact_match:
455 "Incompatible arguments passed: `inplace` and `exact_match`"
461 if (processing
is None or s.processing == processing)
462 and (experiment
is None or s.experiment == experiment)
463 and (beam_energy
is None or s.beam_energy == beam_energy)
464 and (general_skim
is None or s.general_skim == general_skim)
467 if len(samples) == 1:
471 "`exact_match=True` was specified, but did not find exactly one match."
480 def get_test_file(process, *, SampleYAML=None):
482 Attempt to find a test sample of the given MC process.
485 process (str): Physics process, e.g. mixed, charged, ccbar, eemumu.
486 SampleYAML (str, pathlib.Path): Path to a YAML file containing sample
490 str: Path to test sample file.
493 ValueError: Raised if no sample can be found.
496 matches = samples.query_mc_samples(process=process)
499 return matches[0].location
500 except IndexError
as e:
501 raise ValueError(f
"No test samples found for MC process '{process}'.")
from e
504 if __name__ ==
"__main__":
511 print(
"Samples defined in YAML file:")
512 for sample
in samples:
513 print(f
" * {repr(sample)}")
def encodeable_name(self)
def encodeable_name(self)
def encodeable_name(self)
def resolve_path(location)
def __init__(self, **kwargs)
def encodeable_name(self)
def validate_schema(self, SampleDict, InputYAML=None)
def query_data_samples(self, *processing=None, experiment=None, beam_energy=None, general_skim=None, exact_match=False, inplace=False)
def query_mc_samples(self, *process=None, campaign=None, beam_energy=None, beam_background=None, exact_match=False, inplace=False)
def _parse_samples(SampleDict, BlockName, SampleClass)
def _parse_all_samples(self, SampleDict)
def __init__(self, *SampleYAML=None, SampleDict=None, SampleList=None)