"""
This module contains various utility functions for the prompt calibration CAF scripts to use.
"""
from collections import defaultdict, OrderedDict
from itertools import groupby
from random import choice, shuffle

from basf2 import B2INFO
import ROOT
from caf.utils import ExpRun, IoV
def filter_by_max_files_per_run(files_to_iov, max_files_per_run=1, min_events_per_file=0, random_select=False):
    """Create a new files_to_iov dictionary holding a limited number of files per run.

    Files are added until the maximum number of files per run is reached.
    After this no more files are added for that run.

    It makes the assumption that the IoV is a single run, and that the exp_low
    and run_low of the IoV object can be used to create the ExpRun for
    comparison of whether to add a new input file.

    Parameters:
        files_to_iov (dict): The standard dictionary you might give as input to
            a Calibration. It is of the form

            >>> files_to_iov = {"file_path.root": IoV(1,1,1,1),}

        max_files_per_run (int): The maximum number of files that we will add
            to the output dictionary for each run in the input dictionary.
        min_events_per_file (int): The minimum number of events that is allowed
            to be in any included file's tree. A falsy value disables the check.
        random_select (bool): True visits the files in a shuffled order, so the
            chosen files are random; False takes the first files in the
            iteration order of ``files_to_iov``.

    Returns:
        dict: The same style of dict as the input files_to_iov, but filtered down.
    """
    B2INFO(f"Beginning filtering process to only choose {max_files_per_run} file(s) per run.")
    if min_events_per_file:
        B2INFO(f"We also require that each file must have at least {min_events_per_file} events in the tree.")

    # Work on a list of the items so that the input mapping is never modified
    # and so that we do not have to rebuild a mapping of the caller's type.
    file_iov_pairs = list(files_to_iov.items())
    if random_select:
        shuffle(file_iov_pairs)

    # Files accepted so far, keyed by run; also used to count the files per run.
    run_to_files = defaultdict(list)
    for input_file, file_iov in file_iov_pairs:
        run = ExpRun(exp=file_iov.exp_low, run=file_iov.run_low)
        # .get() avoids creating an empty entry for a run that has no accepted file yet.
        run_files = run_to_files.get(run)
        # A run without any accepted file may always receive its first one.
        if run_files and len(run_files) >= max_files_per_run:
            continue
        # The file is only opened to count events when a minimum was requested.
        if min_events_per_file and events_in_basf2_file(input_file) < min_events_per_file:
            continue
        B2INFO(f"Choosing input file for {run}: {input_file}")
        run_to_files[run].append(input_file)

    # Invert back to a files_to_iov style mapping, just with fewer files.
    new_files_to_iov = OrderedDict()
    for run, run_files in run_to_files.items():
        for file_path in run_files:
            # The IoVs are expected to be single runs, so the ExpRun gives both IoV boundaries.
            new_files_to_iov[file_path] = IoV(*run, *run)
    return new_files_to_iov
def group_files_by_iov(files_to_iov):
    """
    Inverts the files_to_iov dictionary to give back a dictionary of IoV -> File list

    Files that share an IoV are collected into the same list even when they are
    not adjacent in the input. IoVs appear in the order in which they are first
    seen, and the files of each IoV keep their input order.

    Parameters:
        files_to_iov (dict): {"/path/to/file1.root": IoV(1,1,1,1), "/path/to/file2.root": IoV(1,1,1,1)}

    Returns:
        dict: {IoV(1,1,1,1): ["/path/to/file1.root", "/path/to/file2.root"]}
    """
    iov_to_files = OrderedDict()
    for file_path, iov in files_to_iov.items():
        # Grouping by key lookup rather than by adjacency: a later file with an
        # IoV that was already seen is appended to the existing list.
        iov_to_files.setdefault(iov, []).append(file_path)
    return iov_to_files
def filter_by_max_events_per_run(files_to_iov, max_events_per_run, random_select=False, max_events_per_file=0):
    """
    This function creates a new files_to_iov dictionary by appending files
    in order until the maximum number of events are reached per run.
    Each file contributes a maximum of events specified by "max_events_per_file".

    Parameters:
        files_to_iov (dict): {"/path/to/file.root": IoV(1,1,1,1)} type dictionary. Same style as used by the CAF
            for lookup values.
        max_events_per_run (int): The threshold we want to reach but stop adding files if we reach it.
        random_select (bool): True will pick the files of each run at random, False will take them in order.
        max_events_per_file (int): If greater than zero, the number of events a single file
            contributes to the running total is capped at this value. Zero or less means no cap.

    Returns:
        dict: The same style of dict as the input files_to_iov, but filtered down.
    """
    # Invert the dictionary so that files are grouped against the same IoV.
    iov_to_files = group_files_by_iov(files_to_iov)
    # Holds the reduced file list of every IoV that got at least one file.
    new_iov_to_files = OrderedDict()

    for iov, files in sorted(iov_to_files.items()):
        run = ExpRun(iov.exp_low, iov.run_low)
        total = 0
        # Copy, because the candidates are consumed while choosing.
        remaining_files = files[:]
        chosen_files = []
        while total < max_events_per_run and remaining_files:
            if random_select:
                file_path = choice(remaining_files)
                remaining_files.remove(file_path)
            else:
                file_path = remaining_files.pop(0)
            events = events_in_basf2_file(file_path)
            # Empty files are skipped.
            if not events:
                B2INFO(f"No events in {file_path}, skipping...")
                continue
            # A positive max_events_per_file caps what one file adds to the total.
            if max_events_per_file > 0:
                events = min(events, max_events_per_file)
            total += events
            chosen_files.append(file_path)
            B2INFO(f"Choosing input file for {run}: {file_path} and total events so far {total}")

        # Don't bother making an empty input list for a run.
        if chosen_files:
            new_iov_to_files[iov] = chosen_files
        else:
            B2INFO(f"No files chosen for {run}")

    # Now go back to a files_to_iov style dictionary.
    new_files_to_iov = OrderedDict()
    for iov, files in new_iov_to_files.items():
        for path in files:
            new_files_to_iov[path] = iov
    return new_files_to_iov
def filter_by_select_max_events_from_files(input_file_list, select_max_events_from_files):
    """
    This function creates a new list by appending random files until
    the maximum number of events are reached per data set.

    The input list is left untouched; the selection works on a copy of it.

    Parameters:
        input_file_list (list): ["/path/to/file1.root", "/path/to/file2.root"]
        select_max_events_from_files (int): The threshold we want to reach but stop adding files if we reach it.

    Returns:
        list: The sorted list of random files or empty list if not enough events were found
    """
    # Copy so that removing the picked candidates does not empty the caller's list.
    candidates = list(input_file_list)

    total = 0
    selected_file = []
    while total < select_max_events_from_files and candidates:
        file_path = choice(candidates)
        candidates.remove(file_path)

        events = events_in_basf2_file(file_path)
        # Empty files are skipped.
        if not events:
            B2INFO(f"No events in {file_path}, skipping...")
            continue

        total += events
        selected_file.append(file_path)
        B2INFO(f"Choosing random input file: {file_path} and total events so far {total}")

    # Return an empty list if the requested number of events could not be reached.
    if total < select_max_events_from_files:
        B2INFO(f"total events {total} are less than requested {select_max_events_from_files}")
        selected_file = []

    return sorted(selected_file)
def events_in_basf2_file(file_path):
    """Does a quick open and return of the number of entries in a basf2 file's tree object.

    Parameters:
        file_path (str): File path to ROOT file

    Returns:
        int: Number of entries in tree.
    """
    f = ROOT.TFile.Open(file_path, "READ")
    try:
        events = f.tree.GetEntries()
    finally:
        # Close the file even if reading the tree failed, so no handle is leaked.
        f.Close()
    return events