4 This module contains various utility functions for the prompt calibration CAF scripts to use.
6 from basf2
import B2INFO
7 from collections
import defaultdict, OrderedDict
8 from itertools
import groupby
10 from caf.utils
import ExpRun, IoV
11 from random
import choice
def filter_by_max_files_per_run(files_to_iov, max_files_per_run=1, min_events_per_file=0):
    """Build a reduced files_to_iov dictionary holding a limited number of files per run.

    Files are visited in the iteration order of ``files_to_iov`` and accepted for their run
    until that run holds ``max_files_per_run`` files. After that, further files of the same
    run are ignored.

    The IoV of each file is assumed to cover a single run: only its ``exp_low`` and ``run_low``
    are used to build the ExpRun identifying the run, and every IoV in the output is rebuilt
    from that ExpRun.

    Parameters:
        files_to_iov (dict): The standard dictionary you might give as input to a Calibration.
            It is of the form

            >>> files_to_iov = {"file_path.root": IoV(1,1,1,1),}

        max_files_per_run (int): The maximum number of files that will be added to the output
            dictionary for each run.
        min_events_per_file (int): The minimum number of events that a file's tree must contain
            for the file to be included. A falsy value (the default) disables the check, and
            files are then not opened at all.

    Returns:
        dict: The same style of dict as the input files_to_iov, but filtered down.
    """
    B2INFO(f"Beginning filtering process to only choose {max_files_per_run} file(s) per run.")
    if min_events_per_file:
        B2INFO(f"We also require that each file must have at least {min_events_per_file} events in the tree.")

    files_by_run = defaultdict(list)
    for candidate, candidate_iov in files_to_iov.items():
        run = ExpRun(exp=candidate_iov.exp_low, run=candidate_iov.run_low)
        # .get() is used so that merely looking at a run does not create an empty entry.
        accepted = files_by_run.get(run, None)
        # A run with no accepted file yet always passes this test, whatever the limit is.
        if accepted and len(accepted) >= max_files_per_run:
            continue
        # The file is only opened to count its events when a minimum was requested.
        if min_events_per_file and events_in_basf2_file(candidate) < min_events_per_file:
            continue
        B2INFO(f"Choosing input file for {run}: {candidate}")
        files_by_run[run].append(candidate)

    # Flatten back to file -> IoV, building each single-run IoV from the ExpRun of its group.
    return OrderedDict(
        (path, IoV(*run, *run))
        for run, paths in files_by_run.items()
        for path in paths
    )
def group_files_by_iov(files_to_iov):
    """
    Inverts the files_to_iov dictionary to give back a dictionary of IoV -> File list

    All files that share an IoV end up in the same list, whether or not they are adjacent
    in ``files_to_iov``. IoVs appear in the output in the order in which they are first met,
    and the files of each IoV keep their input order.

    Parameters:
        files_to_iov (dict): {"/path/to/file1.root": IoV(1,1,1,1), "/path/to/file2.root": IoV(1,1,1,1)}

    Returns:
        dict: {IoV(1,1,1,1): ["/path/to/file1.root", "/path/to/file2.root"]}
    """
    iov_to_files = OrderedDict()
    for file_path, iov in files_to_iov.items():
        # Accumulate per IoV instead of using itertools.groupby: groupby only merges
        # *consecutive* equal keys, so an unsorted input would yield several groups for the
        # same IoV and each later group would overwrite the earlier one.
        iov_to_files.setdefault(iov, []).append(file_path)
    return iov_to_files
def filter_by_max_events_per_run(files_to_iov, max_events_per_run, random_select=False):
    """
    This function creates a new files_to_iov dictionary by appending files
    until the maximum number of events is reached per run.

    Files are added to a run while the running total of events is still below
    ``max_events_per_run``, so the file that crosses the threshold is included. Files that
    report no events are skipped, and runs for which no file was chosen are left out of the
    result.

    Parameters:
        files_to_iov (dict): {"/path/to/file.root": IoV(1,1,1,1)} type dictionary. Same style as used by the CAF
            for its input files.
        max_events_per_run (int): The threshold we want to reach; no more files are added to a run
            once its total reaches it.
        random_select (bool): True picks the files of each run in random order, False takes them
            in order starting from the first one.

    Returns:
        dict: The same style of dict as the input files_to_iov, but filtered down.
    """
    # Invert the dictionary so that the files of one IoV can be processed together.
    iov_to_files = group_files_by_iov(files_to_iov)
    # Reduced file lists, keyed by IoV.
    new_iov_to_files = OrderedDict()

    for iov, files in sorted(iov_to_files.items()):
        run = ExpRun(iov.exp_low, iov.run_low)
        total = 0
        # Work on a copy: entries are removed as they are considered.
        remaining_files = files[:]
        chosen_files = []
        while total < max_events_per_run and remaining_files:
            if random_select:
                file_path = choice(remaining_files)
                remaining_files.remove(file_path)
            else:
                file_path = remaining_files.pop(0)
            events = events_in_basf2_file(file_path)
            # Empty files contribute nothing, so they are not kept.
            if not events:
                B2INFO(f"No events in {file_path}, skipping...")
                continue
            total += events
            chosen_files.append(file_path)
            B2INFO(f"Choosing input file for {run}: {file_path} and total events so far {total}")

        # Do not create an empty input list for a run.
        if chosen_files:
            new_iov_to_files[iov] = chosen_files
        else:
            B2INFO(f"No files chosen for {run}")

    # Convert back to the file -> IoV layout.
    new_files_to_iov = OrderedDict()
    for iov, files in new_iov_to_files.items():
        for path in files:
            new_files_to_iov[path] = iov
    return new_files_to_iov
def filter_by_select_max_events_from_files(input_file_list, select_max_events_from_files):
    """
    This function creates a new list by appending random files until
    the maximum number of events is reached for the data set.

    Files are drawn at random, without replacement, while the running total of events is below
    ``select_max_events_from_files``. Files that report no events are skipped. The caller's
    ``input_file_list`` is left unmodified.

    Parameters:
        input_file_list (list): ["/path/to/file1.root", "/path/to/file2.root"]
        select_max_events_from_files (int): The threshold we want to reach; no more files are
            added once the total reaches it.

    Returns:
        list: The sorted list of randomly chosen files, or an empty list if the available files
        do not contain enough events.
    """
    # Draw from a private copy so that the list passed in by the caller is not emptied.
    candidates = list(input_file_list)
    total = 0
    selected_file = []
    while total < select_max_events_from_files:
        # Ran out of files before reaching the requested number of events.
        if not candidates:
            break

        file_path = choice(candidates)
        candidates.remove(file_path)

        events = events_in_basf2_file(file_path)
        # Empty files contribute nothing, so they are not kept.
        if not events:
            B2INFO(f"No events in {file_path}, skipping...")
            continue

        total += events
        selected_file.append(file_path)
        B2INFO(f"Choosing random input file: {file_path} and total events so far {total}")

    # An incomplete selection is discarded entirely rather than returned.
    if total < select_max_events_from_files:
        B2INFO(f"total events {total} are less than requested {select_max_events_from_files}")
        selected_file = []

    return sorted(selected_file)
173 def events_in_basf2_file(file_path):
174 """Does a quick open and return of the number of entries in a basf2 file's tree object.
177 file_path (str): File path to ROOT file
180 int: Number of entries in tree.
182 f = ROOT.TFile.Open(file_path,
"READ")
183 events = f.tree.GetEntries()