"""This module contains various utility functions for the prompt calibration CAF scripts to use."""
6 from basf2
import B2INFO
7 from collections
import defaultdict, OrderedDict
8 from itertools
import groupby
10 from caf.utils
import ExpRun, IoV
11 from random
import choice, shuffle
def filter_by_max_files_per_run(files_to_iov, max_files_per_run=1, min_events_per_file=0, random_select=False):
    """Create a new files_to_iov dictionary holding a limited number of files per run.

    Files are added run by run until the maximum number of files per run is reached.
    After this no more files are added for that run.

    It makes the assumption that the IoV is a single run, and that the exp_low and run_low
    of the IoV object can be used to create the ExpRun for comparison of whether to add a
    new input file.

    Parameters:
        files_to_iov (dict): The standard dictionary you might pass as input to a Calibration.
            It is of the form

            >>> files_to_iov = {"file_path.root": IoV(1,1,1,1),}

        max_files_per_run (int): The maximum number of files that we will add to the output
            dictionary for each run in the input dictionary.
        min_events_per_file (int): The minimum number of events that is allowed to be in any
            included file's tree. A falsy value (the default 0) disables the check, and then
            the files are never opened.
        random_select (bool): True shuffles the input before filtering so that a random set of
            files is selected; False takes the first files in the input's iteration order.

    Returns:
        dict: The same style of dict as the input files_to_iov, but filtered down. Every kept
        file is mapped to a freshly built single-run ``IoV``.
    """
    B2INFO(f"Beginning filtering process to only choose {max_files_per_run} file(s) per run.")
    if min_events_per_file:
        B2INFO(f"We also require that each file must have at least {min_events_per_file} events in the tree.")

    # Only randomise when asked to; the mapping is rebuilt with the caller's own type.
    if random_select:
        files_to_iov_list = list(files_to_iov.items())
        shuffle(files_to_iov_list)
        files_to_iov = type(files_to_iov)(files_to_iov_list)

    # Per-run bookkeeping so the number of files already accepted can be checked.
    run_to_files = defaultdict(list)
    for input_file, file_iov in files_to_iov.items():
        run = ExpRun(exp=file_iov.exp_low, run=file_iov.run_low)
        # .get() avoids creating empty entries for runs that end up with no file.
        run_files = run_to_files.get(run)
        if run_files and len(run_files) >= max_files_per_run:
            continue
        # The event count is only looked up when a minimum was actually requested.
        if min_events_per_file and events_in_basf2_file(input_file) < min_events_per_file:
            continue
        B2INFO(f"Choosing input file for {run}: {input_file}")
        run_to_files[run].append(input_file)

    # Invert back to a files_to_iov style mapping, just with fewer files.
    # An OrderedDict is used so the selection order is kept explicitly.
    new_files_to_iov = OrderedDict()
    for run, run_files in run_to_files.items():
        for file_path in run_files:
            # The run is unpacked twice to give the low and high (exp, run) bounds of the IoV.
            new_files_to_iov[file_path] = IoV(*run, *run)
    return new_files_to_iov
def group_files_by_iov(files_to_iov):
    """
    Inverts the files_to_iov dictionary to give back a dictionary of IoV -> File list

    Files that share an IoV are collected into one list even when they are not adjacent
    in the input. The IoV keys appear in the order they are first seen, and the files of
    each IoV keep their input order.

    Parameters:
        files_to_iov (dict): {"/path/to/file1.root": IoV(1,1,1,1), "/path/to/file2.root": IoV(1,1,1,1)}

    Returns:
        dict: {IoV(1,1,1,1): ["/path/to/file1.root", "/path/to/file2.root"]}
    """
    iov_to_files = OrderedDict()
    for file_path, iov in files_to_iov.items():
        # Accumulate per key: grouping only consecutive equal IoVs would let a later
        # run of the same IoV overwrite the files collected earlier.
        iov_to_files.setdefault(iov, []).append(file_path)
    return iov_to_files
def filter_by_max_events_per_run(files_to_iov, max_events_per_run, random_select=False):
    """
    This function creates a new files_to_iov dictionary by appending files
    in order until the maximum number of events are reached per run.

    Parameters:
        files_to_iov (dict): {"/path/to/file.root": IoV(1,1,1,1)} type dictionary. Same style as used by the CAF
        max_events_per_run (int): The threshold we want to reach but stop adding files if we reach it.
        random_select (bool): True picks files at random from those remaining for the run;
            False takes them in their listed order.

    Returns:
        dict: The same style of dict as the input files_to_iov, but filtered down.
        IoVs for which no file was chosen are left out.
    """
    # Group the files by their IoV so each run can be filled independently.
    iov_to_files = group_files_by_iov(files_to_iov)

    # Reduced file lists, keyed by IoV.
    new_iov_to_files = OrderedDict()

    for iov, files in sorted(iov_to_files.items()):
        run = ExpRun(iov.exp_low, iov.run_low)
        total = 0
        # Work on a copy so the grouped lists are not consumed.
        remaining_files = files[:]
        chosen_files = []
        while total < max_events_per_run and remaining_files:
            # Exactly one file is taken out of the pool per iteration.
            if random_select:
                file_path = choice(remaining_files)
                remaining_files.remove(file_path)
            else:
                file_path = remaining_files.pop(0)
            events = events_in_basf2_file(file_path)
            # Empty files contribute nothing towards the threshold, so skip them.
            if not events:
                B2INFO(f"No events in {file_path}, skipping...")
                continue
            total += events
            chosen_files.append(file_path)
            B2INFO(f"Choosing input file for {run}: {file_path} and total events so far {total}")

        # Do not create an empty input list for a run.
        if chosen_files:
            new_iov_to_files[iov] = chosen_files
        else:
            B2INFO(f"No files chosen for {run}")

    # Flatten back into the files_to_iov layout.
    new_files_to_iov = OrderedDict()
    for iov, files in new_iov_to_files.items():
        for path in files:
            new_files_to_iov[path] = iov
    return new_files_to_iov
def filter_by_select_max_events_from_files(input_file_list, select_max_events_from_files):
    """
    This function creates a new list by appending random files until
    the maximum number of events are reached per data set.

    The input list is not modified; files are drawn from an internal copy.

    Parameters:
        input_file_list (list): ["/path/to/file1.root", "/path/to/file2.root"]
        select_max_events_from_files (int): The threshold we want to reach but stop adding files if we reach it.

    Returns:
        list: The sorted list of random files, or an empty list if not enough events were found
    """
    # Draw from a copy so the caller's list is left untouched.
    candidates = list(input_file_list)
    total = 0
    selected_file = []
    while total < select_max_events_from_files:
        # Ran out of files before reaching the requested number of events.
        if not candidates:
            break

        file_path = choice(candidates)
        candidates.remove(file_path)

        events = events_in_basf2_file(file_path)
        # Empty files contribute nothing towards the threshold, so skip them.
        if not events:
            B2INFO(f"No events in {file_path}, skipping...")
            continue

        total += events
        selected_file.append(file_path)
        B2INFO(f"Choosing random input file: {file_path} and total events so far {total}")

    # A partial selection is discarded: the result is all-or-nothing.
    if total < select_max_events_from_files:
        B2INFO(f"total events {total} are less than requested {select_max_events_from_files}")
        selected_file = []

    return sorted(selected_file)
180 def events_in_basf2_file(file_path):
181 """Does a quick open and return of the number of entries in a basf2 file's tree object.
184 file_path (str): File path to ROOT file
187 int: Number of entries in tree.
189 f = ROOT.TFile.Open(file_path,
"READ")
190 events = f.tree.GetEntries()