"""This module contains various utility functions for the prompt calibration CAF scripts to use."""
   13from basf2 
import B2INFO
 
   14from collections 
import defaultdict, OrderedDict
 
   15from itertools 
import groupby
 
   17from caf.utils 
import ExpRun, IoV
 
   18from random 
import choice, shuffle
 
   21def filter_by_max_files_per_run(files_to_iov, max_files_per_run=1, min_events_per_file=0, random_select=False):
 
   22    """This function creates a new files_to_iov dictionary by adding files 
   23    until the maximum number of files per run is reached. After this no more files
 
   26    It makes the assumption that the IoV 
is a single run, 
and that the exp_low 
and run_low of the IoV object
 
   27    can be used to create the ExpRun fr comparison of whether to add a new input file.
 
   30        files_to_iov (dict): The standard dictionary you might 
as input to a Calibration. It 
is of the form
 
   32            >>> files_to_iov = {
"file_path.root": IoV(1,1,1,1),}
 
   34        max_files_per_run (int): The maximum number of files that we will add to the output dictionary 
for each run 
in the
 
   37        min_events_per_file (int): The minimum number of events that 
is allowed to be 
in any included file
's tree.        random_select (bool): true will select random nfile and false will take first nfile.
 
   40        dict: The same style of dict 
as the input file_to_iov, but filtered down.
 
   42    B2INFO(f"Beginning filtering process to only choose {max_files_per_run} file(s) per run.")
 
   43    if min_events_per_file:
 
   44        B2INFO(f
"We also require that each file must have at least {min_events_per_file} events in the tree.")
 
   48        files_to_iov_list = list(files_to_iov.items())
 
   49        shuffle(files_to_iov_list)
 
   50        files_to_iov = type(files_to_iov)(files_to_iov_list)
 
   53    run_to_files = defaultdict(list)
 
   54    for input_file, file_iov 
in files_to_iov.items():
 
   55        run = ExpRun(exp=file_iov.exp_low, run=file_iov.run_low)
 
   56        run_files = run_to_files.get(run, 
None)
 
   57        if not run_files 
or len(run_files) < max_files_per_run:
 
   58            if not min_events_per_file 
or (min_events_per_file 
and events_in_basf2_file(input_file) >= min_events_per_file):
 
   59                B2INFO(f
"Choosing input file for {run}: {input_file}")
 
   60                run_to_files[run].append(input_file)
 
   66    new_files_to_iov = OrderedDict()
 
   67    for run, run_files 
in run_to_files.items():
 
   68        for file_path 
in run_files:
 
   70            new_files_to_iov[file_path] = IoV(*run, *run)
 
   71    return new_files_to_iov
 
   74def group_files_by_iov(files_to_iov):
 
   76    Inverts the files_to_iov dictionary to give back a dictionary of IoV -> File list 
   79        files_to_iov (dict): {"/path/to/file1.root": IoV(1,1,1,1), 
"/path/to/file2.root": IoV(1,1,1,1)}
 
   82        dict: {IoV(1,1,1,1): [
"/path/to/file1.root", 
"/path/to/file2.root"]}
 
   84    iov_to_files = OrderedDict() 
   85    for iov, g 
in groupby(files_to_iov.items(), 
lambda g: g[1]):
 
   86        files = [f[0] 
for f 
in g]
 
   87        iov_to_files[iov] = files
 
   91def filter_by_max_events_per_run(files_to_iov, max_events_per_run, random_select=False, max_events_per_file=0):
 
   93    This function creates a new files_to_iov dictionary by appending files 
   94    in order until the maximum number of events are reached per run.
 
   95    Each file contributes a maximum of events specified by 
"max_events_per_file".
 
   98        files_to_iov (dict): {
"/path/to/file.root": IoV(1,1,1,1)} type dictionary. Same style 
as used by the CAF
 
  100        max_events_per_run (int): The threshold we want to reach but stop adding files 
if we reach it.
 
  101        random_select (bool): true will select random nfile 
and false will take first nfile.
 
  102        max_events_per_file (int): true will limit the contribution 
from each file to max events specified.
 
  105        dict: The same style of dict 
as the input files_to_iov, but filtered down.
 
  109    iov_to_files = group_files_by_iov(files_to_iov)
 
  111    new_iov_to_files = OrderedDict()
 
  113    for iov, files 
in sorted(iov_to_files.items()):
 
  114        run = ExpRun(iov.exp_low, iov.run_low)
 
  116        remaining_files = files[:]
 
  118        while total < max_events_per_run 
and remaining_files:
 
  120                file_path = choice(remaining_files)
 
  121                remaining_files.remove(file_path)
 
  123                file_path = remaining_files.pop(0)
 
  124            events = events_in_basf2_file(file_path)
 
  127                B2INFO(f
"No events in {file_path}, skipping...")
 
  129            total += events 
if max_events_per_file <= 0 
or events <= max_events_per_file 
else max_events_per_file
 
  130            chosen_files.append(file_path)
 
  131            B2INFO(f
"Choosing input file for {run}: {file_path} and total events so far {total}")
 
  135            new_iov_to_files[iov] = chosen_files
 
  137            B2INFO(f
"No files chosen for {run}")
 
  140    new_files_to_iov = OrderedDict()
 
  141    for iov, files 
in new_iov_to_files.items():
 
  143            new_files_to_iov[path] = iov
 
  144    return new_files_to_iov
 
  147def filter_by_select_max_events_from_files(input_file_list, select_max_events_from_files):
 
  149    This function creates a new list by appending random files until 
  150    the maximum number of events are reached per data set. 
  153        input_file_list (list): ["/path/to/file2.root", 
"/path/to/file2.root"]
 
  154        select_max_events_from_files (int): The threshold we want to reach but stop adding files 
if we reach it.
 
  157        list: The sorted list of random files 
or empty list of 
not enought found
 
  162    while total < select_max_events_from_files:
 
  164        if not input_file_list:
 
  167        file_path = choice(input_file_list)
 
  168        input_file_list.remove(file_path)
 
  170        events = events_in_basf2_file(file_path)
 
  173            B2INFO(f
"No events in {file_path}, skipping...")
 
  177        selected_file.append(file_path)
 
  178        B2INFO(f
"Choosing random input file: {file_path} and total events so far {total}")
 
  181    if total < select_max_events_from_files:
 
  182        B2INFO(f
"total events {total} are less than requested {select_max_events_from_files}")
 
  185    return sorted(selected_file)
 
  188def events_in_basf2_file(file_path):
 
  189    """Does a quick open and return of the number of entries in a basf2 file's tree object. 
  192        file_path (str): File path to ROOT file 
  195        int: Number of entries in tree.
 
  197    f = ROOT.TFile.Open(file_path, "READ")
 
  198    events = f.tree.GetEntries()