def populate_avail_samples(X, Y, B_reco=0):
    """
    Sift through the file metadata to populate a list of available dataset samples.

    Args:
        X (list): List of ROOT lazyarray dicts for X (input) data. Entry ``i`` is
            indexed with the keys ``"event"``, ``"b_index"`` and ``"primary"``.
        Y (list): List of ROOT lazyarray dicts for Y (ground truth) data. Entry ``i``
            is indexed as ``Y[i][b_index]["n_LCA"][evt_idx]``.
        B_reco (int): Reconstruction mode flag (set automatically):

            .. math::
               \\text{Upsilon} (4S) = 0,\\ B^0 = 1,\\ B^+ = 2.

    Returns:
        list: List of available samples for training, each one a
        ``(file_index, event_index, b_index)`` tuple.
    """
    avail_samples = []

    # A falsy B_reco gives a single sample per event (stored under index 1);
    # any other value treats the two B candidates (1 and 2) separately.
    # Loop-invariant, so computed once.
    b_indices = [1] if not B_reco else [1, 2]

    # X and Y are walked in lockstep: X[i] and Y[i] are indexed with the same i.
    for i, f in enumerate(Y):
        # NOTE(review): the listing this function was recovered from uses
        # `x_attrs` without defining it; it is taken to be X[i], the same entry
        # the event list is read from -- confirm against upstream.
        x_attrs = X[i]
        events = x_attrs["event"]

        for evt_idx, _ in enumerate(events):
            for b_index in b_indices:
                # NOTE(review): this value is read but never used in the
                # recovered listing; a filter on it may have been lost --
                # confirm against upstream before relying on the result.
                lca_rows = f[b_index]["n_LCA"][evt_idx]

                evt_b_index = x_attrs["b_index"][evt_idx]
                evt_primary = x_attrs["primary"][evt_idx]

                # Falsy B_reco: every particle whose b_index is not -1 counts.
                # Otherwise: only particles assigned to the current b_index count.
                matched = (
                    (evt_b_index != -1)
                    if not B_reco
                    else (evt_b_index == int(b_index))
                )

                # NOTE(review): both guards had no body in the recovered listing;
                # `continue` (skip this candidate) is assumed.
                if matched.sum() == 0:
                    continue

                # At least two matched particles must also be flagged primary.
                if np.sum(np.logical_and(matched, evt_primary)) < 2:
                    continue

                avail_samples.append((i, evt_idx, b_index))

    return avail_samples
 
   70def preload_root_data(root_files, features, discarded):
 
   72    Load all data from root files as lazyarrays (not actually read from disk until accessed). 
   75        root_files (str): Path to ROOT files. 
   76        features (list): List of feature names. 
   77        discarded (list): List of features present in the ROOT files and not used as input, 
   78            but used to calculate other quantities (e.g. edge features). 
   81        list, list: Lists of dictionaries containing training information for input and ground-truth. 
   87        with uproot.open(f)[
"Tree"] 
as tree:
 
   89            event = tree[
"event"].array(library=
"np")
 
   92            x_dict[
"event"] = event
 
   93            x_dict[
"features"] = {
 
   94                feat: tree[feat].array(library=
"np") 
for feat 
in features
 
   96            x_dict[
"discarded"] = {
 
   97                feat: tree[feat].array(library=
"np") 
for feat 
in discarded
 
  101            x_dict[
"leaves"] = tree[
"leaves"].array(library=
"np")
 
  102            x_dict[
"primary"] = tree[
"primary"].array(library=
"np")
 
  103            x_dict[
"b_index"] = tree[
"b_index"].array(library=
"np")
 
  104            x_dict[
"mc_pdg"] = tree[
"mcPDG"].array(library=
"np")
 
  106            y_dict = {1: {}, 2: {}}
 
  110                y_dict[i][
"n_LCA"] = tree[f
"n_LCA_leaves_{i}"].array(library=
"np")
 
  111                y_dict[i][
"LCA"] = tree[f
"LCAS_{i}"].array(library=
"np")
 
  112                y_dict[i][
"LCA_leaves"] = tree[f
"LCA_leaves_{i}"].array(library=
"np")