def populate_avail_samples(X, Y, B_reco=0):
    """
    Sifts through the file metadata to populate a list of available dataset samples.

    A sample is identified by the triple ``(file index, event index, B index)``.
    A triple is kept only if at least one particle of the event is matched to
    the B under consideration and at least two of the matched particles are
    flagged as primary.

    Args:
        X (list): List of ROOT lazyarray dicts for X (input) data. Each dict is
            read through its ``"event"``, ``"b_index"`` and ``"primary"`` keys.
        Y (list): List of ROOT lazyarray dicts for Y (ground truth) data. Each
            dict is indexed by B index and then by ``"n_LCA"``.
        B_reco (int): Reconstruction mode flag (set automatically):

            .. math::
                \\text{Upsilon} (4S) = 0,\\ B^0 = 1,\\ B^+ = 2.

    Returns:
        list: List of available samples for training, as
        ``(file_idx, evt_idx, b_index)`` tuples.
    """
    avail_samples = []

    # Loop-invariant: with B_reco unset only index 1 is considered, otherwise
    # both B indices are inspected separately.
    b_indices = [1, 2] if B_reco else [1]

    # Iterate over Y and use the same position to look up the matching X entry.
    for i, f in enumerate(Y):
        x_attrs = X[i]
        events = x_attrs["event"]

        for evt_idx, _ in enumerate(events):
            # Per-event properties are identical for every B index.
            evt_b_index = x_attrs["b_index"][evt_idx]
            evt_primary = x_attrs["primary"][evt_idx]

            for b_index in b_indices:
                lca_rows = f[b_index]["n_LCA"][evt_idx]

                # NOTE(review): the guard on ``lca_rows`` was elided in the
                # listing this block was recovered from; "< 2" (trivial LCA)
                # is assumed here -- confirm against the upstream file.
                if lca_rows < 2:
                    continue

                # Without B_reco every particle with a valid b_index counts;
                # otherwise only particles assigned to the current B do.
                if B_reco:
                    matched = evt_b_index == int(b_index)
                else:
                    matched = evt_b_index != -1

                # Nothing reconstructed for this B: skip.
                if matched.sum() == 0:
                    continue

                # Fewer than two matched primaries: skip.
                if np.sum(np.logical_and(matched, evt_primary)) < 2:
                    continue

                avail_samples.append((i, evt_idx, b_index))

    return avail_samples
def preload_root_data(root_files, features, discarded):
    """
    Load all data from root files into per-file dictionaries of arrays.

    NOTE(review): the original description calls these "lazyarrays (not
    actually read from disk until accessed)", but every branch is fetched with
    ``.array(library="np")``, which looks like an eager read -- confirm against
    the uproot version in use.

    Args:
        root_files (list): Paths to the ROOT files; each one is opened
            separately and its ``"Tree"`` object is read.
        features (list): List of feature names.
        discarded (list): List of features present in the ROOT files and not used as input,
            but used to calculate other quantities (e.g. edge features).

    Returns:
        list, list: Lists of dictionaries containing training information for input and ground-truth.
        Both lists hold one entry per file, in the order of ``root_files``.
    """
    x = []
    y = []

    for f in root_files:
        with uproot.open(f)["Tree"] as tree:
            event = tree["event"].array(library="np")

            # Input-side arrays, keyed the way the dataset code looks them up.
            x_dict = {}
            x_dict["event"] = event
            x_dict["features"] = {
                feat: tree[feat].array(library="np") for feat in features
            }
            x_dict["discarded"] = {
                feat: tree[feat].array(library="np") for feat in discarded
            }

            x_dict["leaves"] = tree["leaves"].array(library="np")
            x_dict["primary"] = tree["primary"].array(library="np")
            x_dict["b_index"] = tree["b_index"].array(library="np")
            x_dict["mc_pdg"] = tree["mcPDG"].array(library="np")

            # Ground truth is stored once per B index (1 and 2), each with its
            # own set of LCA branches.
            y_dict = {1: {}, 2: {}}
            for i in (1, 2):
                y_dict[i]["n_LCA"] = tree[f"n_LCA_leaves_{i}"].array(library="np")
                y_dict[i]["LCA"] = tree[f"LCAS_{i}"].array(library="np")
                y_dict[i]["LCA_leaves"] = tree[f"LCA_leaves_{i}"].array(library="np")

            x.append(x_dict)
            y.append(y_dict)

    return x, y