Belle II Software light-2406-ragdoll
01_PrepPIDTrainingSample.py
1#!/usr/bin/env python3
2
3
10
11
18
19import basf2 as b2
20import uproot
21import pidDataUtils as pdu
22import subprocess
23from os import makedirs
24
# Locate the example ROOT file and load its 'my_ttree' tree into a pandas DataFrame.
filename = b2.find_file('mc_dstar.root', 'examples', False)
tree = uproot.open(filename)['my_ttree']
df = tree.arrays(library="pd")
print("ROOT file read into DataFrame.")

# All intermediate and final h5 files are written below ./data.
makedirs('data', exist_ok=True)

# One slim h5 file per particle type: (tags handed to make_h5, output file, PDG code).
# NOTE(review): the tags look like ntuple column prefixes -- confirm against pidDataUtils.
slim_jobs = [
    (['DST_D0_pi', 'DST_pi'], 'data/slim_dstar_pion.h5', 211),
    (['DST_D0_K'], 'data/slim_dstar_kaon.h5', 321),
]
for tags, slim_file, pdg_code in slim_jobs:
    pdu.make_h5(df, tags, slim_file, pdg=pdg_code)
    print(f"Slim h5 file made at {slim_file}")

# Merge the per-particle files (same order as above) into one large file.
merged_file = 'data/slim_dstar.h5'
pdu.merge_h5s([job[1] for job in slim_jobs], merged_file)
print(f"H5 files merged, written out to {merged_file}")

# Split the merged file into train/val/test sets for training.
split_dir = 'data/slim_dstar'
pdu.split_h5(merged_file, split_dir)
print(f"Data in {merged_file} split into train/val/test files in directory: {split_dir}")
44
# Now you can train weights using the pidTrainWeights script, e.g.
# $ python3 path/to/pidTrainWeights.py ./data/slim_dstar ./models/net.pt -n 100
script_path = b2.find_file('analysis/scripts/pidTrainWeights.py')

# Hand the command over as an argument list (no shell) so that a script path
# containing spaces or shell metacharacters reaches python3 as one argument.
train_command = [
    "python3", script_path,
    "./data/slim_dstar", "./models/net.pt",
    "-n", "100",
    "--only", "211", "321",
]
# check=True raises CalledProcessError when the training script exits with a
# non-zero status, instead of silently ignoring the failure.
subprocess.run(train_command, check=True)