# Belle II Software, release-08-01-10
# 01_PrepPIDTrainingSample.py
1 #!/usr/bin/env python3
2 
3 
10 
11 
18 
19 import basf2 as b2
20 import uproot
21 import pidDataUtils as pdu
22 import subprocess
23 from os import makedirs
24 
# Read the example D* MC ntuple into a pandas DataFrame.
filename = b2.find_file('mc_dstar.root', 'examples', False)
df = uproot.open(filename)['my_ttree'].arrays(library="pd")
print("ROOT file read into DataFrame.")

# Make slim h5 files for each particle type and merge into one large file.
# Pions come from both the D0 daughter and the slow-pion candidates;
# kaons come from the D0 daughter only.
makedirs('data', exist_ok=True)
pdu.make_h5(df, ['DST_D0_pi', 'DST_pi'], 'data/slim_dstar_pion.h5', pdg=211)
print("Slim h5 file made at data/slim_dstar_pion.h5")

pdu.make_h5(df, ['DST_D0_K'], 'data/slim_dstar_kaon.h5', pdg=321)
print("Slim h5 file made at data/slim_dstar_kaon.h5")

pdu.merge_h5s(['data/slim_dstar_pion.h5', 'data/slim_dstar_kaon.h5'], 'data/slim_dstar.h5')
print("H5 files merged, written out to data/slim_dstar.h5")

# Split into train/val/test sets for training.
pdu.split_h5('data/slim_dstar.h5', 'data/slim_dstar')
print("Data in data/slim_dstar.h5 split into train/val/test files in directory: data/slim_dstar")

# Now train weights with the pidTrainWeights script, equivalent to
# $ python3 path/to/pidTrainWeights.py ./data/slim_dstar ./models/net.pt -n 100 --only 211 321
script_path = b2.find_file('analysis/scripts/pidTrainWeights.py')
# Use an argument list (shell=False) rather than an interpolated shell string:
# this is robust against spaces/metacharacters in script_path and avoids the
# shell-injection pattern. check=True makes a failed training run raise
# CalledProcessError instead of being silently ignored.
subprocess.run(
    ["python3", script_path, "./data/slim_dstar", "./models/net.pt",
     "-n", "100", "--only", "211", "321"],
    check=True,
)