Belle II Software  light-2205-abys
regression_in_python.py
1 #!/usr/bin/env python3
2 
3 
10 
11 import basf2_mva
12 import pandas as pd
13 from root_pandas import to_root
14 import uproot
15 import numpy as np
16 from matplotlib import pyplot as plt
17 
18 
def train_mva_method(file_name):
    """
    Train a regression FastBDT method with basf2_mva and return its identifier.

    The training reads the tree named "tree" from ``file_name``, uses the
    variables 'A' and 'B' as inputs and 'C' as the target variable.

    @param file_name name of the ROOT file passed to the teacher as data file
    @return the identifier ("weightfile.root") under which the method was trained
    """
    identifier = "weightfile.root"

    # Options shared by all MVA methods: where the data lives, which
    # variables to use, what to predict and where to store the result.
    common_options = basf2_mva.GeneralOptions()
    common_options.m_identifier = identifier
    common_options.m_treename = "tree"
    common_options.m_datafiles = basf2_mva.vector(file_name)
    common_options.m_target_variable = 'C'
    common_options.m_variables = basf2_mva.vector('A', 'B')

    # Options of the regression method itself.
    method_options = basf2_mva.RegressionFastBDTOptions()
    # The regression specific settings are set directly on the options object ...
    method_options.setMaximalBinNumber(20)
    # ... while the options specific to the base classifier would be reachable via
    # base_options = method_options.getBaseClassifierOptions()

    basf2_mva.teacher(common_options, method_options)

    return identifier
40 
41 
def create_random_data():
    """
    Write a small random example dataset to "data.root" and return that file name.

    The dataset has 1000 rows with the columns A and B drawn from
    ``np.random.rand`` and the target column C, which is the average of A and B.
    It is stored without the index in a tree with the key "tree".

    @return the name of the written ROOT file
    """
    output_name = "data.root"
    n_rows = 1000

    # A is drawn before B, both from numpy's global random state
    columns = {"A": np.random.rand(n_rows), "B": np.random.rand(n_rows)}
    frame = pd.DataFrame(columns)

    # The regression target is simply the mean of the two inputs
    frame["C"] = (frame["A"] + frame["B"]) / 2

    to_root(frame, output_name, store_index=False, key="tree")
    return output_name
51 
52 
def apply_expert(file_name, weight_file):
    """
    Apply a trained method to a data file with the basf2_mva expert.

    @param file_name name of the ROOT file to apply the method to (tree "tree")
    @param weight_file identifier of the trained method
    @return the name of the file the expert output is written to ('expert.root')
    """
    result_name = 'expert.root'

    # The expert takes lists of identifiers and data files, here one of each
    identifiers = basf2_mva.vector(weight_file)
    data_files = basf2_mva.vector(file_name)

    basf2_mva.expert(identifiers, data_files, 'tree', result_name)
    return result_name
57 
58 
def create_plot(expert_file):
    """
    Draw a scatter plot from the expert output and save it as "result.pdf".

    The branch "weightfile__ptroot_C" is drawn on the x axis (labelled
    "Correct") and "weightfile__ptroot" on the y axis (labelled "Output").
    NOTE(review): the branch names presumably derive from the identifier
    "weightfile.root" and the target 'C' used in training - confirm before
    changing either of them.

    @param expert_file name of the ROOT file containing the tree "variables"
    """
    # Load the full "variables" tree into a pandas DataFrame
    tree = uproot.open(expert_file)["variables"]
    results = tree.arrays(library="pd")

    # Draw into the currently active matplotlib axes
    axes = plt.gca()
    results.plot.scatter(x="weightfile__ptroot_C", y="weightfile__ptroot", ax=axes)

    plt.xlabel("Correct")
    plt.ylabel("Output")
    plt.savefig("result.pdf")
65 
66 
if __name__ == "__main__":
    from basf2 import conditions

    # NOTE: do not use testing payloads in production! Any results obtained like this WILL NOT BE PUBLISHED
    local_payloads = ['localdb/database.txt']
    conditions.testing_payloads = local_payloads

    # Step 1: let's write a small random example dataset to disk
    data_path = create_random_data()

    # Step 2: train a regression MVA method on that dataset
    weights_path = train_mva_method(data_path)

    # Step 3: apply the trained method to the same data file
    predictions_path = apply_expert(data_path, weights_path)

    # Step 4: generate an example plot from the expert output
    create_plot(predictions_path)