Belle II Software development
test_multiple_python.py
1#!/usr/bin/env python3
2
3from textwrap import dedent
4import unittest
5
6import pandas as pd
7import numpy as np
8import uproot
9
10import basf2_mva
11import basf2_mva_util
12
13
def write_dummy_file(variables, size=10, target_variable="target"):
    """
    Write a reproducible random dataset to ``dummy.root`` under the key ``tree``.

    One column of normally distributed numbers is written per entry of
    ``variables``. The ``target_variable`` column is derived from one extra
    normal column by thresholding it at 0.5. All columns are stored as float64.

    Parameters:
        variables: names of the feature columns
        size: number of rows to generate
        target_variable: name of the target column
    """
    # Fixed seed, so every call generates the same numbers
    generator = np.random.default_rng(42)
    n_features = len(variables)
    # One column per feature plus one trailing column used for the target
    samples = generator.normal(size=[size, n_features + 1])
    columns = {name: samples[:, index] for index, name in enumerate(variables)}
    columns[target_variable] = samples[:, -1] > 0.5
    with uproot.recreate('dummy.root') as outfile:
        # dtype=float64 turns the boolean target into 0.0 / 1.0
        outfile['tree'] = pd.DataFrame(columns, dtype=np.float64)
24
25
def train_bdt():
    """
    Train a scikit-learn GradientBoostingClassifier through the basf2 MVA teacher.

    Writes the steering file ``bdt.py`` into the current directory and runs
    ``basf2_mva.teacher`` on ``dummy.root`` (tree ``tree``, variables ``var1``
    and ``var2``, target ``target``) with the identifier ``SKLearn-BDT.xml``.
    """
    # NOTE(review): the steering code uses ``State`` and ``np`` without importing
    # them; presumably the sklearn framework provides both - confirm.
    steering_code = dedent(
        """
        def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
            from sklearn.ensemble import GradientBoostingClassifier
            clf = GradientBoostingClassifier()
            return State(clf)

        def apply(state, X):
            "redefine this on purpose to ensure we spot if it gets overwritten"
            from sklearn.ensemble import GradientBoostingClassifier
            if not isinstance(state.estimator, GradientBoostingClassifier):
                raise TypeError(f"Wrong classifier, expected GradientBoostingClassifier, got {type(state.estimator)}")
            x = state.estimator.predict_proba(X)
            return np.require(x, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
        """
    )
    with open("bdt.py", "w") as f:
        f.write(steering_code)

    variables = ["var1", "var2"]
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("dummy.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "SKLearn-BDT.xml"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "target"

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "sklearn"
    python_options.m_steering_file = "bdt.py"
    basf2_mva.teacher(general_options, python_options)
58
59
def train_mlp():
    """
    Train a scikit-learn MLPClassifier through the basf2 MVA teacher.

    Writes the steering file ``mlp.py`` into the current directory and runs
    ``basf2_mva.teacher`` on ``dummy.root`` (tree ``tree``, variables ``var1``
    and ``var2``, target ``target``) with the identifier ``SKLearn-MLP.xml``.
    """
    steering_code = dedent(
        """
        def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
            from sklearn.neural_network import MLPClassifier
            clf = MLPClassifier()
            return State(clf)

        def end_fit(state):
            state.estimator = state.estimator.fit(np.vstack(state.X), np.hstack(state.y))
            return state.estimator

        def apply(state, X):
            "redefine this on purpose to ensure we spot if it gets overwritten"
            from sklearn.neural_network import MLPClassifier
            if not isinstance(state.estimator, MLPClassifier):
                raise TypeError(f"Wrong classifier, expected MLPClassifier, got {type(state.estimator)}")
            x = state.estimator.predict_proba(X)
            return np.require(x, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
        """
    )
    with open("mlp.py", "w") as steering_file:
        steering_file.write(steering_code)

    # Same dataset and variables as used for the BDT training
    feature_names = ["var1", "var2"]
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("dummy.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "SKLearn-MLP.xml"
    general_options.m_variables = basf2_mva.vector(*feature_names)
    general_options.m_target_variable = "target"

    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "sklearn"
    python_options.m_steering_file = "mlp.py"
    basf2_mva.teacher(general_options, python_options)
95
96
def get_expert(identifier):
    """
    Create an expert of the "Python" MVA interface and load a weightfile into it.

    Parameters:
        identifier: identifier passed to ``basf2_mva_util.Method`` to obtain the weightfile

    Returns:
        the expert object after ``load`` has been called on it
    """
    import ROOT

    method = basf2_mva_util.Method(identifier)

    abstract_interface = ROOT.Belle2.MVA.AbstractInterface
    abstract_interface.initSupportedInterfaces()
    python_interface = abstract_interface.getSupportedInterfaces()["Python"]

    python_expert = python_interface.getExpert()
    python_expert.load(method.weightfile)
    return python_expert
107
108
class TestMultiplePython(unittest.TestCase):
    """
    Test if we can run multiple python methods for the same framework at the same time (MR !4244)
    """

    def test_multiple_sklearn(self):
        """
        Try if we can run a sklearn bdt and a sklearn mlp at the same time
        """
        import ROOT
        from ROOT import vector

        variables = ["var1", "var2"]
        write_dummy_file(variables)
        train_bdt()
        train_mlp()

        # test by explicitly initializing two experts to provoke clashes
        bdt_expert = get_expert("SKLearn-BDT.xml")
        mlp_expert = get_expert("SKLearn-MLP.xml")

        # a single entry with one value per variable, and no spectators
        inputs = vector[vector[float]]([vector[float]([1, 2])])
        spectators = vector[vector[float]]()

        general_options = basf2_mva.GeneralOptions()
        general_options.m_variables = basf2_mva.vector(*variables)
        dataset = ROOT.Belle2.MVA.MultiDataset(general_options, inputs, spectators)

        # No explicit assertions: the apply functions written by train_bdt and
        # train_mlp raise a TypeError if they are handed the wrong estimator,
        # so the test passes if both calls return without raising.
        bdt_expert.apply(dataset)
        mlp_expert.apply(dataset)
139
140
if __name__ == '__main__':
    import b2test_utils
    # The tests write dummy.root, bdt.py and mlp.py into the current directory.
    # NOTE(review): clean_working_directory() presumably runs the block in a
    # scratch directory so these files do not pollute the caller's cwd - confirm.
    with b2test_utils.clean_working_directory():
        unittest.main()
# Cross-reference: clean_working_directory() is defined at __init__.py:198