Belle II Software prerelease-10-00-00a
test_multiple_python.py
1#!/usr/bin/env python3
2
3from textwrap import dedent
4import unittest
5
6import pandas as pd
7import numpy as np
8import uproot
9
10import basf2_mva
11import basf2_mva_util
12
13
def write_dummy_file(variables, size=10, target_variable="target"):
    """
    Write the ROOT file ``dummy.root`` containing a tree ``tree`` of random data.

    One column of normally distributed random numbers is stored per entry of
    ``variables``. One additional random column is compared against 0.5 to
    build the target column. Every column is stored as float64.

    Parameters:
        variables: names of the feature columns
        size: number of rows to generate
        target_variable: name of the target column
    """
    # one random column per variable plus a trailing one used for the target
    samples = np.random.normal(size=[size, len(variables) + 1])
    columns = {name: samples[:, index] for index, name in enumerate(variables)}
    columns[target_variable] = samples[:, -1] > 0.5
    with uproot.recreate('dummy.root') as outfile:
        outfile['tree'] = pd.DataFrame(columns, dtype=np.float64)
22
23
def train_bdt():
    """
    Train a scikit-learn GradientBoostingClassifier via the basf2_mva teacher.

    Writes the steering file ``bdt.py`` into the current directory and then
    calls ``basf2_mva.teacher`` on the tree ``tree`` of ``dummy.root`` with
    the variables ``var1``/``var2``, the target ``target`` and the
    identifier ``SKLearn-BDT.xml``.
    """
    # NOTE(review): the steering code uses ``State`` and ``np`` without
    # importing them; presumably the sklearn framework of the python
    # interface provides them - confirm.
    with open("bdt.py", "w") as f:
        f.write(
            dedent(
                """
                def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
                    from sklearn.ensemble import GradientBoostingClassifier
                    clf = GradientBoostingClassifier()
                    return State(clf)

                def apply(state, X):
                    "redefine this on purpose to ensure we spot if it gets overwritten"
                    from sklearn.ensemble import GradientBoostingClassifier
                    if not isinstance(state.estimator, GradientBoostingClassifier):
                        raise TypeError(f"Wrong classifier, expected GradientBoostingClassifier, got {type(state.estimator)}")
                    x = state.estimator.predict_proba(X)
                    return np.require(x, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
                """
            )
        )
    variables = ["var1", "var2"]
    general_options = basf2_mva.GeneralOptions()
    general_options.m_datafiles = basf2_mva.vector("dummy.root")
    general_options.m_treename = "tree"
    general_options.m_identifier = "SKLearn-BDT.xml"
    general_options.m_variables = basf2_mva.vector(*variables)
    general_options.m_target_variable = "target"
    # options of the python method: sklearn framework steered by bdt.py
    python_options = basf2_mva.PythonOptions()
    python_options.m_framework = "sklearn"
    python_options.m_steering_file = "bdt.py"
    basf2_mva.teacher(general_options, python_options)
56
57
def train_mlp():
    """
    Train a scikit-learn MLPClassifier via the basf2_mva teacher.

    The steering file ``mlp.py`` is written into the current directory, then
    ``basf2_mva.teacher`` is called on the tree ``tree`` of ``dummy.root``
    with the variables ``var1``/``var2``, the target ``target`` and the
    identifier ``SKLearn-MLP.xml``.
    """
    steering_code = dedent(
        """
        def get_model(number_of_features, number_of_spectators, number_of_events, training_fraction, parameters):
            from sklearn.neural_network import MLPClassifier
            clf = MLPClassifier()
            return State(clf)

        def end_fit(state):
            state.estimator = state.estimator.fit(np.vstack(state.X), np.hstack(state.y))
            return state.estimator

        def apply(state, X):
            "redefine this on purpose to ensure we spot if it gets overwritten"
            from sklearn.neural_network import MLPClassifier
            if not isinstance(state.estimator, MLPClassifier):
                raise TypeError(f"Wrong classifier, expected MLPClassifier, got {type(state.estimator)}")
            x = state.estimator.predict_proba(X)
            return np.require(x, dtype=np.float32, requirements=['A', 'W', 'C', 'O'])
        """
    )
    with open("mlp.py", "w") as steering_file:
        steering_file.write(steering_code)

    # general settings: input file, tree, weightfile identifier, features, target
    general = basf2_mva.GeneralOptions()
    general.m_datafiles = basf2_mva.vector("dummy.root")
    general.m_treename = "tree"
    general.m_identifier = "SKLearn-MLP.xml"
    general.m_variables = basf2_mva.vector("var1", "var2")
    general.m_target_variable = "target"

    # method specific settings: sklearn framework steered by mlp.py
    specific = basf2_mva.PythonOptions()
    specific.m_framework = "sklearn"
    specific.m_steering_file = "mlp.py"

    basf2_mva.teacher(general, specific)
93
94
def get_expert(identifier):
    """
    Create an expert of the "Python" MVA interface and load a weightfile into it.

    Parameters:
        identifier: identifier passed to ``basf2_mva_util.Method`` whose
            weightfile is loaded into the expert

    Returns:
        the expert after ``load`` was called with the weightfile
    """
    import ROOT

    trained_method = basf2_mva_util.Method(identifier)

    abstract_interface = ROOT.Belle2.MVA.AbstractInterface
    abstract_interface.initSupportedInterfaces()
    supported = abstract_interface.getSupportedInterfaces()

    python_interface = supported["Python"]
    python_expert = python_interface.getExpert()
    python_expert.load(trained_method.weightfile)
    return python_expert
105
106
107class TestMultiplePython(unittest.TestCase):
108 """
109 Test if we can run multiple python methods for the same framework at the same time (MR !4244)
110 """
111
113 """
114 Try if we can run a sklearn bdt and a sklear mlp at the same time
115 """
116 import ROOT
117 from ROOT import vector
118
119 variables = ["var1", "var2"]
120 write_dummy_file(variables)
121 train_bdt()
122 train_mlp()
123
124 # test by explicitly initializing two experts to provoke clashes
125 bdt_expert = get_expert("SKLearn-BDT.xml")
126 mlp_expert = get_expert("SKLearn-MLP.xml")
127
128 inputs = vector[vector[float]]([vector[float]([1, 2])])
129 spectators = vector[vector[float]]()
130
131 general_options = basf2_mva.GeneralOptions()
132 general_options.m_variables = basf2_mva.vector(*variables)
133 dataset = ROOT.Belle2.MVA.MultiDataset(general_options, inputs, spectators)
134
135 bdt_expert.apply(dataset)
136 mlp_expert.apply(dataset)
137
138
if __name__ == '__main__':
    import b2test_utils

    # The test writes dummy.root, bdt.py, mlp.py and the weightfiles relative
    # to the current directory, so run it inside the working directory
    # provided by b2test_utils.clean_working_directory().
    with b2test_utils.clean_working_directory():
        unittest.main()