41 from sklearn.neural_network 
import MLPClassifier
 
   42 from sklearn2pmml 
import sklearn2pmml, PMMLPipeline
 
   44 from lxml 
import etree 
as ET
 
   49 parser = argparse.ArgumentParser(description=
"Train the SVD hit time estimator")
 
   57     help=
'Global tag to use at central DB in PNNL')
 
   59 args = parser.parse_args()
 
   61 pkl_name = 
'SVDTime_Training{0}_{1}.pkl' 
   63 print(
'Reading data...')
 
   65 stockdata = pd.read_pickle(pkl_name.format(
'Sample', args.n_samples))
 
   66 bounds = pd.read_pickle(pkl_name.format(
'Bounds', args.n_samples))
 
   67 bins = pd.read_pickle(pkl_name.format(
'Bins', args.n_samples))
 
   69 timearray = bins[
'midpoint']
 
   70 timebins = np.unique(bins[[
'lower', 
'upper']])
 
   77 X = stockdata[[
's' + str(i) 
for i 
in range(1, 7)] + [
'normed_tau']]
 
   78 Y = stockdata[
't0_bin']
 
   79 X_train = X[stockdata.test > test_fraction]
 
   80 X_test = X[stockdata.test < test_fraction]
 
   81 Y_train = Y[stockdata.test > test_fraction]
 
   82 Y_test = Y[stockdata.test < test_fraction]
 
   84 classifier = MLPClassifier(
 
   85     hidden_layer_sizes=(len(timearray) - 1, len(timearray) + 1),
 
   93 print(
'Fitting the neural network...')
 
   95 nntime_fitter = PMMLPipeline([(
'claasifier', classifier)])
 
   96 nntime_fitter.fit(X_train, Y_train)
 
   98 test_score = nntime_fitter.score(X_test, Y_test)
 
   99 train_score = nntime_fitter.score(X_train, Y_train)
 
  100 print(
'Test: {}'.format(test_score))
 
  101 print(
'Train: {}'.format(train_score))
 
  102 print(
'Fitting done.')
 
  104 print(
'Writing output...')
 
  106 pmml_name = 
'SVDTimeNet.pmml' 
  107 xml_name = pmml_name.replace(
'pmml', 
'xml')
 
  108 sklearn2pmml(nntime_fitter, pmml_name, with_repr=
True)
 
  110 parser = ET.XMLParser(remove_blank_text=
True)
 
  111 net = ET.parse(pmml_name, parser)
 
  114 namespace = root.nsmap[
None]
 
  115 nsprefix = 
'{' + namespace + 
'}' 
  116 procinfo = root.find(nsprefix + 
'MiningBuildTask')
 
  119 name = ET.SubElement(procinfo, nsprefix + 
'Title')
 
  120 name.text = 
'Neural network for time shift estimation' 
  123 target = ET.SubElement(procinfo, nsprefix + 
'IntendedUse')
 
  124 basf2proc = ET.SubElement(target, nsprefix + 
'basf2process')
 
  125 basf2simulation = ET.SubElement(basf2proc, nsprefix + 
'Simulation')
 
  126 basf2simulation.text = 
'yes' 
  127 basf2reconstruction = ET.SubElement(basf2proc, nsprefix + 
'Reconstruction')
 
  128 basf2reconstruction.text = 
'yes' 
  129 sensorType = ET.SubElement(target, nsprefix + 
'SensorType')
 
  130 sensorType.text = 
'all' 
  131 sensorSide = ET.SubElement(target, nsprefix + 
'SensorSide')
 
  132 sensorSide.text = 
'all' 
  135 training = ET.SubElement(procinfo, nsprefix + 
'Training')
 
  136 source = ET.SubElement(training, nsprefix + 
'SampleSource')
 
  137 source.text = 
'Toy simulation' 
  138 genfunc = ET.SubElement(training, nsprefix + 
'Waveform')
 
  139 genfunc.text = 
'beta-prime' 
  140 num_samples = ET.SubElement(training, nsprefix + 
'SampleSize')
 
  141 train_samples = ET.SubElement(num_samples, nsprefix + 
'Training', {
'n': str(int((1 - test_fraction) * args.n_samples))})
 
  142 test_samples = ET.SubElement(num_samples, nsprefix + 
'Test', {
'n': str(int(test_fraction * args.n_samples))})
 
  144     lambda row: ET.SubElement(training, nsprefix + 
'Parameter', **{u: str(v) 
for u, v 
in row.items()}), axis=1
 
  147 netparams = ET.SubElement(procinfo, nsprefix + 
'NetworkParameters')
 
  148 inputLayer = ET.SubElement(netparams, nsprefix + 
'NetworkLayer')
 
  149 inputLayer.attrib[
'number'] = str(0)
 
  150 inputLayer.attrib[
'kind'] = 
'input' 
  151 inputLayer.attrib[
'size'] = str(7)  
 
  152 n_hidden = len(classifier.hidden_layer_sizes)
 
  153 for (iLayer, sz) 
in zip(range(1, 1 + n_hidden), classifier.hidden_layer_sizes):
 
  154     layer = ET.SubElement(netparams, nsprefix + 
'NetworkLayer')
 
  155     layer.attrib[
'number'] = str(iLayer)
 
  156     layer.attrib[
'kind'] = 
'hidden' 
  157     layer.attrib[
'size'] = str(sz)
 
  158 outputLayer = ET.SubElement(netparams, nsprefix + 
'NetworkLayer')
 
  159 outputLayer.attrib[
'number'] = str(n_hidden + 1)
 
  160 outputLayer.attrib[
'kind'] = 
'output' 
  161 outputLayer.attrib[
'size'] = str(len(timearray))
 
  163 for field 
in root.find(nsprefix + 
'DataDictionary'):
 
  164     if field.attrib[
'name'] == 
't0_bin':
 
  166             i = int(child.attrib[
'value'])
 
  167             child.attrib[
'lower'] = 
'{0:.3f}'.format(bins.loc[i, 
'lower'])
 
  168             child.attrib[
'upper'] = 
'{0:.3f}'.format(bins.loc[i, 
'upper'])
 
  169             child.attrib[
'midpoint'] = 
'{0:.3f}'.format(bins.loc[i, 
'midpoint'])
 
  171 net.write(xml_name, xml_declaration=
True, pretty_print=
True, encoding=
'utf-8')
 
  173 print(
'Output saved.')
 
  175 print(
'Saving fits...')
 
  178 amp_index = bounds[bounds.value == 
'amplitude'].index[0]
 
  179 amp_range = (bounds.ix[amp_index, 
'low'], bounds.ix[amp_index, 
'high'])
 
  180 tau_index = bounds[bounds.value == 
'tau'].index[0]
 
  181 tau_range = (bounds.ix[tau_index, 
'low'], bounds.ix[tau_index, 
'high'])
 
  182 coder = tau_encoder(amp_range, tau_range)
 
  186 Trues_test = stockdata[stockdata.test < test_fraction][[
't0', 
'amplitude', 
'tau', 
't0_bin', 
'abin']]
 
  190 probs = nntime_fitter.predict_proba(X_test)
 
  195 def fitFromProb(fw, signals, p, tau, timearray):
 
  196     t_fit = np.average(timearray, weights=p)
 
  197     t_sigma = np.sqrt(np.average((timearray - t_fit)**2, weights=p))
 
  198     weights = fw(-t_fit + np.linspace(-dt, 4 * dt, 6, endpoint=
True), tau=tau)
 
  199     weights[signals.values == 0.0] = 0.0
 
  200     norm = 1.0 / np.inner(weights, weights)
 
  201     a_fit = np.inner(signals, weights) * norm
 
  202     a_sigma = np.sqrt(norm)
 
  203     residuals = signals - a_fit * weights
 
  204     ndf = np.sum(np.ones_like(signals[signals > 0])) - 2  
 
  205     chi2_ndf = np.inner(residuals, residuals) / ndf
 
  215 probdf = pd.DataFrame(probs)
 
  216 probdf.index = X_test.index
 
  217 probdf.to_pickle(
'SVDTime_TrainingProbs_{0}.pkl'.format(args.n_samples))
 
  220     lambda row: fitFromProb(
 
  222         row[[
's' + str(i) 
for i 
in range(1, 7)]],
 
  224         coder.decode(row[
'normed_tau']),
 
  228 fits[
't_true'] = Trues_test[
't0']
 
  229 fits[
'tau'] = Trues_test[
'tau']
 
  230 fits[
'a_true'] = Trues_test[
'amplitude']
 
  231 fits[
't_bin'] = Trues_test[
't0_bin']
 
  232 fits[
'a_bin'] = Trues_test[
'abin']
 
  234 fits.to_pickle(
'SVDTime_TrainingFits_{0}.pkl'.format(args.n_samples))
 
  236 print(
'Writing classifier...')
 
  238 with open(
'classifier.pkl', 
'wb') 
as f:
 
  239     pickle.dump(classifier, f)
 
  241 with open(
'classifier.txt', 
'w') 
as cdump:
 
  242     cdump.write(
"Classifier coefficients:\n")
 
  243     for iLayer 
in range(len(classifier.coefs_)):
 
  244         cdump.write(
'Layer: {0}\n'.format(iLayer))
 
  245         nrows = classifier.coefs_[iLayer].shape[0]
 
  246         ncols = classifier.coefs_[iLayer].shape[1]
 
  247         cdump.write(
'Weights:\n')
 
  248         for col 
in range(ncols):
 
  249             s = 
" ".join([str(classifier.coefs_[iLayer][row, col]) 
for row 
in range(nrows)])
 
  253         cdump.write(
'Intercepts:\n')
 
  254         s = 
" ".join([str(classifier.intercepts_[iLayer][col]) 
for col 
in range(ncols)])
 
  258 print(
"Learning phase completed.")