#!/usr/bin/env python3
# -*- coding: utf-8 -*-
##########################################################################
# BASF2 (Belle Analysis Framework 2) #
# Copyright(C) 2016-2019 Belle II Collaboration #
# #
# Author: The Belle II Collaboration #
# Contributors: Jochen Gemmler #
# #
# This software is provided "as is" without any warranty. #
##########################################################################
import json
import os
import basf2_mva
from basf2 import B2ERROR, B2FATAL
import basf2
from ROOT import Belle2
import variables.utils as vu
import modularAnalysis as ma
from ROOT import gSystem
# load the shared library of the analysis package into ROOT
gSystem.Load('libanalysis.so')
# Touch the variable manager once at import time.
# NOTE(review): presumably these two accesses only force ROOT to resolve the Belle2.Variable
# bindings so that Belle2.makeROOTCompatible (used further down in this module) is available - confirm.
Belle2.Variable.Manager
Belle2.Variable.Manager.Instance()
def get_variables(particle_list, ranked_variable, variables=None, particleNumber=1):
    """ creates the variable name pattern requested by the basf2 variable getVariableByRank()

    For every entry of ``variables`` and every rank from 1 to ``particleNumber`` one string of the form
    ``getVariableByRank(<particle_list>, <ranked_variable>, <variable>, <rank>)`` is generated.
    The result is ordered by variable first and by rank second.

    :param particle_list: string, name of the particle list inserted into the pattern
    :param ranked_variable: string, name of the variable inserted as the ranking variable
    :param variables: iterable of strings, variables to be wrapped; ``None`` (default) is treated as empty
    :param particleNumber: int, highest rank to generate (ranks start at 1)
    :return: list[string], the generated variable names (empty if there are no variables or no ranks)
    """
    # the default of None used to be iterated directly, which raised a TypeError
    if variables is None:
        return []

    pattern = 'getVariableByRank({}, {}, {}, {})'
    return [pattern.format(particle_list, ranked_variable, var, i_num)
            for var in variables
            for i_num in range(1, particleNumber + 1)]
def construct_default_variable_names(particle_lists=None, ranked_variable='p', variables=None, particleNumber=5):
    """ construct the ROOT compatible names of the rank based variables for several particle lists

    The names are generated with get_variables() list by list (keeping the order of ``particle_lists``) and
    each of them is then converted with ``Belle2.makeROOTCompatible``.

    :param particle_lists: list[string], particle lists to generate names for,
        defaults to ``['pi+:pos_charged', 'pi+:neg_charged']``
    :param ranked_variable: string, ranking variable handed to get_variables() (default ``p``)
    :param variables: variables handed to get_variables()
    :param particleNumber: int, number of ranks handed to get_variables() (default 5)
    :return: list[string], ROOT compatible variable names
    """
    lists_to_use = ['pi+:pos_charged', 'pi+:neg_charged'] if particle_lists is None else particle_lists

    # first gather all plain names ...
    plain_names = []
    for list_name in lists_to_use:
        plain_names.extend(get_variables(list_name, ranked_variable, variables, particleNumber))

    # ... then translate them in one go
    return [Belle2.makeROOTCompatible(plain_name) for plain_name in plain_names]
def DeepFlavorTagger(particle_lists, mode='expert', working_dir='', uniqueIdentifier='standard', variable_list=None,
                     target='qrCombined', overwrite=False,
                     transform_to_probability=False, signal_fraction=-1.0, classifier_args=None,
                     train_valid_fraction=.92, mva_steering_file='analysis/scripts/dft/tensorflow_dnn_interface.py',
                     maskName='',
                     path=None):
    """
    Interfacing for the DeepFlavorTagger. This function can be used for training (``teacher``), preparation of
    training datasets (``sampler``) and inference (``expert``).

    This function requires a reconstructed B meson signal particle list for which a RestOfEvent has been built.

    :param particle_lists: string or list[string], particle list(s) of the reconstructed signal B meson
    :param mode: string, valid modes are ``expert`` (default), ``teacher``, ``sampler``
    :param working_dir: string, working directory for the method
    :param uniqueIdentifier: string, database identifier for the method
    :param variable_list: list[string], name of the basf2 variables used for discrimination,
        ignored in ``expert`` mode
    :param target: string, target variable
    :param overwrite: bool, overwrite already (locally!) existing training
    :param transform_to_probability: bool, enable a purity transformation to compensate potential over-training,
        can only be set during training
    :param signal_fraction: float, (experimental) signal fraction override,
        transform the output to a probability if an uneven signal/background fraction is used in the training
        data, can only be set during training
    :param classifier_args: dictionary, customized arguments for the mlp (the passed dictionary is not modified),
        possible attributes of the dictionary are:
        lr_dec_rate: learning rate decay rate
        lr_init: learning rate initial value
        mom_init: momentum initial value
        min_epochs: minimal number of epochs
        max_epochs: maximal number of epochs
        stop_epochs: epochs to stop without improvements on the validation set for early stopping
        batch_size: batch size
        seed: random seed for tensorflow
        layers: [[layer name, activation function, input_width, output_width, init_bias, init_weights],..]
        wd_coeffs: weight decay coefficients, length of layers
        cuda_visible_devices: selection of cuda devices
        tensorboard_dir: additional directory for logging the training process
    :param train_valid_fraction: float, train-valid fraction (.92). If transform to probability is
        enabled, the train valid fraction will be split to a test set (.5)
    :param mva_steering_file: string, steering file handed to the python mva method
    :param maskName: get ROE particles from a specified ROE mask
    :param path: basf2 path obj, the RestOfEvent path built here is attached to it
    :return: None
    """

    if isinstance(particle_lists, str):
        particle_lists = [particle_lists]

    if mode not in ['expert', 'teacher', 'sampler']:
        B2FATAL('Invalid mode %s' % mode)

    if variable_list is None and mode in ['sampler', 'teacher']:
        variable_list = ['useCMSFrame(p)', 'useCMSFrame(cosTheta)', 'useCMSFrame(phi)', 'Kid', 'eid', 'muid', 'prid',
                         'nCDCHits', 'nPXDHits', 'nSVDHits', 'dz', 'dr', 'chiProb']

    # modes are compared by value: an identity check against a string literal only works if the
    # interpreter happens to intern both strings
    if variable_list is not None and mode == 'expert':
        B2ERROR('DFT: Variables from identifier file are used. Input variables will be ignored.')

    if classifier_args is None:
        classifier_args = {}
    else:
        assert isinstance(classifier_args, dict)
        # work on a copy, the dictionary of the caller must not pick up the extra key set below
        classifier_args = dict(classifier_args)

    classifier_args['transform_to_prob'] = transform_to_probability

    output_file_name = os.path.join(working_dir, uniqueIdentifier + '_training_data.root')

    # create roe specific paths
    roe_path = basf2.create_path()
    dead_end_path = basf2.create_path()

    # define dft specific lists to enable multiple calls, if someone really wants to do that
    extension = particle_lists[0].replace(':', '_to_')
    roe_particle_list_cut = ''
    roe_particle_list = 'pi+:dft' + '_' + extension

    tree_name = 'dft_variables'

    # filter rest of events only for specific particle list
    ma.signalSideParticleListsFilter(particle_lists, 'hasRestOfEventTracks > 0', roe_path, dead_end_path)

    # TODO: particles with empty rest of events seems not to show up in efficiency statistics anymore

    # create final state particle lists
    ma.fillParticleList(roe_particle_list, roe_particle_list_cut, path=roe_path)

    dft_particle_lists = ['pi+:pos_charged', 'pi+:neg_charged']

    pos_cut = 'charge > 0 and isInRestOfEvent == 1 and passesROEMask(' + maskName + ') > 0.5 and p < infinity'
    neg_cut = 'charge < 0 and isInRestOfEvent == 1 and passesROEMask(' + maskName + ') > 0.5 and p < infinity'

    ma.cutAndCopyList(dft_particle_lists[0], roe_particle_list, pos_cut, writeOut=True, path=roe_path)
    ma.cutAndCopyList(dft_particle_lists[1], roe_particle_list, neg_cut, writeOut=True, path=roe_path)

    # sort pattern for tagging specific variables
    rank_variable = 'p'
    # rank_variable = 'useCMSFrame(p)'

    # create tagging specific variables, they are only needed for sampling and training
    if mode != 'expert':
        features = get_variables(dft_particle_lists[0], rank_variable, variable_list, particleNumber=5)
        features += get_variables(dft_particle_lists[1], rank_variable, variable_list, particleNumber=5)

    for particles in dft_particle_lists:
        ma.rankByHighest(particles, rank_variable, path=roe_path)

    if mode == 'sampler':
        if os.path.isfile(output_file_name) and not overwrite:
            B2FATAL('Outputfile %s already exists. Aborting writeout.' % output_file_name)

        # and add target
        all_variables = features + [target]

        # write to ntuples
        ma.variablesToNtuple('', all_variables, tree_name, output_file_name, roe_path)

        # write the command line output for the extern teacher to a file
        extern_command = (
            'basf2_mva_teacher --datafile {output_file_name} --treename {tree_name}'
            ' --identifier {identifier} --variables "{variables_string}" --target_variable {target}'
            ' --method Python --training_fraction {fraction}'
            " --config '{classifier_args}' --framework tensorflow"
            ' --steering_file {steering_file}'
        ).format(output_file_name=output_file_name, tree_name=tree_name,
                 identifier=uniqueIdentifier,
                 variables_string='" "'.join(features), target=target,
                 classifier_args=json.dumps(classifier_args), fraction=train_valid_fraction,
                 steering_file=mva_steering_file)

        with open(os.path.join(working_dir, uniqueIdentifier + '_teacher_command'), 'w') as f:
            f.write(extern_command)

    elif mode == 'teacher':
        if not os.path.isfile(output_file_name):
            B2FATAL('There is no training data file available. Run flavor tagger in sampler mode first.')

        general_options = basf2_mva.GeneralOptions()
        general_options.m_datafiles = basf2_mva.vector(output_file_name)
        general_options.m_treename = tree_name
        general_options.m_target_variable = target
        general_options.m_variables = basf2_mva.vector(*features)
        general_options.m_identifier = uniqueIdentifier

        specific_options = basf2_mva.PythonOptions()
        specific_options.m_framework = 'tensorflow'
        specific_options.m_steering_file = mva_steering_file
        specific_options.m_training_fraction = train_valid_fraction
        specific_options.m_config = json.dumps(classifier_args)

        # the training is executed right here, not during the event processing
        basf2_mva.teacher(general_options, specific_options)

    elif mode == 'expert':
        # the info builder goes to the main path, the expert and the filler to the ROE path
        flavorTaggerInfoBuilder = basf2.register_module('FlavorTaggerInfoBuilder')
        path.add_module(flavorTaggerInfoBuilder)

        expert_module = basf2.register_module('MVAExpert')
        expert_module.param('listNames', particle_lists)
        expert_module.param('identifier', uniqueIdentifier)
        expert_module.param('extraInfoName', 'dnn_output')
        expert_module.param('signalFraction', signal_fraction)
        roe_path.add_module(expert_module)

        flavorTaggerInfoFiller = basf2.register_module('FlavorTaggerInfoFiller')
        flavorTaggerInfoFiller.param('DNNmlp', True)
        roe_path.add_module(flavorTaggerInfoFiller)

        # Create standard alias for the output of the flavor tagger
        # NOTE(review): assumed to belong to the expert branch only - confirm against the upstream file
        vu._variablemanager.addAlias('DNN_qrCombined', 'qrOutput(DNN)')

    # attach the ROE path in every mode, the sampler needs it as well to get its ntuple written
    path.for_each('RestOfEvent', 'RestOfEvents', roe_path)