Belle II Software development
Teacher Class Reference

Public Member Functions

 __init__ (self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
 
 upload (self, str channel)
 
 do_all_trainings (self)
 

Static Public Member Functions

 create_fake_weightfile (str channel)
 
 check_if_weightfile_is_fake (str filename)
 

Public Attributes

 particles = particles
 list of config.Particle objects
 
 config = config
 config.FeiConfiguration object
 

Static Public Attributes

 MaximumNumberOfMVASamples = int(1e7)
 Maximum number of events per class; the sampling rates are chosen so that the training data does not exceed this number.
 
 MinimumNumberOfMVASamples = int(5e2)
 Minimum number of events per class; if the training data contains fewer events, the channel is not used due to low statistics.
 

Detailed Description

Performs all necessary trainings for all training data files which are
available but where there is no weight file available yet.
This class is usually used by the do_trainings function below, to perform the necessary trainings after each stage.
The trainings are run in parallel using a process pool from Python's multiprocessing module.
Each training is done by a subprocess call; the training command (passed by config.externTeacher) can be either
  * basf2_mva_teacher, the training will be done directly on the machine
  * externClustTeacher, the training will be submitted to the batch system of KEKCC

Definition at line 681 of file core.py.

Constructor & Destructor Documentation

◆ __init__()

__init__ ( self,
typing.Sequence[config.Particle] particles,
config.FeiConfiguration config )
Create a new Teacher object
@param particles list of config.Particle objects
@param config config.FeiConfiguration object

Definition at line 698 of file core.py.

def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
    """
    Set up a Teacher for the given particles and FEI configuration.

    Both arguments are only stored on the instance; nothing else is done here.
    @param particles list of config.Particle objects
    @param config config.FeiConfiguration object
    """
    # config.FeiConfiguration object
    self.config = config
    # list of config.Particle objects
    self.particles = particles
708

Member Function Documentation

◆ check_if_weightfile_is_fake()

check_if_weightfile_is_fake ( str filename)
static
Checks if the provided filename is a fake-weight file or not
@param filename the filename of the weight file

Definition at line 737 of file core.py.

def check_if_weightfile_is_fake(filename: str):
    """
    Checks if the provided filename is a fake-weight file or not.

    The third line of the file is searched for the marker
    '<method>Trivial</method>'. A file that cannot be opened or read, or that
    has fewer than three lines, is reported as fake.
    @param filename the filename of the weight file
    @return True if the file is (or has to be treated as) a fake weight file, False otherwise
    """
    try:
        # Context manager: the file handle is closed even if reading fails.
        with open(filename) as weightfile:
            lines = weightfile.readlines()
        return '<method>Trivial</method>' in lines[2]
    except Exception:
        # Best effort: any problem with the file means "treat it as fake".
        # Unlike BaseException, this lets KeyboardInterrupt/SystemExit propagate.
        return True
747

◆ create_fake_weightfile()

create_fake_weightfile ( str channel)
static
Create a fake weight file using the trivial method; it will always return 0.0.
@param channel for which we create a fake weight file

Definition at line 710 of file core.py.

def create_fake_weightfile(channel: str):
    """
    Create a fake weight file using the trivial method; it will always return 0.0.

    The file is written to '<channel>.xml', relative to the current working directory.
    @param channel for which we create a fake weight file
    """
    # NOTE: the text deliberately starts with an empty line. This puts
    # '<method>Trivial</method>' on the third line of the file, which is the
    # line check_if_weightfile_is_fake inspects.
    content = f"""
<?xml version="1.0" encoding="utf-8"?>
<method>Trivial</method>
<weightfile>{channel}.xml</weightfile>
<treename>tree</treename>
<target_variable>isSignal</target_variable>
<weight_variable>__weight__</weight_variable>
<signal_class>1</signal_class>
<max_events>0</max_events>
<number_feature_variables>1</number_feature_variables>
<variable0>M</variable0>
<number_spectator_variables>0</number_spectator_variables>
<number_data_files>1</number_data_files>
<datafile0>train.root</datafile0>
<Trivial_version>1</Trivial_version>
<Trivial_output>0</Trivial_output>
<signal_fraction>0.066082567</signal_fraction>
"""
    # The XML header declares utf-8, so write with exactly that encoding
    # instead of the platform-dependent default.
    with open(f'{channel}.xml', "w", encoding="utf-8") as f:
        f.write(content)
735

◆ do_all_trainings()

do_all_trainings ( self)
Do all trainings for which we find training data

Definition at line 759 of file core.py.

759 def do_all_trainings(self):
760 """
761 Do all trainings for which we find training data
762 """
# Overview (as far as this listing shows; the original indentation is not visible here):
#  1. collect one shell command per channel that still needs a training,
#  2. run all collected commands through a multiprocessing pool,
#  3. upload a weight file for every collected channel.
# The value returned at the end is the list of (file name, database identifier)
# tuples produced by self.upload for the collected channels.
# NOTE(review): channels that are skipped, or that get a fake weight file during
# step 1, are never appended to job_list and so do not appear in that list.
763 # Always avoid the top-level 'import ROOT'.
764 import ROOT # noqa
765 # FEI uses multi-threading for parallel execution of tasks therefore
766 # the ROOT gui-thread is disabled, which otherwise interferes sometimes
767 ROOT.PyConfig.StartGuiThread = False
768 job_list = []
769
770 all_stage_particles = get_stages_from_particles(self.particles)
# Train every stage unless config.cache is set; in that case only that one stage
# is trained. Stage numbers are 1-based here (see the 'istage-1' index below).
771 if self.config.cache is None:
772 stagesToTrain = range(1, len(all_stage_particles)+1)
773 else:
774 stagesToTrain = [self.config.cache]
775
# Jobs are only collected from this file; a corrupt file or a file without any
# key only produces a warning.
776 filename = 'training_input.root'
777 if os.path.isfile(filename):
778 f = ROOT.TFile.Open(filename, 'read')
779 if f.IsZombie():
780 B2WARNING(f'Training of MVC failed: {filename}. ROOT file corrupt. No weight files will be provided.')
781 elif len([k.GetName() for k in f.GetListOfKeys()]) == 0:
782 B2WARNING(
783 f'Training of MVC failed: {filename}. ROOT file has no trees. No weight files will be provided.')
784 else:
785 for istage in stagesToTrain:
786 for particle in all_stage_particles[istage-1]:
787 for channel in particle.channels:
788 weightfile = f'{channel.label}.xml'
# A weight file that is already available is not trained again.
789 if basf2_mva.available(weightfile):
790 B2INFO(f"FEI-core: Skipping {weightfile}, already available")
791 continue
792 else:
# Find the tree of this channel: every key whose name contains the
# ROOT-compatible form of '<label> variables' is a candidate.
793 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
794 keys = [m for m in f.GetListOfKeys() if treeName in m.GetName()]
795 if not keys:
796 B2WARNING("Training of MVC failed. "
797 f"Couldn't find tree for channel {channel}. Ignoring channel.")
798 continue
799 elif len(keys) > 1:
800 B2WARNING(f"Found more than one tree for channel {channel}. Taking first tree from: {keys}")
801 tree = keys[0].ReadObj()
802 total_entries = tree.GetEntries()
# Signal/background counts: entries whose target variable equals 1.0 / 0.0.
803 nSig = tree.GetEntries(f'{channel.mvaConfig.target}==1.0')
804 nBg = tree.GetEntries(f'{channel.mvaConfig.target}==0.0')
805 B2INFO(
806 f'FEI-core: Number of events for channel: {channel.label}, '
807 f'Total: {total_entries}, Signal: {nSig}, Background: {nBg}')
# Too few events in either class: no training job is queued; a fake weight
# file is written and uploaded right away instead.
808 if nSig < Teacher.MinimumNumberOfMVASamples:
809 B2WARNING("Training of MVC failed. "
810 f"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
811 self.create_fake_weightfile(channel.label)
812 self.upload(channel.label)
813 continue
814 if nBg < Teacher.MinimumNumberOfMVASamples:
815 B2WARNING("Training of MVC failed. "
816 f"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
817 self.create_fake_weightfile(channel.label)
818 self.upload(channel.label)
819 continue
# The "' '" separator pairs up with the quotes placed around the placeholder in
# the command below, so every variable ends up individually single-quoted.
820 variable_str = "' '".join(channel.mvaConfig.variables)
821
822 spectators = list(channel.mvaConfig.spectators.keys())
823 if channel.mvaConfig.sPlotVariable is not None:
824 spectators.append(channel.mvaConfig.sPlotVariable)
825 spectators_str = "' '".join(spectators)
826
# NOTE(review): same expression as the treeName computed earlier for this channel.
827 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
828 command = (f"{self.config.externTeacher}"
829 f" --method '{channel.mvaConfig.method}'"
830 f" --target_variable '{channel.mvaConfig.target}'"
831 f" --treename '{treeName}'"
832 f" --datafile 'training_input.root'"
833 f" --signal_class 1"
834 f" --variables '{variable_str}'"
835 f" --identifier '{weightfile}'")
836 if len(spectators) > 0:
837 command += f" --spectators '{spectators_str}'"
# stdout and stderr of the teacher are redirected to '<channel.label>.log'.
838 command += f" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1"
839 B2INFO(f"Used following command to invoke teacher: \n {command}")
840 job_list.append((channel.label, command))
841 f.Close()
842
# Run the collected commands in a multiprocessing pool (default number of
# workers, maxtasksperchild=1). Each command string is executed through the
# shell via subprocess.call(..., shell=True); the return codes are not inspected.
843 if len(job_list) > 0:
844 p = multiprocessing.Pool(None, maxtasksperchild=1)
845 func = functools.partial(subprocess.call, shell=True)
846 p.map(func, [c for _, c in job_list])
847 p.close()
848 p.join()
849 weightfiles = []
# Success is judged by whether the weight file is available afterwards; a
# training that left none behind is replaced by a fake weight file, so every
# collected channel is uploaded.
850 for name, _ in job_list:
851 if not basf2_mva.available(f'{name}.xml'):
# NOTE(review): the log file name is passed as a second positional argument
# rather than being part of the message string - confirm this is intended.
852 B2WARNING("Training of MVC failed. For unknown reasons, check the logfile", f'{name}.log')
853 self.create_fake_weightfile(name)
854 weightfiles.append(self.upload(name))
855 return weightfiles
856
857

◆ upload()

upload ( self,
str channel )
Upload the weight file into the condition database
@param channel whose weight file is uploaded

Definition at line 748 of file core.py.

def upload(self, channel: str):
    """
    Upload the weight file of a channel into the condition database.

    The file '<channel>.xml' is uploaded under the identifier
    '<prefix>_<channel>', with the prefix taken from the FEI configuration.
    @param channel whose weight file is uploaded
    @return tuple of (name of the file on disk, identifier used for the upload)
    """
    weightfile_on_disk = f'{channel}.xml'
    database_identifier = f'{self.config.prefix}_{channel}'
    basf2_mva.upload(weightfile_on_disk, database_identifier)
    print(f"FEI-core: Uploading {database_identifier} to localdb")
    return weightfile_on_disk, database_identifier
758

Member Data Documentation

◆ config

config = config

config.FeiConfiguration object

Definition at line 707 of file core.py.

◆ MaximumNumberOfMVASamples

MaximumNumberOfMVASamples = int(1e7)
static

Maximum number of events per class; the sampling rates are chosen so that the training data does not exceed this number.

Definition at line 693 of file core.py.

◆ MinimumNumberOfMVASamples

MinimumNumberOfMVASamples = int(5e2)
static

Minimum number of events per class; if the training data contains fewer events, the channel is not used due to low statistics.

Definition at line 696 of file core.py.

◆ particles

particles = particles

list of config.Particle objects

Definition at line 705 of file core.py.


The documentation for this class was generated from the following file: