Belle II Software development
Teacher Class Reference

Public Member Functions

 __init__ (self, typing.Sequence[config.Particle] particles, config.FeiConfiguration config)
 
 upload (self, str channel)
 
 do_all_trainings (self)
 

Static Public Member Functions

 create_fake_weightfile (str channel)
 
 check_if_weightfile_is_fake (str filename)
 

Public Attributes

 particles = particles
 list of config.Particle objects
 
 config = config
 config.FeiConfiguration object
 

Static Public Attributes

 MaximumNumberOfMVASamples = int(1e7)
 Maximum number of events per class; the sampling rates are chosen so that the training data does not exceed this number.
 
 MinimumNumberOfMVASamples = int(5e2)
 Minimum number of events per class; if the training data contains fewer events, the channel is not used due to low statistics.
 

Detailed Description

Performs all necessary trainings for all available training data files
for which no weight file is available yet.
This class is usually used by the do_trainings function below to perform the necessary trainings after each stage.
The trainings are run in parallel using a Python multiprocessing pool.
Each training is done by a subprocess call; the training command (passed by config.externTeacher) can be either
  * basf2_mva_teacher, the training will be done directly on the machine
  * externClustTeacher, the training will be submitted to the batch system of KEKCC

Definition at line 675 of file core.py.

Constructor & Destructor Documentation

◆ __init__()

__init__ ( self,
typing.Sequence[config.Particle] particles,
config.FeiConfiguration config )
Create a new Teacher object
@param particles list of config.Particle objects
@param config config.FeiConfiguration object

Definition at line 692 of file core.py.

692 def __init__(self, particles: typing.Sequence[config.Particle], config: config.FeiConfiguration):
693 """
694 Create a new Teacher object
695 @param particles list of config.Particle objects
696 @param config config.FeiConfiguration object
697 """
698
699 self.particles = particles
700
701 self.config = config
702

Member Function Documentation

◆ check_if_weightfile_is_fake()

check_if_weightfile_is_fake ( str filename)
static
Checks whether the file with the provided filename is a fake weight file
@param filename the filename of the weight file

Definition at line 731 of file core.py.

731 def check_if_weightfile_is_fake(filename: str):
732 """
733 Checks if the provided filename is a fake-weight file or not
734 @param filename the filename of the weight file
735 """
736 try:
737 return '<method>Trivial</method>' in open(filename).readlines()[2]
738 except BaseException:
739 return True
740 return True
741

◆ create_fake_weightfile()

create_fake_weightfile ( str channel)
static
Create a fake weight file using the trivial method; it will always return 0.0
@param channel for which we create a fake weight file

Definition at line 704 of file core.py.

704 def create_fake_weightfile(channel: str):
705 """
706 Create a fake weight file using the trivial method, it will always return 0.0
707 @param channel for which we create a fake weight file
708 """
709 content = f"""
710 <?xml version="1.0" encoding="utf-8"?>
711 <method>Trivial</method>
712 <weightfile>{channel}.xml</weightfile>
713 <treename>tree</treename>
714 <target_variable>isSignal</target_variable>
715 <weight_variable>__weight__</weight_variable>
716 <signal_class>1</signal_class>
717 <max_events>0</max_events>
718 <number_feature_variables>1</number_feature_variables>
719 <variable0>M</variable0>
720 <number_spectator_variables>0</number_spectator_variables>
721 <number_data_files>1</number_data_files>
722 <datafile0>train.root</datafile0>
723 <Trivial_version>1</Trivial_version>
724 <Trivial_output>0</Trivial_output>
725 <signal_fraction>0.066082567</signal_fraction>
726 """
727 with open(f'{channel}.xml', "w") as f:
728 f.write(content)
729

◆ do_all_trainings()

do_all_trainings ( self)
Do all trainings for which we find training data

Definition at line 753 of file core.py.

753 def do_all_trainings(self):
754 """
755 Do all trainings for which we find training data
756 """
757 # Always avoid the top-level 'import ROOT'.
758 import ROOT # noqa
759 # FEI uses multi-threading for parallel execution of tasks therefore
760 # the ROOT gui-thread is disabled, which otherwise interferes sometimes
761 ROOT.PyConfig.StartGuiThread = False
762 job_list = []
763
764 all_stage_particles = get_stages_from_particles(self.particles)
765 if self.config.cache is None:
766 stagesToTrain = range(1, len(all_stage_particles)+1)
767 else:
768 stagesToTrain = [self.config.cache]
769
770 filename = 'training_input.root'
771 if os.path.isfile(filename):
772 f = ROOT.TFile.Open(filename, 'read')
773 if f.IsZombie():
774 B2WARNING(f'Training of MVC failed: {filename}. ROOT file corrupt. No weight files will be provided.')
775 elif len([k.GetName() for k in f.GetListOfKeys()]) == 0:
776 B2WARNING(
777 f'Training of MVC failed: {filename}. ROOT file has no trees. No weight files will be provided.')
778 else:
779 for istage in stagesToTrain:
780 for particle in all_stage_particles[istage-1]:
781 for channel in particle.channels:
782 weightfile = f'{channel.label}.xml'
783 if basf2_mva.available(weightfile):
784 B2INFO(f"FEI-core: Skipping {weightfile}, already available")
785 continue
786 else:
787 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
788 keys = [m for m in f.GetListOfKeys() if treeName in m.GetName()]
789 if not keys:
790 B2WARNING("Training of MVC failed. "
791 f"Couldn't find tree for channel {channel}. Ignoring channel.")
792 continue
793 elif len(keys) > 1:
794 B2WARNING(f"Found more than one tree for channel {channel}. Taking first tree from: {keys}")
795 tree = keys[0].ReadObj()
796 total_entries = tree.GetEntries()
797 nSig = tree.GetEntries(f'{channel.mvaConfig.target}==1.0')
798 nBg = tree.GetEntries(f'{channel.mvaConfig.target}==0.0')
799 B2INFO(
800 f'FEI-core: Number of events for channel: {channel.label}, '
801 f'Total: {total_entries}, Signal: {nSig}, Background: {nBg}')
802 if nSig < Teacher.MinimumNumberOfMVASamples:
803 B2WARNING("Training of MVC failed. "
804 f"Tree contains too few signal events {nSig}. Ignoring channel {channel}.")
805 self.create_fake_weightfile(channel.label)
806 self.upload(channel.label)
807 continue
808 if nBg < Teacher.MinimumNumberOfMVASamples:
809 B2WARNING("Training of MVC failed. "
810 f"Tree contains too few bckgrd events {nBg}. Ignoring channel {channel}.")
811 self.create_fake_weightfile(channel.label)
812 self.upload(channel.label)
813 continue
814 variable_str = "' '".join(channel.mvaConfig.variables)
815
816 spectators = list(channel.mvaConfig.spectators.keys())
817 if channel.mvaConfig.sPlotVariable is not None:
818 spectators.append(channel.mvaConfig.sPlotVariable)
819 spectators_str = "' '".join(spectators)
820
821 treeName = ROOT.Belle2.MakeROOTCompatible.makeROOTCompatible(f'{channel.label} variables')
822 command = (f"{self.config.externTeacher}"
823 f" --method '{channel.mvaConfig.method}'"
824 f" --target_variable '{channel.mvaConfig.target}'"
825 f" --treename '{treeName}'"
826 f" --datafile 'training_input.root'"
827 f" --signal_class 1"
828 f" --variables '{variable_str}'"
829 f" --identifier '{weightfile}'")
830 if len(spectators) > 0:
831 command += f" --spectators '{spectators_str}'"
832 command += f" {channel.mvaConfig.config} > '{channel.label}'.log 2>&1"
833 B2INFO(f"Used following command to invoke teacher: \n {command}")
834 job_list.append((channel.label, command))
835 f.Close()
836
837 if len(job_list) > 0:
838 p = multiprocessing.Pool(None, maxtasksperchild=1)
839 func = functools.partial(subprocess.call, shell=True)
840 p.map(func, [c for _, c in job_list])
841 p.close()
842 p.join()
843 weightfiles = []
844 for name, _ in job_list:
845 if not basf2_mva.available(f'{name}.xml'):
846 B2WARNING("Training of MVC failed. For unknown reasons, check the logfile", f'{name}.log')
847 self.create_fake_weightfile(name)
848 weightfiles.append(self.upload(name))
849 return weightfiles
850
851

◆ upload()

upload ( self,
str channel )
Upload the weight file into the condition database
@param channel whose weight file is uploaded

Definition at line 742 of file core.py.

742 def upload(self, channel: str):
743 """
744 Upload the weight file into the condition database
745 @param channel whose weight file is uploaded
746 """
747 disk = f'{channel}.xml'
748 dbase = f'{self.config.prefix}_{channel}'
749 basf2_mva.upload(disk, dbase)
750 print(f"FEI-core: Uploading {dbase} to localdb")
751 return (disk, dbase)
752

Member Data Documentation

◆ config

config = config

config.FeiConfiguration object

Definition at line 701 of file core.py.

◆ MaximumNumberOfMVASamples

MaximumNumberOfMVASamples = int(1e7)
static

Maximum number of events per class; the sampling rates are chosen so that the training data does not exceed this number.

Definition at line 687 of file core.py.

◆ MinimumNumberOfMVASamples

MinimumNumberOfMVASamples = int(5e2)
static

Minimum number of events per class; if the training data contains fewer events, the channel is not used due to low statistics.

Definition at line 690 of file core.py.

◆ particles

particles = particles

list of config.Particle objects

Definition at line 699 of file core.py.


The documentation for this class was generated from the following file: