Belle II Software  release-06-00-14
Histograms Class Reference
Inheritance diagram for Histograms:
Collaboration diagram for Histograms:

Public Member Functions

def __init__ (self, data, column, masks=dict(), weight_column=None, bins=100, equal_frequency=True, range_in_std=None)
 
def get_hist (self, name=None)
 
def get_summed_hist (self, names)
 
def get_efficiency (self, signal_names)
 
def get_true_positives (self, signal_names)
 
def get_false_positives (self, bckgrd_names)
 
def get_purity (self, signal_names, bckgrd_names)
 
def get_signal_to_noise (self, signal_names, bckgrd_names)
 
def get_purity_per_bin (self, signal_names, bckgrd_names)
 

Static Public Attributes

 hist = None
 Histogram of the full data.
 
 bins = None
 Binning.
 
 bin_centers = None
 Bin centers.
 
 bin_widths = None
 Bin widths.
 
 hists = None
 Dictionary of histograms for the given masks.
 

Detailed Description

Extracts information from a pandas.DataFrame and stores it
in a binned format.
Therefore, the stored size is independent of the size of the pandas.DataFrame.
Used by the plotting routines below.

Definition at line 43 of file histogram.py.

Constructor & Destructor Documentation

◆ __init__()

def __init__ (   self,
  data,
  column,
  masks = dict(),
  weight_column = None,
  bins = 100,
  equal_frequency = True,
  range_in_std = None 
)
Creates a common binning of the given column of the given pandas.Dataframe,
and stores for each given mask the histogram of the column
@param data pandas.DataFrame-like object containing column and weight_column
@param column string identifying the column in the pandas.DataFrame which is binned.
@param masks dictionary of names and boolean arrays, which select the data
             used for the creation of histograms with these names
@param weight_column identifies the column in the pandas.DataFrame which is used as weight
@param bins use given bins instead of default 100
@param equal_frequency perform an equal_frequency binning
@param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean

Definition at line 62 of file histogram.py.

62  def __init__(self, data, column, masks=dict(), weight_column=None, bins=100, equal_frequency=True, range_in_std=None):
63  """
64  Creates a common binning of the given column of the given pandas.Dataframe,
65  and stores for each given mask the histogram of the column
66  @param data pandas.DataFrame like object containing column and weight_column
67  @param column string identifiying the column in the pandas.DataFrame which is binned.
68  @param masks dictionary of names and boolean arrays, which select the data
69  used for the creation of histograms with these names
70  @param weight_column identifiying the column in the pandas.DataFrame which is used as weight
71  @param bins use given bins instead of default 100
72  @param equal_frequency perform an equal_frequency binning
73  @param range_in_std show only the data in a windows around +- range_in_std * standard_deviation around the mean
74  """
75  isfinite = numpy.isfinite(data[column])
76  if range_in_std is not None:
77  mean, std = weighted_mean_and_std(data[column][isfinite],
78  None if weight_column is None else data[weight_column][isfinite])
79  # Everything outside mean +- range_in_std * std is considered infinite
80  isfinite = isfinite & (data[column] > (mean - range_in_std * std)) & (data[column] < (mean + range_in_std * std))
81 
82  if equal_frequency:
83  if data[column][isfinite].size > 0:
84  bins = numpy.unique(numpy.percentile(data[column][isfinite], q=range(bins + 1)))
85  else:
86  print('Empty Array')
87  bins = [1]
88  # If all values are unique, we make at least one bin
89  if len(bins) == 1:
90  bins = numpy.array([bins[0]-1, bins[0]+1])
91 
92  self.hist, self.bins = numpy.histogram(data[column][isfinite], bins=bins,
93  weights=None if weight_column is None else data[weight_column])
94  self.bin_centers = (self.bins + numpy.roll(self.bins, 1))[1:] / 2.0
95  # Subtract a small number from the bin width, otherwise the errorband plot is unstable.
96  self.bin_widths = (self.bins - numpy.roll(self.bins, 1))[1:] - 0.00001
97  self.hists = dict()
98  for name, mask in masks.items():
99  self.hists[name] = numpy.histogram(data[column][mask & isfinite], bins=self.bins,
100  weights=None if weight_column is None else data[weight_column][mask & isfinite])[0]
101 

Member Function Documentation

◆ get_efficiency()

def get_efficiency (   self,
  signal_names 
)
Return the cumulative efficiency in each bin of the sum of the histograms with the given names.
@param signal_names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 123 of file histogram.py.

◆ get_false_positives()

def get_false_positives (   self,
  bckgrd_names 
)
Return the cumulative false positives in each bin of the sum of the histograms with the given names.
@param bckgrd_names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 150 of file histogram.py.

◆ get_hist()

def get_hist (   self,
  name = None 
)
Return the histogram with the given name. If name is None, return the histogram of the full data.
@param name name of the histogram
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 102 of file histogram.py.

◆ get_purity()

def get_purity (   self,
  signal_names,
  bckgrd_names 
)
Return the cumulative purity in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 161 of file histogram.py.

◆ get_purity_per_bin()

def get_purity_per_bin (   self,
  signal_names,
  bckgrd_names 
)
Return the purity in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 191 of file histogram.py.

◆ get_signal_to_noise()

def get_signal_to_noise (   self,
  signal_names,
  bckgrd_names 
)
Return the cumulative signal to noise ratio in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 176 of file histogram.py.

◆ get_summed_hist()

def get_summed_hist (   self,
  names 
)
Return the sum of histograms with the given names.
@param names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 112 of file histogram.py.

◆ get_true_positives()

def get_true_positives (   self,
  signal_names 
)
Return the cumulative true positives in each bin of the sum of the histograms with the given names.
@param names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 139 of file histogram.py.


The documentation for this class was generated from the following file: