Belle II Software development
Histograms Class Reference

Public Member Functions

def __init__ (self, data, column, masks=dict(), weight_column=None, bins=100, equal_frequency=True, range_in_std=None)
 
def get_hist (self, name=None)
 
def get_summed_hist (self, names)
 
def get_efficiency (self, signal_names)
 
def get_true_positives (self, signal_names)
 
def get_false_positives (self, bckgrd_names)
 
def get_purity (self, signal_names, bckgrd_names)
 
def get_signal_to_noise (self, signal_names, bckgrd_names)
 
def get_purity_per_bin (self, signal_names, bckgrd_names)
 

Public Attributes

 bins
 create histogram
 
 bin_centers
 bin centers
 
 bin_widths
 Subtract a small number from the bin width, otherwise the errorband plot is unstable.
 
 hists
 initialize empty dictionary for histograms
 

Static Public Attributes

None hist = None
 Histogram of the full data.
 
None bins = None
 Binning.
 
None bin_centers = None
 Bin centers.
 
None bin_widths = None
 Bin widths.
 
None hists = None
 Dictionary of histograms for the given masks.
 

Detailed Description

Extracts information from a pandas.DataFrame and stores it
in a binned format.
Therefore the size of the stored data is independent of the size of the pandas.DataFrame.
Used by the plotting routines below.

Definition at line 42 of file histogram.py.

Constructor & Destructor Documentation

◆ __init__()

def __init__ (   self,
  data,
  column,
  masks = dict(),
  weight_column = None,
  bins = 100,
  equal_frequency = True,
  range_in_std = None 
)
Creates a common binning of the given column of the given pandas.DataFrame,
and stores for each given mask the histogram of the column
@param data pandas.DataFrame like object containing column and weight_column
@param column string identifying the column in the pandas.DataFrame which is binned.
@param masks dictionary of names and boolean arrays, which select the data
             used for the creation of histograms with these names
@param weight_column string identifying the column in the pandas.DataFrame which is used as weight
@param bins use given bins instead of default 100
@param equal_frequency perform an equal_frequency binning
@param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean

Definition at line 61 of file histogram.py.

def __init__(self, data, column, masks=dict(), weight_column=None, bins=100, equal_frequency=True, range_in_std=None):
    """
    Creates a common binning of the given column of the given pandas.DataFrame,
    and stores for each given mask the histogram of the column
    @param data pandas.DataFrame like object containing column and weight_column
    @param column string identifying the column in the pandas.DataFrame which is binned.
    @param masks dictionary of names and boolean arrays, which select the data
                 used for the creation of histograms with these names
                 (the default dictionary is only read, never modified)
    @param weight_column string identifying the column in the pandas.DataFrame which is used as weight
    @param bins use given bins instead of default 100
    @param equal_frequency perform an equal_frequency binning
    @param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean
    """
    values = data[column]
    weights = None if weight_column is None else data[weight_column]

    isfinite = numpy.isfinite(values)
    if range_in_std is not None:
        mean, std = weighted_mean_and_std(values[isfinite],
                                          None if weights is None else weights[isfinite])
        # Everything outside mean +- range_in_std * std is considered infinite
        isfinite = isfinite & (values > (mean - range_in_std * std)) & (values < (mean + range_in_std * std))

    if equal_frequency:
        if values[isfinite].size > 0:
            # Spread the bins + 1 edges evenly over the full 0..100 percentile range,
            # so that the binning covers all data for any number of bins (not only 100).
            bins = numpy.unique(numpy.percentile(values[isfinite], q=numpy.linspace(0, 100, bins + 1)))
        else:
            print('Empty Array')
            bins = [1]
        # If all values are identical, we make at least one bin
        if len(bins) == 1:
            bins = numpy.array([bins[0]-1, bins[0]+1])

    # create histogram of the full data; the weights must be selected with the same
    # mask as the values, otherwise numpy.histogram sees arrays of different shape
    self.hist, self.bins = numpy.histogram(values[isfinite], bins=bins,
                                           weights=None if weights is None else weights[isfinite])
    # bin centers
    self.bin_centers = (self.bins + numpy.roll(self.bins, 1))[1:] / 2.0
    # Subtract a small number from the bin width, otherwise the errorband plot is unstable.
    self.bin_widths = (self.bins - numpy.roll(self.bins, 1))[1:] - 0.00001
    # dictionary of histograms, one per mask, all sharing the common binning
    self.hists = dict()
    for name, mask in masks.items():
        selection = mask & isfinite
        self.hists[name] = numpy.histogram(values[selection], bins=self.bins,
                                           weights=None if weights is None else weights[selection])[0]

Member Function Documentation

◆ get_efficiency()

def get_efficiency (   self,
  signal_names 
)
Return the cumulative efficiency in each bin of the sum of the histograms with the given names.
@param signal_names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 125 of file histogram.py.

def get_efficiency(self, signal_names):
    """
    Return the cumulative efficiency in each bin of the sum of the histograms with the given names.
    @param signal_names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding binomial errors
    """
    signal, _ = self.get_summed_hist(signal_names)
    total = signal.sum()
    cumsignal = (total - signal.cumsum()).astype('float')

    if total > 0:
        return cumsignal / total, binom_error(cumsignal, total)

    # Without any signal the efficiency is undefined; return zeros with one entry
    # per bin so the result has the same array shape as in the regular case.
    return numpy.zeros_like(cumsignal), numpy.zeros_like(cumsignal)

◆ get_false_positives()

def get_false_positives (   self,
  bckgrd_names 
)
Return the cumulative false positives in each bin of the sum of the histograms with the given names.
@param bckgrd_names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 152 of file histogram.py.

def get_false_positives(self, bckgrd_names):
    """
    Return the cumulative false positives in each bin of the sum of the histograms with the given names.
    @param bckgrd_names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    summed = self.get_summed_hist(bckgrd_names)[0]
    # entry i holds the background content of all bins after bin i
    remaining = (summed.sum() - summed.cumsum()).astype('float')
    return remaining, poisson_error(remaining)

◆ get_hist()

def get_hist (   self,
  name = None 
)
Return histogram with the given name. If none returns histogram of the full data.
@param name name of the histogram
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 104 of file histogram.py.

def get_hist(self, name=None):
    """
    Return histogram with the given name. If name is None, return the histogram of the full data.
    @param name name of the histogram
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    if name is not None:
        return self.get_summed_hist([name])
    full = self.hist
    return full, poisson_error(full)

◆ get_purity()

def get_purity (   self,
  signal_names,
  bckgrd_names 
)
Return the cumulative purity in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 163 of file histogram.py.

def get_purity(self, signal_names, bckgrd_names):
    """
    Return the cumulative purity in each bin of the sum of the histograms with the given names.
    @param signal_names names of the signal histograms
    @param bckgrd_names names of the background histograms
    @return numpy.array with hist data, numpy.array with corresponding binomial errors
    """
    signal, _ = self.get_summed_hist(signal_names)
    bckgrd, _ = self.get_summed_hist(bckgrd_names)
    cumsignal = (signal.sum() - signal.cumsum()).astype('float')
    cumbckgrd = (bckgrd.sum() - bckgrd.cumsum()).astype('float')
    total = cumsignal + cumbckgrd

    # The last cumulative entry is sum - cumsum[-1], i.e. (numerically) zero, so a
    # 0/0 division is expected there. The resulting NaN is kept, only the numpy
    # floating point warning is silenced (also for binom_error, which receives the same zeros).
    with numpy.errstate(divide='ignore', invalid='ignore'):
        purity = cumsignal / total
        purity_error = binom_error(cumsignal, total)
    return purity, purity_error

◆ get_purity_per_bin()

def get_purity_per_bin (   self,
  signal_names,
  bckgrd_names 
)
Return the purity in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 193 of file histogram.py.

def get_purity_per_bin(self, signal_names, bckgrd_names):
    """
    Return the purity in each bin of the sum of the histograms with the given names.
    @param signal_names names of the signal histograms
    @param bckgrd_names names of the background histograms
    @return numpy.array with hist data, numpy.array with corresponding binomial errors
    """
    signal, _ = self.get_summed_hist(signal_names)
    bckgrd, _ = self.get_summed_hist(bckgrd_names)
    signal = signal.astype('float')
    bckgrd = bckgrd.astype('float')
    total = signal + bckgrd

    # Bins without any signal or background entries give 0/0. The resulting NaN is
    # kept, only the numpy floating point warning is silenced (also for binom_error,
    # which receives the same zeros).
    with numpy.errstate(divide='ignore', invalid='ignore'):
        purity = signal / total
        purity_error = binom_error(signal, total)
    return purity, purity_error

◆ get_signal_to_noise()

def get_signal_to_noise (   self,
  signal_names,
  bckgrd_names 
)
Return the cumulative signal to noise ratio in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with the signal to noise ratio, numpy.array with the corresponding errors

Definition at line 178 of file histogram.py.

def get_signal_to_noise(self, signal_names, bckgrd_names):
    """
    Return the cumulative signal to noise ratio in each bin of the sum of the histograms with the given names.
    @param signal_names names of the signal histograms
    @param bckgrd_names names of the background histograms
    @return numpy.array with the signal to noise ratio, numpy.array with the corresponding errors
    """
    signal, _ = self.get_summed_hist(signal_names)
    bckgrd, _ = self.get_summed_hist(bckgrd_names)
    cumsignal = (signal.sum() - signal.cumsum()).astype('float')
    cumbckgrd = (bckgrd.sum() - bckgrd.cumsum()).astype('float')
    total = cumsignal + cumbckgrd

    # The last cumulative entry is sum - cumsum[-1], i.e. (numerically) zero, so a
    # 0/0 division is expected there. The resulting NaN is kept, only the numpy
    # floating point warning is silenced.
    with numpy.errstate(divide='ignore', invalid='ignore'):
        signal2noise = cumsignal / total**0.5
        signal2noise_error = numpy.sqrt(cumsignal / total + (cumsignal / (2 * total))**2)
    return signal2noise, signal2noise_error

◆ get_summed_hist()

def get_summed_hist (   self,
  names 
)
Return the sum of histograms with the given names.
@param names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 114 of file histogram.py.

def get_summed_hist(self, names):
    """
    Return the sum of histograms with the given names.
    Names without a stored histogram contribute an all-zero histogram.
    @param names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    default = numpy.zeros(len(self.bin_centers))
    # Collect the histograms in a list first: passing a generator to numpy.sum is deprecated.
    selected = [self.hists.get(v, default) for v in names]
    # An empty list of names yields an all-zero histogram instead of a bare scalar 0.
    hist = numpy.sum(selected, axis=0) if selected else default
    hist_error = poisson_error(hist)
    return hist, hist_error

◆ get_true_positives()

def get_true_positives (   self,
  signal_names 
)
Return the cumulative true positives in each bin of the sum of the histograms with the given names.
@param signal_names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 141 of file histogram.py.

def get_true_positives(self, signal_names):
    """
    Return the cumulative true positives in each bin of the sum of the histograms with the given names.
    @param signal_names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    summed = self.get_summed_hist(signal_names)[0]
    # entry i holds the signal content of all bins after bin i
    remaining = (summed.sum() - summed.cumsum()).astype('float')
    return remaining, poisson_error(remaining)

Member Data Documentation

◆ bin_centers [1/2]

None bin_centers = None
static

Bin centers.

Definition at line 55 of file histogram.py.

◆ bin_centers [2/2]

bin_centers

bin centers

Definition at line 95 of file histogram.py.

◆ bin_widths [1/2]

None bin_widths = None
static

Bin widths.

Definition at line 57 of file histogram.py.

◆ bin_widths [2/2]

bin_widths

Subtract a small number from the bin width, otherwise the errorband plot is unstable.

Definition at line 97 of file histogram.py.

◆ bins [1/2]

None bins = None
static

Binning.

Definition at line 53 of file histogram.py.

◆ bins [2/2]

bins

create histogram

Definition at line 92 of file histogram.py.

◆ hist

None hist = None
static

Histogram of the full data.

Definition at line 51 of file histogram.py.

◆ hists [1/2]

None hists = None
static

Dictionary of histograms for the given masks.

Definition at line 59 of file histogram.py.

◆ hists [2/2]

hists

initialize empty dictionary for histograms

Definition at line 99 of file histogram.py.


The documentation for this class was generated from the following file: