Belle II Software development
Histograms Class Reference

Public Member Functions

 __init__ (self, data, column, masks=dict(), weight_column=None, bins=100, equal_frequency=True, range_in_std=None)
 
 get_hist (self, name=None)
 
 get_summed_hist (self, names)
 
 get_efficiency (self, signal_names)
 
 get_true_positives (self, signal_names)
 
 get_false_positives (self, bckgrd_names)
 
 get_purity (self, signal_names, bckgrd_names)
 
 get_signal_to_noise (self, signal_names, bckgrd_names)
 
 get_purity_per_bin (self, signal_names, bckgrd_names)
 

Static Public Attributes

 hist = None
 Histogram of the full data.
 
 bins = None
 Binning.
 
tuple bin_centers = None
 Bin centers.
 
tuple bin_widths = None
 Bin widths.
 
 hists = None
 Dictionary of histograms for the given masks.
 

Detailed Description

Extracts information from a pandas.DataFrame and stores it
in a binned format.
Its size is therefore independent of the size of the pandas.DataFrame.
Used by the plotting routines below.

Definition at line 42 of file histogram.py.

Constructor & Destructor Documentation

◆ __init__()

__init__ ( self,
data,
column,
masks = dict(),
weight_column = None,
bins = 100,
equal_frequency = True,
range_in_std = None )
Creates a common binning of the given column of the given pandas.Dataframe,
and stores for each given mask the histogram of the column
@param data pandas.DataFrame like object containing column and weight_column
@param column string identifying the column in the pandas.DataFrame which is binned.
@param masks dictionary of names and boolean arrays, which select the data
             used for the creation of histograms with these names
@param weight_column string identifying the column in the pandas.DataFrame which is used as weight
@param bins use given bins instead of default 100
@param equal_frequency perform an equal_frequency binning
@param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean

Definition at line 61 of file histogram.py.

def __init__(self, data, column, masks=dict(), weight_column=None, bins=100, equal_frequency=True, range_in_std=None):
    """
    Creates a common binning of the given column of the given pandas.DataFrame,
    and stores for each given mask the histogram of the column
    @param data pandas.DataFrame like object containing column and weight_column
    @param column string identifying the column in the pandas.DataFrame which is binned.
    @param masks dictionary of names and boolean arrays, which select the data
                 used for the creation of histograms with these names
                 (only read, never modified, so the shared default is harmless)
    @param weight_column string identifying the column in the pandas.DataFrame which is used as weight
    @param bins number of bins (default 100); with equal_frequency=False the value is
                handed to numpy.histogram unchanged
    @param equal_frequency perform an equal_frequency binning
    @param range_in_std show only the data in a window of +- range_in_std * standard_deviation around the mean
    """
    values = data[column]
    weights = None if weight_column is None else data[weight_column]

    isfinite = numpy.isfinite(values)
    if range_in_std is not None:
        mean, std = weighted_mean_and_std(values[isfinite],
                                          None if weights is None else weights[isfinite])
        # Everything outside mean +- range_in_std * std is treated like a non-finite value
        isfinite = isfinite & (values > (mean - range_in_std * std)) & (values < (mean + range_in_std * std))

    selected = values[isfinite]

    if equal_frequency:
        if selected.size > 0:
            # Spread the bins + 1 edges over the full 0..100 percentile range.
            # range(bins + 1) only did that for bins == 100: fewer bins cut off
            # the upper tail of the data, more bins requested percentiles > 100.
            percentiles = numpy.linspace(0, 100, bins + 1)
            bins = numpy.unique(numpy.percentile(selected, q=percentiles))
        else:
            print('Empty Array')
            bins = [1]
        # If all values are identical only one edge is left, so build one bin around it
        if len(bins) == 1:
            bins = numpy.array([bins[0] - 1, bins[0] + 1])

    # The weights must be restricted with the same selection as the values,
    # numpy.histogram requires both to have the same shape.
    self.hist, self.bins = numpy.histogram(selected, bins=bins,
                                           weights=None if weights is None else weights[isfinite])

    # Midpoint between each pair of neighbouring bin edges
    self.bin_centers = (self.bins[1:] + self.bins[:-1]) / 2.0

    # Subtract a small number from the bin width, otherwise the errorband plot is unstable.
    self.bin_widths = numpy.diff(self.bins) - 0.00001

    self.hists = dict()
    for name, mask in masks.items():
        selection = mask & isfinite
        self.hists[name] = numpy.histogram(values[selection], bins=self.bins,
                                           weights=None if weights is None else weights[selection])[0]

Member Function Documentation

◆ get_efficiency()

get_efficiency ( self,
signal_names )
Return the cumulative efficiency in each bin of the sum of the histograms with the given names.
@param  signal_names of the histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 125 of file histogram.py.

def get_efficiency(self, signal_names):
    """
    Return the cumulative efficiency in each bin of the sum of the histograms with the given names.
    For every bin this is the summed content of all higher bins divided by the total content.
    If the summed histogram is empty, arrays of zeros are returned so that the
    result always has one entry per bin.
    @param signal_names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding binomial errors
    """
    signal, _ = self.get_summed_hist(signal_names)
    total = signal.sum()
    # Content remaining above each bin
    cumsignal = (total - signal.cumsum()).astype('float')

    if total > 0:
        efficiency = cumsignal / total
        efficiency_error = binom_error(cumsignal, total)
    else:
        # Nothing selected at all: keep the array shape instead of returning a scalar 0
        efficiency = numpy.zeros_like(cumsignal)
        efficiency_error = numpy.zeros_like(cumsignal)
    return efficiency, efficiency_error

◆ get_false_positives()

get_false_positives ( self,
bckgrd_names )
Return the cumulative false positives in each bin of the sum of the histograms with the given names.
@param bckgrd_names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 152 of file histogram.py.

def get_false_positives(self, bckgrd_names):
    """
    Return the cumulative false positives in each bin of the sum of the histograms with the given names.
    For every bin this is the summed content of all higher bins.
    @param bckgrd_names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    bckgrd_hist = self.get_summed_hist(bckgrd_names)[0]
    # Total content minus the running sum leaves what lies above each bin
    remaining = bckgrd_hist.sum() - bckgrd_hist.cumsum()
    cumbackground = remaining.astype('float')
    return cumbackground, poisson_error(cumbackground)

◆ get_hist()

get_hist ( self,
name = None )
Return histogram with the given name. If none returns histogram of the full data.
@param name name of the histogram
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 104 of file histogram.py.

def get_hist(self, name=None):
    """
    Return the histogram with the given name. Without a name the histogram of the full data is returned.
    @param name name of the histogram
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    if name is not None:
        # A named histogram is just a sum over a single name
        return self.get_summed_hist([name])
    full_hist = self.hist
    return full_hist, poisson_error(full_hist)

◆ get_purity()

get_purity ( self,
signal_names,
bckgrd_names )
Return the cumulative purity in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 163 of file histogram.py.

def get_purity(self, signal_names, bckgrd_names):
    """
    Return the cumulative purity in each bin of the sum of the histograms with the given names.
    The purity is signal / (signal + background), both taken as the summed content above each bin.
    @param signal_names names of the signal histograms
    @param bckgrd_names names of the background histograms
    @return numpy.array with hist data, numpy.array with corresponding binomial errors
    """
    signal_hist = self.get_summed_hist(signal_names)[0]
    bckgrd_hist = self.get_summed_hist(bckgrd_names)[0]
    signal_above = (signal_hist.sum() - signal_hist.cumsum()).astype('float')
    bckgrd_above = (bckgrd_hist.sum() - bckgrd_hist.cumsum()).astype('float')

    # Bins without any content above them divide 0 by 0; silence the warnings
    with numpy.errstate(divide='ignore', invalid='ignore'):
        purity = signal_above / (signal_above + bckgrd_above)
        purity_error = binom_error(signal_above, signal_above + bckgrd_above)
    return purity, purity_error

◆ get_purity_per_bin()

get_purity_per_bin ( self,
signal_names,
bckgrd_names )
Return the purity in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with hist data, numpy.array with corresponding binomial errors

Definition at line 195 of file histogram.py.

def get_purity_per_bin(self, signal_names, bckgrd_names):
    """
    Return the purity in each bin of the sum of the histograms with the given names.
    Unlike get_purity no cumulative sum is taken, every bin is evaluated on its own.
    @param signal_names names of the signal histograms
    @param bckgrd_names names of the background histograms
    @return numpy.array with hist data, numpy.array with corresponding binomial errors
    """
    signal_hist = self.get_summed_hist(signal_names)[0].astype('float')
    bckgrd_hist = self.get_summed_hist(bckgrd_names)[0].astype('float')

    # Empty bins divide 0 by 0; silence the warnings
    with numpy.errstate(divide='ignore', invalid='ignore'):
        purity = signal_hist / (signal_hist + bckgrd_hist)
        purity_error = binom_error(signal_hist, signal_hist + bckgrd_hist)
    return purity, purity_error

◆ get_signal_to_noise()

get_signal_to_noise ( self,
signal_names,
bckgrd_names )
Return the cumulative signal to noise ratio in each bin of the sum of the histograms with the given names.
@param signal_names names of the signal histograms
@param bckgrd_names names of the background histograms
@return numpy.array with the signal to noise ratio, numpy.array with the corresponding errors

Definition at line 179 of file histogram.py.

def get_signal_to_noise(self, signal_names, bckgrd_names):
    """
    Return the cumulative signal to noise ratio in each bin of the sum of the histograms with the given names.
    The ratio is signal / sqrt(signal + background), both taken as the summed content above each bin.
    @param signal_names names of the signal histograms
    @param bckgrd_names names of the background histograms
    @return numpy.array with the signal to noise ratio, numpy.array with the corresponding errors
    """
    signal_hist = self.get_summed_hist(signal_names)[0]
    bckgrd_hist = self.get_summed_hist(bckgrd_names)[0]
    signal_above = (signal_hist.sum() - signal_hist.cumsum()).astype('float')
    bckgrd_above = (bckgrd_hist.sum() - bckgrd_hist.cumsum()).astype('float')

    # Bins without any content above them divide 0 by 0; silence the warnings
    with numpy.errstate(divide='ignore', invalid='ignore'):
        signal2noise = signal_above / (signal_above + bckgrd_above)**0.5
        signal2noise_error = numpy.sqrt(signal_above * bckgrd_above) / (signal_above + bckgrd_above)
    return signal2noise, signal2noise_error

◆ get_summed_hist()

get_summed_hist ( self,
names )
Return the sum of histograms with the given names.
@param names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 114 of file histogram.py.

def get_summed_hist(self, names):
    """
    Return the sum of histograms with the given names.
    Names without a stored histogram contribute an all-zero histogram;
    an empty list of names yields an all-zero histogram as well.
    @param names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    default = numpy.zeros(len(self.bin_centers))
    selected = [self.hists.get(name, default) for name in names]
    # numpy.sum must not be fed a generator (deprecated, falls back to the builtin sum);
    # summing the stacked histograms along axis 0 gives the bin-wise sum instead.
    hist = numpy.sum(selected, axis=0) if selected else default
    hist_error = poisson_error(hist)
    return hist, hist_error

◆ get_true_positives()

get_true_positives ( self,
signal_names )
Return the cumulative true positives in each bin of the sum of the histograms with the given names.
@param signal_names names of the histograms
@return numpy.array with hist data, numpy.array with corresponding poisson errors

Definition at line 141 of file histogram.py.

def get_true_positives(self, signal_names):
    """
    Return the cumulative true positives in each bin of the sum of the histograms with the given names.
    For every bin this is the summed content of all higher bins.
    @param signal_names names of the histograms
    @return numpy.array with hist data, numpy.array with corresponding poisson errors
    """
    signal_hist = self.get_summed_hist(signal_names)[0]
    # Total content minus the running sum leaves what lies above each bin
    remaining = signal_hist.sum() - signal_hist.cumsum()
    cumsignal = remaining.astype('float')
    return cumsignal, poisson_error(cumsignal)

Member Data Documentation

◆ bin_centers

tuple bin_centers = None
static

Bin centers.

bin centers

Definition at line 55 of file histogram.py.

◆ bin_widths

tuple bin_widths = None
static

Bin widths.

bin centers

Subtract a small number from the bin width, otherwise the errorband plot is unstable.

Definition at line 57 of file histogram.py.

◆ bins

bins = None
static

Binning.

Definition at line 53 of file histogram.py.

◆ hist

hist = None
static

Histogram of the full data.

create histogram

Definition at line 51 of file histogram.py.

◆ hists

hists = None
static

Dictionary of histograms for the given masks.

bin centers

Subtract a small number from the bin width, otherwise the errorband plot is unstable.

initialize empty dictionary for histograms

Definition at line 59 of file histogram.py.


The documentation for this class was generated from the following file: