Belle II Software light-2601-hyperion
ReweighterParticle Class Reference

Public Member Functions

str get_varname (self, str varname)
 
list get_binning_variables (self)
 
list get_pdg_variables (self)
 
None generate_variations (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
np.ndarray get_covariance (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
str __str__ (self)
 
 plot_coverage (self, fig=None, axs=None)
 

Public Attributes

 variable_aliases
 Variable aliases of the weight table.
 
str prefix
 Prefix of the particle in the ntuple.
 
str type
 Type of the particle (PID or FEI).
 
str weight_name
 Weight column name that will be added to the ntuple.
 
 pdg_binning
 Kinematic binning of the weight table per particle.
 

Static Public Attributes

pd.DataFrame merged_table
 Merged table of the weights.
 
list column_names = None
 Internal list of the names of the weight columns.
 
int sys_seed = None
 Random seed for systematics.
 
np.ndarray cov = None
 Covariance matrix corresponding to the total uncertainty.
 
bool syscorr = True
 When true assume systematics are 100% correlated.
 
float coverage = None
 Coverage of the user ntuple.
 
dict plot_values = None
 Values for the plots.
 

Detailed Description

Class that stores the information of a particle.

Definition at line 45 of file sysvar.py.

Member Function Documentation

◆ __str__()

str __str__ ( self)
Converts the object to a string.

Definition at line 169 of file sysvar.py.

169 def __str__(self) -> str:
170 """
171 Converts the object to a string.
172 """
173 separator = '------------------'
174 title = 'ReweighterParticle'
175 prefix_str = f'Type: {self.type} Prefix: {self.prefix}'
176 columns = _weight_cols
177 merged_table_str = f'Merged table:\n{self.merged_table[columns].describe()}'
178 pdg_binning_str = 'PDG binning:\n'
179 for pdgs in self.pdg_binning:
180 pdg_binning_str += f'{pdgs}: {self.pdg_binning[pdgs]}\n'
181 return '\n'.join([separator, title, prefix_str, merged_table_str, pdg_binning_str]) + separator
182

◆ generate_variations()

None generate_variations ( self,
int n_variations,
np.ndarray rho_sys = None,
np.ndarray rho_stat = None )
Generates variations of weights according to the uncertainties

Definition at line 117 of file sysvar.py.

120 rho_stat: np.ndarray = None) -> None:
121 """
122 Generates variations of weights according to the uncertainties
123 """
124 self.merged_table['stat_error'] = self.merged_table[["data_MC_uncertainty_stat_up",
125 "data_MC_uncertainty_stat_dn"]].max(axis=1)
126 self.merged_table['sys_error'] = self.merged_table[["data_MC_uncertainty_sys_up",
127 "data_MC_uncertainty_sys_dn"]].max(axis=1)
128 self.merged_table["error"] = np.sqrt(self.merged_table["stat_error"] ** 2 + self.merged_table["sys_error"] ** 2)
129 means = self.merged_table["data_MC_ratio"].values
130
131 self.column_names = [f"{self.weight_name}_{i}" for i in range(n_variations)]
132 cov = self.get_covariance(n_variations, rho_sys, rho_stat)
133 weights = cov + means
134 self.merged_table[self.weight_name] = self.merged_table["data_MC_ratio"]
135 self.merged_table[self.column_names] = weights.T
136 self.column_names.insert(0, self.weight_name)
137

◆ get_binning_variables()

list get_binning_variables ( self)
Returns the list of variables that are used for the binning

Definition at line 100 of file sysvar.py.

100 def get_binning_variables(self) -> list:
101 """
102 Returns the list of variables that are used for the binning
103 """
104 variables = set(sum([list(d.keys()) for d in self.pdg_binning.values()], []))
105 return [f'{self.get_varname(var)}' for var in variables]
106

◆ get_covariance()

np.ndarray get_covariance ( self,
int n_variations,
np.ndarray rho_sys = None,
np.ndarray rho_stat = None )
Returns the covariance matrix of the weights

Definition at line 138 of file sysvar.py.

141 rho_stat: np.ndarray = None) -> np.ndarray:
142 """
143 Returns the covariance matrix of the weights
144 """
145 len_means = len(self.merged_table["data_MC_ratio"])
146 zeros = np.zeros(len_means)
147 if self.cov is None:
148 if rho_sys is None:
149 if self.syscorr:
150 rho_sys = np.ones((len_means, len_means))
151 else:
152 rho_sys = np.identity(len_means)
153 if rho_stat is None:
154 rho_stat = np.identity(len_means)
155 sys_cov = np.matmul(
156 np.matmul(np.diag(self.merged_table['sys_error']), rho_sys), np.diag(self.merged_table['sys_error'])
157 )
158 stat_cov = np.matmul(
159 np.matmul(np.diag(self.merged_table['stat_error']), rho_stat), np.diag(self.merged_table['stat_error'])
160 )
161 np.random.seed(self.sys_seed)
162 sys = np.random.multivariate_normal(zeros, sys_cov, n_variations)
163 np.random.seed(None)
164 stat = np.random.multivariate_normal(zeros, stat_cov, n_variations)
165 return sys + stat
166 errors = np.random.multivariate_normal(zeros, self.cov, n_variations)
167 return errors
168

◆ get_pdg_variables()

list get_pdg_variables ( self)
Returns the list of variables that are used for the PDG codes

Definition at line 107 of file sysvar.py.

107 def get_pdg_variables(self) -> list:
108 """
109 Returns the list of variables that are used for the PDG codes
110 """
111 pdg_vars = ['PDG']
112
113 if self.type == "PID":
114 pdg_vars += ['mcPDG']
115 return [f'{self.get_varname(var)}' for var in pdg_vars]
116

◆ get_varname()

str get_varname ( self,
str varname )
Returns the variable name with the prefix, using the alias if one is defined.

Definition at line 89 of file sysvar.py.

89 def get_varname(self, varname: str) -> str:
90 """
91 Returns the variable name with the prefix and use alias if defined.
92 """
93 name = varname
94 if self.variable_aliases and varname in self.variable_aliases:
95 name = self.variable_aliases[varname]
96 if name.startswith(self.prefix):
97 return name
98 return f'{self.prefix}{name}'
99

◆ plot_coverage()

plot_coverage ( self,
fig = None,
axs = None )
Plots the coverage of the ntuple.

Definition at line 183 of file sysvar.py.

183 def plot_coverage(self, fig=None, axs=None):
184 """
185 Plots the coverage of the ntuple.
186 """
187 if self.plot_values is None:
188 return
189 vars = set(sum([list(d.keys()) for d in self.plot_values.values()], []))
190 if fig is None:
191 fig, axs = plt.subplots(len(self.plot_values), len(vars), figsize=(5*len(vars), 3*len(self.plot_values)), dpi=120)
192 axs = np.array(axs)
193 if len(axs.shape) < 1:
194 axs = axs.reshape(len(self.plot_values), len(vars))
195 bin_plt = {'linewidth': 3, 'linestyle': '--', 'color': '0.5'}
196 fig.suptitle(f'{self.type} particle {self.prefix.strip("_")}')
197 for (reco_pdg, mc_pdg), ax_row in zip(self.plot_values, axs):
198 for var, ax in zip(self.plot_values[(reco_pdg, mc_pdg)], ax_row):
199 ymin = 0
200 ymax = self.plot_values[(reco_pdg, mc_pdg)][var][1].max()*1.1
201 # Plot binning
202 if self.type == 'PID':
203 ax.vlines(self.pdg_binning[(reco_pdg, mc_pdg)][var], ymin, ymax,
204 label='Binning',
205 alpha=0.8,
206 **bin_plt)
207 elif self.type == 'FEI':
208 values = np.array([int(val[4:]) for val in self.pdg_binning[(reco_pdg, mc_pdg)][var]])
209 ax.bar(values+0.5,
210 np.ones(len(values))*ymax,
211 width=1,
212 alpha=0.5,
213 label='Binning',
214 **bin_plt)
215 rest = np.setdiff1d(self.plot_values[(reco_pdg, mc_pdg)][var][0], values)
216 ax.bar(rest+0.5,
217 np.ones(len(rest))*ymax,
218 width=1,
219 alpha=0.2,
220 label='Rest category',
221 **bin_plt)
222 # Plot values
223 widths = (self.plot_values[(reco_pdg, mc_pdg)][var][0][1:] - self.plot_values[(reco_pdg, mc_pdg)][var][0][:-1])
224 centers = self.plot_values[(reco_pdg, mc_pdg)][var][0][:-1] + widths/2
225 ax.bar(centers,
226 self.plot_values[(reco_pdg, mc_pdg)][var][1],
227 width=widths,
228 label='Values',
229 alpha=0.8)
230 ax.set_title(f'True {pdg.to_name(mc_pdg)} to reco {pdg.to_name(reco_pdg)} coverage')
231 ax.set_xlabel(var)
232 axs[-1][-1].legend()
233 fig.tight_layout()
234 return fig, axs
235
236

Member Data Documentation

◆ column_names

list column_names = None
static

Internal list of the names of the weight columns.

Definition at line 72 of file sysvar.py.

◆ cov

np.ndarray cov = None
static

Covariance matrix corresponding to the total uncertainty.

Definition at line 78 of file sysvar.py.

◆ coverage

float coverage = None
static

Coverage of the user ntuple.

Definition at line 84 of file sysvar.py.

◆ merged_table

pd.DataFrame merged_table
static

Merged table of the weights.

Definition at line 57 of file sysvar.py.

◆ pdg_binning

pdg_binning

Kinematic binning of the weight table per particle.

Definition at line 179 of file sysvar.py.

◆ plot_values

dict plot_values = None
static

Values for the plots.

Definition at line 87 of file sysvar.py.

◆ prefix

str prefix

Prefix of the particle in the ntuple.

Definition at line 96 of file sysvar.py.

◆ sys_seed

int sys_seed = None
static

Random seed for systematics.

Definition at line 75 of file sysvar.py.

◆ syscorr

bool syscorr = True
static

When true assume systematics are 100% correlated.

Definition at line 81 of file sysvar.py.

◆ type

str type

Type of the particle (PID or FEI).

Definition at line 113 of file sysvar.py.

◆ variable_aliases

variable_aliases

Variable aliases of the weight table.

Definition at line 94 of file sysvar.py.

◆ weight_name

str weight_name

Weight column name that will be added to the ntuple.

Definition at line 136 of file sysvar.py.


The documentation for this class was generated from the following file: