Belle II Software development
ReweighterParticle Class Reference

Public Member Functions

str get_varname (self, str varname)
 
list get_binning_variables (self)
 
list get_pdg_variables (self)
 
None generate_variations (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
np.ndarray get_covariance (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
str __str__ (self)
 
 plot_coverage (self, fig=None, axs=None)
 

Public Attributes

 variable_aliases
 Variable aliases of the weight table.
 
 prefix
 Prefix of the particle in the ntuple.
 
str type
 Type of the particle (PID or FEI); the mcPDG code requirement is added for PID particles.
 
 weight_name
 Weight column name that will be added to the ntuple.
 
 pdg_binning
 Kinematic binning of the weight table per particle.
 

Static Public Attributes

pd.DataFrame merged_table
 Merged table of the weights.
 
list column_names = None
 Internal list of the names of the weight columns.
 
int sys_seed = None
 Random seed for systematics.
 
np cov = None
 Covariance matrix corresponding to the total uncertainty.
 
bool syscorr = True
 When true, assume systematics are 100% correlated.
 
float coverage = None
 Coverage of the user ntuple.
 
dict plot_values = None
 Values for the plots.
 

Detailed Description

Class that stores the information of a particle.

Definition at line 36 of file sysvar.py.

Member Function Documentation

◆ __str__()

str __str__ ( self)
Converts the object to a string.

Definition at line 160 of file sysvar.py.

160 def __str__(self) -> str:
161 """
162 Converts the object to a string.
163 """
164 separator = '------------------'
165 title = 'ReweighterParticle'
166 prefix_str = f'Type: {self.type} Prefix: {self.prefix}'
167 columns = _weight_cols
168 merged_table_str = f'Merged table:\n{self.merged_table[columns].describe()}'
169 pdg_binning_str = 'PDG binning:\n'
170 for pdgs in self.pdg_binning:
171 pdg_binning_str += f'{pdgs}: {self.pdg_binning[pdgs]}\n'
172 return '\n'.join([separator, title, prefix_str, merged_table_str, pdg_binning_str]) + separator
173

◆ generate_variations()

None generate_variations ( self,
int n_variations,
np.ndarray rho_sys = None,
np.ndarray rho_stat = None )
Generates variations of weights according to the uncertainties

Definition at line 108 of file sysvar.py.

111 rho_stat: np.ndarray = None) -> None:
112 """
113 Generates variations of weights according to the uncertainties
114 """
115 self.merged_table['stat_error'] = self.merged_table[["data_MC_uncertainty_stat_up",
116 "data_MC_uncertainty_stat_dn"]].max(axis=1)
117 self.merged_table['sys_error'] = self.merged_table[["data_MC_uncertainty_sys_up",
118 "data_MC_uncertainty_sys_dn"]].max(axis=1)
119 self.merged_table["error"] = np.sqrt(self.merged_table["stat_error"] ** 2 + self.merged_table["sys_error"] ** 2)
120 means = self.merged_table["data_MC_ratio"].values
121
122 self.column_names = [f"{self.weight_name}_{i}" for i in range(n_variations)]
123 cov = self.get_covariance(n_variations, rho_sys, rho_stat)
124 weights = cov + means
125 self.merged_table[self.weight_name] = self.merged_table["data_MC_ratio"]
126 self.merged_table[self.column_names] = weights.T
127 self.column_names.insert(0, self.weight_name)
128

◆ get_binning_variables()

list get_binning_variables ( self)
Returns the list of variables that are used for the binning

Definition at line 91 of file sysvar.py.

91 def get_binning_variables(self) -> list:
92 """
93 Returns the list of variables that are used for the binning
94 """
95 variables = set(sum([list(d.keys()) for d in self.pdg_binning.values()], []))
96 return [f'{self.get_varname(var)}' for var in variables]
97

◆ get_covariance()

np.ndarray get_covariance ( self,
int n_variations,
np.ndarray rho_sys = None,
np.ndarray rho_stat = None )
Returns the covariance matrix of the weights

Definition at line 129 of file sysvar.py.

132 rho_stat: np.ndarray = None) -> np.ndarray:
133 """
134 Returns the covariance matrix of the weights
135 """
136 len_means = len(self.merged_table["data_MC_ratio"])
137 zeros = np.zeros(len_means)
138 if self.cov is None:
139 if rho_sys is None:
140 if self.syscorr:
141 rho_sys = np.ones((len_means, len_means))
142 else:
143 rho_sys = np.identity(len_means)
144 if rho_stat is None:
145 rho_stat = np.identity(len_means)
146 sys_cov = np.matmul(
147 np.matmul(np.diag(self.merged_table['sys_error']), rho_sys), np.diag(self.merged_table['sys_error'])
148 )
149 stat_cov = np.matmul(
150 np.matmul(np.diag(self.merged_table['stat_error']), rho_stat), np.diag(self.merged_table['stat_error'])
151 )
152 np.random.seed(self.sys_seed)
153 sys = np.random.multivariate_normal(zeros, sys_cov, n_variations)
154 np.random.seed(None)
155 stat = np.random.multivariate_normal(zeros, stat_cov, n_variations)
156 return sys + stat
157 errors = np.random.multivariate_normal(zeros, self.cov, n_variations)
158 return errors
159

◆ get_pdg_variables()

list get_pdg_variables ( self)
Returns the list of variables that are used for the PDG codes

Definition at line 98 of file sysvar.py.

98 def get_pdg_variables(self) -> list:
99 """
100 Returns the list of variables that are used for the PDG codes
101 """
102 pdg_vars = ['PDG']
103
104 if self.type == "PID":
105 pdg_vars += ['mcPDG']
106 return [f'{self.get_varname(var)}' for var in pdg_vars]
107

◆ get_varname()

str get_varname ( self,
str varname )
Returns the variable name with the prefix, using the alias if one is defined.

Definition at line 80 of file sysvar.py.

80 def get_varname(self, varname: str) -> str:
81 """
82 Returns the variable name with the prefix and use alias if defined.
83 """
84 name = varname
85 if self.variable_aliases and varname in self.variable_aliases:
86 name = self.variable_aliases[varname]
87 if name.startswith(self.prefix):
88 return name
89 return f'{self.prefix}{name}'
90

◆ plot_coverage()

plot_coverage ( self,
fig = None,
axs = None )
Plots the coverage of the ntuple.

Definition at line 174 of file sysvar.py.

174 def plot_coverage(self, fig=None, axs=None):
175 """
176 Plots the coverage of the ntuple.
177 """
178 if self.plot_values is None:
179 return
180 vars = set(sum([list(d.keys()) for d in self.plot_values.values()], []))
181 if fig is None:
182 fig, axs = plt.subplots(len(self.plot_values), len(vars), figsize=(5*len(vars), 3*len(self.plot_values)), dpi=120)
183 axs = np.array(axs)
184 if len(axs.shape) < 1:
185 axs = axs.reshape(len(self.plot_values), len(vars))
186 bin_plt = {'linewidth': 3, 'linestyle': '--', 'color': '0.5'}
187 fig.suptitle(f'{self.type} particle {self.prefix.strip("_")}')
188 for (reco_pdg, mc_pdg), ax_row in zip(self.plot_values, axs):
189 for var, ax in zip(self.plot_values[(reco_pdg, mc_pdg)], ax_row):
190 ymin = 0
191 ymax = self.plot_values[(reco_pdg, mc_pdg)][var][1].max()*1.1
192 # Plot binning
193 if self.type == 'PID':
194 ax.vlines(self.pdg_binning[(reco_pdg, mc_pdg)][var], ymin, ymax,
195 label='Binning',
196 alpha=0.8,
197 **bin_plt)
198 elif self.type == 'FEI':
199 values = np.array([int(val[4:]) for val in self.pdg_binning[(reco_pdg, mc_pdg)][var]])
200 ax.bar(values+0.5,
201 np.ones(len(values))*ymax,
202 width=1,
203 alpha=0.5,
204 label='Binning',
205 **bin_plt)
206 rest = np.setdiff1d(self.plot_values[(reco_pdg, mc_pdg)][var][0], values)
207 ax.bar(rest+0.5,
208 np.ones(len(rest))*ymax,
209 width=1,
210 alpha=0.2,
211 label='Rest category',
212 **bin_plt)
213 # Plot values
214 widths = (self.plot_values[(reco_pdg, mc_pdg)][var][0][1:] - self.plot_values[(reco_pdg, mc_pdg)][var][0][:-1])
215 centers = self.plot_values[(reco_pdg, mc_pdg)][var][0][:-1] + widths/2
216 ax.bar(centers,
217 self.plot_values[(reco_pdg, mc_pdg)][var][1],
218 width=widths,
219 label='Values',
220 alpha=0.8)
221 ax.set_title(f'True {pdg.to_name(mc_pdg)} to reco {pdg.to_name(reco_pdg)} coverage')
222 ax.set_xlabel(var)
223 axs[-1][-1].legend()
224 fig.tight_layout()
225 return fig, axs
226
227

Member Data Documentation

◆ column_names

list column_names = None
static

Internal list of the names of the weight columns.

Definition at line 63 of file sysvar.py.

◆ cov

np cov = None
static

Covariance matrix corresponding to the total uncertainty.

Definition at line 69 of file sysvar.py.

◆ coverage

float coverage = None
static

Coverage of the user ntuple.

Definition at line 75 of file sysvar.py.

◆ merged_table

pd.DataFrame merged_table
static

Type of the particle (PID or FEI)

Merged table of the weights

Definition at line 48 of file sysvar.py.

◆ pdg_binning

pdg_binning

Kinematic binning of the weight table per particle.

Definition at line 170 of file sysvar.py.

◆ plot_values

plot_values = None
static

Values for the plots.

Definition at line 78 of file sysvar.py.

◆ prefix

prefix

Prefix of the particle in the ntuple.

Definition at line 87 of file sysvar.py.

◆ sys_seed

sys_seed = None
static

Random seed for systematics.

Definition at line 66 of file sysvar.py.

◆ syscorr

bool syscorr = True
static

When true, assume systematics are 100% correlated.

Definition at line 72 of file sysvar.py.

◆ type

str type

Type of the particle (PID or FEI); the mcPDG code requirement is added for PID particles.

Definition at line 104 of file sysvar.py.

◆ variable_aliases

variable_aliases

Variable aliases of the weight table.

Definition at line 85 of file sysvar.py.

◆ weight_name

weight_name

Weight column name that will be added to the ntuple.

Definition at line 127 of file sysvar.py.


The documentation for this class was generated from the following file: