Belle II Software development
ReweighterParticle Class Reference

Public Member Functions

str get_varname (self, str varname)
 
list get_binning_variables (self)
 
list get_pdg_variables (self)
 
None generate_variations (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
np.ndarray get_covariance (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
str __str__ (self)
 
def plot_coverage (self, fig=None, axs=None)
 

Public Attributes

 type
 Add the mcPDG code requirement for PID particle.
 
 column_names
 Names of the varied weight columns.
 

Static Public Attributes

str prefix
 Prefix of the particle in the ntuple.
 
str type
 Type of the particle (PID or FEI)
 
pd.DataFrame merged_table
 Merged table of the weights.
 
dict pdg_binning
 Kinematic binning of the weight table per particle.
 
dict variable_aliases
 Variable aliases of the weight table.
 
str weight_name
 Weight column name that will be added to the ntuple.
 
list column_names = None
 Internal list of the names of the weight columns.
 
int sys_seed = None
 Random seed for systematics.
 
np.ndarray cov = None
 Covariance matrix corresponding to the total uncertainty.
 
bool syscorr = True
 When true, assume systematics are 100% correlated.
 
float coverage = None
 Coverage of the user ntuple.
 
dict plot_values = None
 Values for the plots.
 

Detailed Description

Class that stores the information of a particle.

Definition at line 36 of file sysvar.py.

Member Function Documentation

◆ __str__()

str __str__ (   self)
Converts the object to a string.

Definition at line 156 of file sysvar.py.

156 def __str__(self) -> str:
157 """
158 Converts the object to a string.
159 """
160 separator = '------------------'
161 title = 'ReweighterParticle'
162 prefix_str = f'Type: {self.type} Prefix: {self.prefix}'
163 columns = _weight_cols
164 merged_table_str = f'Merged table:\n{self.merged_table[columns].describe()}'
165 pdg_binning_str = 'PDG binning:\n'
166 for pdgs in self.pdg_binning:
167 pdg_binning_str += f'{pdgs}: {self.pdg_binning[pdgs]}\n'
168 return '\n'.join([separator, title, prefix_str, merged_table_str, pdg_binning_str]) + separator
169

◆ generate_variations()

None generate_variations (   self,
int  n_variations,
np.ndarray   rho_sys = None,
np.ndarray   rho_stat = None 
)
Generates variations of weights according to the uncertainties

Definition at line 104 of file sysvar.py.

107 rho_stat: np.ndarray = None) -> None:
108 """
109 Generates variations of weights according to the uncertainties
110 """
111 self.merged_table['stat_error'] = self.merged_table[["data_MC_uncertainty_stat_up",
112 "data_MC_uncertainty_stat_dn"]].max(axis=1)
113 self.merged_table['sys_error'] = self.merged_table[["data_MC_uncertainty_sys_up",
114 "data_MC_uncertainty_sys_dn"]].max(axis=1)
115 self.merged_table["error"] = np.sqrt(self.merged_table["stat_error"] ** 2 + self.merged_table["sys_error"] ** 2)
116 means = self.merged_table["data_MC_ratio"].values
117
118 self.column_names = [f"{self.weight_name}_{i}" for i in range(n_variations)]
119 cov = self.get_covariance(n_variations, rho_sys, rho_stat)
120 weights = cov + means
121 self.merged_table[self.weight_name] = self.merged_table["data_MC_ratio"]
122 self.merged_table[self.column_names] = weights.T
123 self.column_names.insert(0, self.weight_name)
124

◆ get_binning_variables()

list get_binning_variables (   self)
Returns the list of variables that are used for the binning

Definition at line 87 of file sysvar.py.

87 def get_binning_variables(self) -> list:
88 """
89 Returns the list of variables that are used for the binning
90 """
91 variables = set(sum([list(d.keys()) for d in self.pdg_binning.values()], []))
92 return [f'{self.get_varname(var)}' for var in variables]
93

◆ get_covariance()

np.ndarray get_covariance (   self,
int  n_variations,
np.ndarray   rho_sys = None,
np.ndarray   rho_stat = None 
)
Returns the covariance matrix of the weights

Definition at line 125 of file sysvar.py.

128 rho_stat: np.ndarray = None) -> np.ndarray:
129 """
130 Returns the covariance matrix of the weights
131 """
132 len_means = len(self.merged_table["data_MC_ratio"])
133 zeros = np.zeros(len_means)
134 if self.cov is None:
135 if rho_sys is None:
136 if self.syscorr:
137 rho_sys = np.ones((len_means, len_means))
138 else:
139 rho_sys = np.identity(len_means)
140 if rho_stat is None:
141 rho_stat = np.identity(len_means)
142 sys_cov = np.matmul(
143 np.matmul(np.diag(self.merged_table['sys_error']), rho_sys), np.diag(self.merged_table['sys_error'])
144 )
145 stat_cov = np.matmul(
146 np.matmul(np.diag(self.merged_table['stat_error']), rho_stat), np.diag(self.merged_table['stat_error'])
147 )
148 np.random.seed(self.sys_seed)
149 sys = np.random.multivariate_normal(zeros, sys_cov, n_variations)
150 np.random.seed(None)
151 stat = np.random.multivariate_normal(zeros, stat_cov, n_variations)
152 return sys + stat
153 errors = np.random.multivariate_normal(zeros, self.cov, n_variations)
154 return errors
155

◆ get_pdg_variables()

list get_pdg_variables (   self)
Returns the list of variables that are used for the PDG codes

Definition at line 94 of file sysvar.py.

94 def get_pdg_variables(self) -> list:
95 """
96 Returns the list of variables that are used for the PDG codes
97 """
98 pdg_vars = ['PDG']
99
100 if self.type == "PID":
101 pdg_vars += ['mcPDG']
102 return [f'{self.get_varname(var)}' for var in pdg_vars]
103

◆ get_varname()

str get_varname (   self,
str  varname 
)
Returns the variable name with the prefix, using the alias if one is defined.

Definition at line 76 of file sysvar.py.

76 def get_varname(self, varname: str) -> str:
77 """
78 Returns the variable name with the prefix and use alias if defined.
79 """
80 name = varname
81 if self.variable_aliases and varname in self.variable_aliases:
82 name = self.variable_aliases[varname]
83 if name.startswith(self.prefix):
84 return name
85 return f'{self.prefix}{name}'
86

◆ plot_coverage()

def plot_coverage (   self,
  fig = None,
  axs = None 
)
Plots the coverage of the ntuple.

Definition at line 170 of file sysvar.py.

170 def plot_coverage(self, fig=None, axs=None):
171 """
172 Plots the coverage of the ntuple.
173 """
174 if self.plot_values is None:
175 return
176 vars = set(sum([list(d.keys()) for d in self.plot_values.values()], []))
177 if fig is None:
178 fig, axs = plt.subplots(len(self.plot_values), len(vars), figsize=(5*len(vars), 3*len(self.plot_values)), dpi=120)
179 axs = np.array(axs)
180 if len(axs.shape) < 1:
181 axs = axs.reshape(len(self.plot_values), len(vars))
182 bin_plt = {'linewidth': 3, 'linestyle': '--', 'color': '0.5'}
183 fig.suptitle(f'{self.type} particle {self.prefix.strip("_")}')
184 for (reco_pdg, mc_pdg), ax_row in zip(self.plot_values, axs):
185 for var, ax in zip(self.plot_values[(reco_pdg, mc_pdg)], ax_row):
186 ymin = 0
187 ymax = self.plot_values[(reco_pdg, mc_pdg)][var][1].max()*1.1
188 # Plot binning
189 if self.type == 'PID':
190 ax.vlines(self.pdg_binning[(reco_pdg, mc_pdg)][var], ymin, ymax,
191 label='Binning',
192 alpha=0.8,
193 **bin_plt)
194 elif self.type == 'FEI':
195 values = np.array([int(val[4:]) for val in self.pdg_binning[(reco_pdg, mc_pdg)][var]])
196 ax.bar(values+0.5,
197 np.ones(len(values))*ymax,
198 width=1,
199 alpha=0.5,
200 label='Binning',
201 **bin_plt)
202 rest = np.setdiff1d(self.plot_values[(reco_pdg, mc_pdg)][var][0], values)
203 ax.bar(rest+0.5,
204 np.ones(len(rest))*ymax,
205 width=1,
206 alpha=0.2,
207 label='Rest category',
208 **bin_plt)
209 # Plot values
210 widths = (self.plot_values[(reco_pdg, mc_pdg)][var][0][1:] - self.plot_values[(reco_pdg, mc_pdg)][var][0][:-1])
211 centers = self.plot_values[(reco_pdg, mc_pdg)][var][0][:-1] + widths/2
212 ax.bar(centers,
213 self.plot_values[(reco_pdg, mc_pdg)][var][1],
214 width=widths,
215 label='Values',
216 alpha=0.8)
217 ax.set_title(f'True {pdg.to_name(mc_pdg)} to reco {pdg.to_name(reco_pdg)} coverage')
218 ax.set_xlabel(var)
219 axs[-1][-1].legend()
220 fig.tight_layout()
221 return fig, axs
222
223

Member Data Documentation

◆ column_names [1/2]

list column_names = None
static

Internal list of the names of the weight columns.

Definition at line 59 of file sysvar.py.

◆ column_names [2/2]

column_names

Names of the varied weight columns.

Definition at line 118 of file sysvar.py.

◆ cov

np.ndarray cov = None
static

Covariance matrix corresponding to the total uncertainty.

Definition at line 65 of file sysvar.py.

◆ coverage

float coverage = None
static

Coverage of the user ntuple.

Definition at line 71 of file sysvar.py.

◆ merged_table

pd.DataFrame merged_table
static

Merged table of the weights.

Definition at line 47 of file sysvar.py.

◆ pdg_binning

dict pdg_binning
static

Kinematic binning of the weight table per particle.

Definition at line 50 of file sysvar.py.

◆ plot_values

dict plot_values = None
static

Values for the plots.

Definition at line 74 of file sysvar.py.

◆ prefix

str prefix
static

Prefix of the particle in the ntuple.

Definition at line 41 of file sysvar.py.

◆ sys_seed

int sys_seed = None
static

Random seed for systematics.

Definition at line 62 of file sysvar.py.

◆ syscorr

bool syscorr = True
static

When true, assume systematics are 100% correlated.

Definition at line 68 of file sysvar.py.

◆ type [1/2]

str type
static

Type of the particle (PID or FEI)

Definition at line 44 of file sysvar.py.

◆ type [2/2]

type

Add the mcPDG code requirement for PID particle.

Definition at line 100 of file sysvar.py.

◆ variable_aliases

dict variable_aliases
static

Variable aliases of the weight table.

Definition at line 53 of file sysvar.py.

◆ weight_name

str weight_name
static

Weight column name that will be added to the ntuple.

Definition at line 56 of file sysvar.py.


The documentation for this class was generated from the following file: