Belle II Software development
ReweighterParticle Class Reference

Public Member Functions

str get_varname (self, str varname)
 
list get_binning_variables (self)
 
list get_pdg_variables (self)
 
None generate_variations (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
np.ndarray get_covariance (self, int n_variations, np.ndarray rho_sys=None, np.ndarray rho_stat=None)
 
str __str__ (self)
 
def plot_coverage (self, fig=None, axs=None)
 

Public Attributes

 type
 Type of the particle; for PID particles the mcPDG code requirement is added.
 
 column_names
 Names of the varied weight columns.
 

Static Public Attributes

str prefix
 Prefix of the particle in the ntuple.
 
str type
 Type of the particle (PID or FEI)
 
pd.DataFrame merged_table
 Merged table of the weights.
 
dict pdg_binning
 Kinematic binning of the weight table per particle.
 
dict variable_aliases
 Variable aliases of the weight table.
 
str weight_name
 Weight column name that will be added to the ntuple.
 
list column_names = None
 Internal list of the names of the weight columns.
 
int sys_seed = None
 Random seed for systematics.
 
np.ndarray cov = None
 Covariance matrix corresponding to the total uncertainty.
 
bool syscorr = True
 When true, assume systematics are 100% correlated.
 
float coverage = None
 Coverage of the user ntuple.
 
dict plot_values = None
 Values for the plots.
 

Detailed Description

Class that stores the information of a particle.

Definition at line 34 of file sysvar.py.

Member Function Documentation

◆ __str__()

str __str__ (   self)
Converts the object to a string.

Definition at line 154 of file sysvar.py.

def __str__(self) -> str:
    """
    Builds a human-readable summary of the particle.

    The summary is framed by separator lines and lists the particle type and
    prefix, the ``describe()`` statistics of the weight columns of the merged
    table, and one line per entry of the PDG binning.
    """
    rule = '------------------'
    sections = [
        rule,
        'ReweighterParticle',
        f'Type: {self.type} Prefix: {self.prefix}',
        # Only the weight columns of the merged table are summarised
        f'Merged table:\n{self.merged_table[_weight_cols].describe()}',
        # Every binning entry is terminated by its own newline
        'PDG binning:\n' + ''.join(f'{pdgs}: {self.pdg_binning[pdgs]}\n' for pdgs in self.pdg_binning),
    ]
    return '\n'.join(sections) + rule

◆ generate_variations()

None generate_variations (   self,
int  n_variations,
np.ndarray   rho_sys = None,
np.ndarray   rho_stat = None 
)
Generates variations of weights according to the uncertainties

Definition at line 102 of file sysvar.py.

105 rho_stat: np.ndarray = None) -> None:
106 """
107 Generates variations of weights according to the uncertainties
108 """
109 self.merged_table['stat_error'] = self.merged_table[["data_MC_uncertainty_stat_up",
110 "data_MC_uncertainty_stat_dn"]].max(axis=1)
111 self.merged_table['sys_error'] = self.merged_table[["data_MC_uncertainty_sys_up",
112 "data_MC_uncertainty_sys_dn"]].max(axis=1)
113 self.merged_table["error"] = np.sqrt(self.merged_table["stat_error"] ** 2 + self.merged_table["sys_error"] ** 2)
114 means = self.merged_table["data_MC_ratio"].values
115
116 self.column_names = [f"{self.weight_name}_{i}" for i in range(n_variations)]
117 cov = self.get_covariance(n_variations, rho_sys, rho_stat)
118 weights = cov + means
119 self.merged_table[self.weight_name] = self.merged_table["data_MC_ratio"]
120 self.merged_table[self.column_names] = weights.T
121 self.column_names.insert(0, self.weight_name)
122

◆ get_binning_variables()

list get_binning_variables (   self)
Returns the list of variables that are used for the binning

Definition at line 85 of file sysvar.py.

def get_binning_variables(self) -> list:
    """
    Returns the list of variables that are used for the binning.

    The variable names are collected from the keys of every per-particle
    binning dictionary in ``self.pdg_binning``, de-duplicated, and passed
    through ``get_varname``. The result follows the order in which the
    variables are first encountered, so it is reproducible between runs
    (iterating a set of strings is not).
    """
    # dict.fromkeys drops duplicates while preserving first-seen order
    unique_vars = dict.fromkeys(
        var for binning in self.pdg_binning.values() for var in binning.keys()
    )
    return [self.get_varname(var) for var in unique_vars]

◆ get_covariance()

np.ndarray get_covariance (   self,
int  n_variations,
np.ndarray   rho_sys = None,
np.ndarray   rho_stat = None 
)
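Returns random variations (errors) of the weights, drawn from multivariate normal distributions defined by the covariance matrix; despite the method name, the sampled variations are returned rather than the covariance matrix itself.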

Definition at line 123 of file sysvar.py.

126 rho_stat: np.ndarray = None) -> np.ndarray:
127 """
128 Returns the covariance matrix of the weights
129 """
130 len_means = len(self.merged_table["data_MC_ratio"])
131 zeros = np.zeros(len_means)
132 if self.cov is None:
133 if rho_sys is None:
134 if self.syscorr:
135 rho_sys = np.ones((len_means, len_means))
136 else:
137 rho_sys = np.identity(len_means)
138 if rho_stat is None:
139 rho_stat = np.identity(len_means)
140 sys_cov = np.matmul(
141 np.matmul(np.diag(self.merged_table['sys_error']), rho_sys), np.diag(self.merged_table['sys_error'])
142 )
143 stat_cov = np.matmul(
144 np.matmul(np.diag(self.merged_table['stat_error']), rho_stat), np.diag(self.merged_table['stat_error'])
145 )
146 np.random.seed(self.sys_seed)
147 sys = np.random.multivariate_normal(zeros, sys_cov, n_variations)
148 np.random.seed(None)
149 stat = np.random.multivariate_normal(zeros, stat_cov, n_variations)
150 return sys + stat
151 errors = np.random.multivariate_normal(zeros, self.cov, n_variations)
152 return errors
153

◆ get_pdg_variables()

list get_pdg_variables (   self)
Returns the list of variables that are used for the PDG codes

Definition at line 92 of file sysvar.py.

def get_pdg_variables(self) -> list:
    """
    Returns the names of the variables that hold the PDG codes.

    'PDG' is always included; 'mcPDG' is added for particles of type "PID".
    Each name is resolved through ``get_varname``.
    """
    base_names = ['PDG', 'mcPDG'] if self.type == "PID" else ['PDG']
    resolved = []
    for base in base_names:
        resolved.append(f'{self.get_varname(base)}')
    return resolved

◆ get_varname()

str get_varname (   self,
str  varname 
)
Returns the variable name with the prefix, using the alias if one is defined.

Definition at line 74 of file sysvar.py.

def get_varname(self, varname: str) -> str:
    """
    Resolves a variable name to its prefixed form.

    If ``self.variable_aliases`` is non-empty and contains ``varname``, the
    alias is used in place of the given name. The particle prefix is then
    prepended unless the name already starts with it.
    """
    aliases = self.variable_aliases
    resolved = aliases[varname] if aliases and varname in aliases else varname
    # Avoid doubling the prefix on names that already carry it
    return resolved if resolved.startswith(self.prefix) else f'{self.prefix}{resolved}'

◆ plot_coverage()

def plot_coverage (   self,
  fig = None,
  axs = None 
)
Plots the coverage of the ntuple.

Definition at line 168 of file sysvar.py.

def plot_coverage(self, fig=None, axs=None):
    """
    Plots the coverage of the ntuple.

    One row of axes is used per ``(reco_pdg, mc_pdg)`` key of
    ``self.plot_values`` and one column per plotted variable. Each panel
    shows the binning of the weight table and the stored values as bars.

    Parameters:
        fig: Figure to draw into. When None, a new figure and a matching
            grid of axes are created and ``axs`` is ignored.
        axs: Axes to draw into when ``fig`` is given. A single axes object,
            a 1-D sequence or a 2-D grid are all accepted.

    Returns:
        The tuple ``(fig, axs)`` with ``axs`` as a 2-D array, or None when
        ``self.plot_values`` is None.
    """
    if self.plot_values is None:
        return
    variables = set()
    for per_variable in self.plot_values.values():
        variables.update(per_variable.keys())
    n_rows, n_cols = len(self.plot_values), len(variables)
    if fig is None:
        fig, axs = plt.subplots(n_rows, n_cols, figsize=(5*n_cols, 3*n_rows), dpi=120)
    axs = np.array(axs)
    # A single axes object (0-d) or a single row/column (1-d) must be brought
    # to the 2-D (rows, columns) layout that the loops below index into.
    if axs.ndim < 2:
        axs = axs.reshape(n_rows, n_cols)
    bin_plt = {'linewidth': 3, 'linestyle': '--', 'color': '0.5'}
    fig.suptitle(f'{self.type} particle {self.prefix.strip("_")}')
    for (reco_pdg, mc_pdg), ax_row in zip(self.plot_values, axs):
        row_values = self.plot_values[(reco_pdg, mc_pdg)]
        for var, ax in zip(row_values, ax_row):
            # Element 0 is used as the bin edges, element 1 as the bar heights
            edges, heights = row_values[var][0], row_values[var][1]
            binning = self.pdg_binning[(reco_pdg, mc_pdg)][var]
            ymin = 0
            ymax = heights.max()*1.1
            # Plot binning
            if self.type == 'PID':
                ax.vlines(binning, ymin, ymax,
                          label='Binning',
                          alpha=0.8,
                          **bin_plt)
            elif self.type == 'FEI':
                # NOTE(review): presumably the bin labels carry a 4-character
                # prefix in front of an integer id - confirm against the table.
                values = np.array([int(val[4:]) for val in binning])
                ax.bar(values+0.5,
                       np.ones(len(values))*ymax,
                       width=1,
                       alpha=0.5,
                       label='Binning',
                       **bin_plt)
                # Edges that are not part of the binning are drawn fainter
                rest = np.setdiff1d(edges, values)
                ax.bar(rest+0.5,
                       np.ones(len(rest))*ymax,
                       width=1,
                       alpha=0.2,
                       label='Rest category',
                       **bin_plt)
            # Plot values
            widths = (edges[1:] - edges[:-1])
            centers = edges[:-1] + widths/2
            ax.bar(centers,
                   heights,
                   width=widths,
                   label='Values',
                   alpha=0.8)
            ax.set_title(f'True {pdg.to_name(mc_pdg)} to reco {pdg.to_name(reco_pdg)} coverage')
            ax.set_xlabel(var)
    axs[-1][-1].legend()
    fig.tight_layout()
    return fig, axs

Member Data Documentation

◆ column_names [1/2]

list column_names = None
static

Internal list of the names of the weight columns.

Definition at line 57 of file sysvar.py.

◆ column_names [2/2]

column_names

Names of the varied weight columns.

Definition at line 116 of file sysvar.py.

◆ cov

np.ndarray cov = None
static

Covariance matrix corresponding to the total uncertainty.

Definition at line 63 of file sysvar.py.

◆ coverage

float coverage = None
static

Coverage of the user ntuple.

Definition at line 69 of file sysvar.py.

◆ merged_table

pd.DataFrame merged_table
static

Merged table of the weights.

Definition at line 45 of file sysvar.py.

◆ pdg_binning

dict pdg_binning
static

Kinematic binning of the weight table per particle.

Definition at line 48 of file sysvar.py.

◆ plot_values

dict plot_values = None
static

Values for the plots.

Definition at line 72 of file sysvar.py.

◆ prefix

str prefix
static

Prefix of the particle in the ntuple.

Definition at line 39 of file sysvar.py.

◆ sys_seed

int sys_seed = None
static

Random seed for systematics.

Definition at line 60 of file sysvar.py.

◆ syscorr

bool syscorr = True
static

When true, assume systematics are 100% correlated.

Definition at line 66 of file sysvar.py.

◆ type [1/2]

str type
static

Type of the particle (PID or FEI)

Definition at line 42 of file sysvar.py.

◆ type [2/2]

type

Type of the particle; for PID particles the mcPDG code requirement is added.

Definition at line 98 of file sysvar.py.

◆ variable_aliases

dict variable_aliases
static

Variable aliases of the weight table.

Definition at line 51 of file sysvar.py.

◆ weight_name

str weight_name
static

Weight column name that will be added to the ntuple.

Definition at line 54 of file sysvar.py.


The documentation for this class was generated from the following file: