Belle II Software development
SVDSimBase.py
#!/usr/bin/env python


"""
SVDSimBase.py
=============
The module contains basic functions for generation and analysis of SVD strip data.

Description:
This is a set of utility functions and constant declarations for simulation of SVD strip data
to use in training timing networks or for other purposes. These include wave functions (gamma,
cubic polynomial and beta-prime).

Usage:
Just import the module to use it.
"""

import numpy as np
import pandas as pd
from scipy.stats import norm, uniform

dt = 31.44  # Time interval between APV25 samples
threshold_cut = 3  # Samples with signal-to-noise less than 3 are zero.

wexp_default_tau = 55  # ns


def wexp(t, tau=wexp_default_tau):
    '''
    Gamma waveform
    wexp(t, tau) = t/tau * exp(1-t/tau) if t > 0 else 0,
    normalized to peak value 1.
    t - numpy vector of times
    tau - (scalar) waveform decay time
    return: numpy vector of wexp(t, tau) values at times t
    '''
    z = t / tau
    return np.clip(z * np.exp(1.0 - z), 0.0, 100.0)


w3_default_tau = 200  # ns


def w3(t, tau=w3_default_tau):
    ''' Cubic waveform
    w3(t, tau) = 27/4 * t/tau * (1-t/tau)**2 if 0 < t < tau else 0,
    normalized to peak value 1.
    t - numpy vector of times
    tau - (scalar) width of the distribution
    return - numpy vector of w3(t,tau) values at times t
    '''
    z = np.clip(t / tau, 0, 1)
    return 27 / 4 * z * (z - 1) * (z - 1)


bp_default_tau = 250  # ns


def betaprime_wave(t, tau=bp_default_tau):
    ''' Beta-prime waveform
    betaprime_wave(t, tau) = 149.012 * (t/tau)**2 / (1 + t/tau)**10 if t > 0 else 0,
    normalized to peak value 1.
    t - numpy vector of times
    tau - (scalar) width of the distribution
    return - numpy vector of betaprime_wave values at times t
    '''
    z = np.clip(t / tau, 0, 1000)
    return 149.012 * z**2 / (1 + z)**10
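
# The constant 149.012 normalizes the beta-prime waveform to a peak value of 1:
# z**2 / (1 + z)**10 has its maximum at z = 1/4, where it equals
# (1/16) / (5/4)**10 ~ 1/149.01, so multiplying by 149.012 gives a peak of ~1.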


def test3(s, threshold):
    '''Test for 3 consecutive non-zero APV samples.
    A six-tuple of APV25 samples is only accepted if it has 3 consecutive samples over threshold.
    s - numpy vector of 6 APV samples
    threshold - value a sample has to exceed to count as over threshold
    return - True if there are 3 consecutive samples over threshold, otherwise False.
    '''
    cons_before = np.zeros(len(s))
    for i in range(3):
        cons_before[i:] += (s > threshold)[0:6 - i]
    return np.max(cons_before) >= 3


'''
This is the list of all configurations of over-threshold samples that pass the test of 3 consecutive samples over threshold.
'''
residual_configs = [
    [3, 4, 5],
    [2, 3, 4],
    [2, 3, 4, 5],
    [1, 3, 4, 5],
    [1, 2, 3],
    [1, 2, 3, 5],
    [1, 2, 3, 4],
    [1, 2, 3, 4, 5],
    [0, 3, 4, 5],
    [0, 2, 3, 4],
    [0, 2, 3, 4, 5],
    [0, 1, 3, 4, 5],
    [0, 1, 2],
    [0, 1, 2, 5],
    [0, 1, 2, 4],
    [0, 1, 2, 4, 5],
    [0, 1, 2, 3],
    [0, 1, 2, 3, 5],
    [0, 1, 2, 3, 4],
    [0, 1, 2, 3, 4, 5]
]
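
# The 20 index sets above are exactly the subsets of {0, ..., 5} that contain at
# least 3 consecutive indices, i.e. the over-threshold patterns accepted by test3().
# As an illustrative check (not used by the module), they could be regenerated,
# up to ordering, with:
#
#     from itertools import product
#     confs = [list(np.nonzero(np.array(m))[0])
#              for m in product((0, 1), repeat=6) if test3(np.array(m), 0.5)]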


def gen_signal(ampl, t0, tau, sigma=1, w=betaprime_wave, tau_sigma=0.0):
    '''Generate a random sample of 6 APV signals, properly censored.
    The produced data are normalized to sigma = 1 and the given amplitude.
    The procedure is:
    - real_amplitude = sigma * amplitude
    - generate waveform, add noise, and convert to integer values
    - divide signals by sigma
    For large amplitudes, the function adds Gaussian noise to the generated waveform.
    For amplitudes around and below the threshold, the function randomly selects a
    subset of samples over threshold and generates over-threshold noises from left-censored
    Gaussian distributions.
    ampl - amplitude
    t0 - time shift
    tau - decay time or width of the waveform
    sigma - noise (default 1: we work with S/N rather than raw signals)
    w - waveform function
    tau_sigma - width of tau jitter
    '''
    # reconstruct amplitude
    gen_amplitude = sigma * ampl
    # threshold for data generation from cdf
    gen_thr = int(sigma * threshold_cut + 1.0 - 1.0e-9) - 0.5
    res = np.zeros(6)
    # Add tau jitter, if desired.
    if tau_sigma > 0:
        tau += np.random.randn() * tau_sigma
    res0 = gen_amplitude * w(np.linspace(-dt - t0, 4 * dt - t0, 6, endpoint=True), tau)
    if test3(res0, threshold_cut * sigma):
        res = (res0 + sigma * np.random.randn(6) + 0.5).astype(int)
        # We just repeat if the configuration doesn't pass (should happen rarely)
        while not test3(res, threshold_cut * sigma):
            res = (res0 + sigma * np.random.randn(6) + 0.5).astype(int)
        res[res < threshold_cut * sigma] = 0
    else:  # low-amplitude mode
        # calculate probabilities of samples above threshold
        p_over = 1.0 - norm.cdf((gen_thr - res0) / sigma)
        # calculate probabilities of over-threshold configurations
        pconfs = np.array([np.prod(p_over[conf]) for conf in residual_configs])
        pconfs /= np.sum(pconfs)
        cconfs = np.cumsum(pconfs)
        # select a random configuration
        u_conf = uniform.rvs()
        i_conf = 0  # meaning configuration 0
        while u_conf > cconfs[i_conf]:
            i_conf += 1
        u_res = uniform.rvs(0, 1, len(residual_configs[i_conf]))
        res = np.zeros(6)
        res[residual_configs[i_conf]] = res0[residual_configs[i_conf]] + \
            sigma * norm.ppf(1 - u_res * p_over[residual_configs[i_conf]])
        res = (res + 0.5).astype(int)
    return res / sigma
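
# Note on the low-amplitude branch of gen_signal(): the over-threshold samples are
# drawn by inverse-transform sampling of the Gaussian tail. With u ~ Uniform(0, 1)
# and p_over the probability that a sample exceeds the generation threshold,
# 1 - u * p_over is uniform on (1 - p_over, 1), so norm.ppf(1 - u * p_over) is a
# standard normal deviate conditioned to lie above (gen_thr - res0) / sigma, i.e.
# a draw from the left-censored Gaussian mentioned in the docstring.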


# ==============================================================================


class tau_encoder:
    # ------------------------------------------------------------------------------
    '''
    A simple class to encode and decode tau values for network training,
    based on amplitude and tau ranges: tau values are mapped linearly from
    (tau_min, tau_max) onto (amp_min, amp_max).
    '''

    def __init__(self, amp_range, tau_range):
        """
        creates the class
        """
        self.amp_min, self.amp_max = amp_range
        self.tau_min, self.tau_max = tau_range
        self.at_ratio = (self.amp_max - self.amp_min) / (self.tau_max - self.tau_min)

    def encode(self, tau):
        """
        encoder of tau values for network training
        """
        return (self.amp_min + self.at_ratio * (tau - self.tau_min))

    def decode(self, etau):
        """
        decoder of tau values for network training
        """
        return (self.tau_min + 1.0 / self.at_ratio * (etau - self.amp_min))


class SampleGenerator:
    """
    This class generates a Pandas dataframe with a random sample of SVD strip signals with the specified size and parameters.
    NB:
    1. We generate time bins from quantiles; do we want a regular grid?
    2. Have to think of a possible irregular grid.
    """

    def __init__(self, t0_bounds, tau_bounds, amplitude_bounds, sigma_bounds, tau_sigma, bin_size, wf=betaprime_wave):
        """
        The constructor takes the following parameters:
        t0_bounds is a tuple (t0_min, t0_max)
        tau_bounds is a tuple (tau_min, tau_max)
        amplitude_bounds is a tuple (amp_min, amp_max)
        sigma_bounds is a tuple (sigma_min, sigma_max)
        tau_sigma is the width of the tau jitter
        bin_size is the size of a single output t0 bin, as a percentage of the (t0_min, t0_max) interval
        wf is the waveform function used to generate the signals
        """
        self.t0_min, self.t0_max = t0_bounds
        self.tau_min, self.tau_max = tau_bounds
        self.amp_min, self.amp_max = amplitude_bounds
        self.sigma_min, self.sigma_max = sigma_bounds
        self.tau_coder = tau_encoder(amplitude_bounds, tau_bounds)
        self.tau_sigma = tau_sigma
        self.bin_size = bin_size
        self.wf = wf

    def get_t0_bounds(self):
        '''
        Get t0 bounds of sampling space
        '''
        return (self.t0_min, self.t0_max)

    def get_amp_bounds(self):
        '''
        Get amplitude bounds of sampling space
        '''
        return (self.amp_min, self.amp_max)

    def get_tau_bounds(self):
        '''
        Get tau bounds of sampling space
        '''
        return (self.tau_min, self.tau_max)

    def set_tau_bounds(self, tau_min, tau_max):
        '''
        Set width limits for the simulation.
        '''
        self.tau_min = tau_min
        self.tau_max = tau_max

    def set_tau_sigma(self, tau_sigma):
        '''
        Set width jitter for the simulation.
        '''
        self.tau_sigma = tau_sigma

    def get_tau_sigma(self):
        '''
        Get width jitter for the simulation.
        '''
        return self.tau_sigma

    def get_sigma_bounds(self):
        '''
        Get sigma bounds
        '''
        return (self.sigma_min, self.sigma_max)

    def generate(self, sample_size):
        '''
        Generate sample_size samples.
        '''
        self.n_samples = sample_size
        self.stockdata = pd.DataFrame({
            'test': np.random.uniform(size=self.n_samples),
            't0': np.random.uniform(self.t0_min, self.t0_max, size=self.n_samples),
            'tau': np.random.uniform(self.tau_min, self.tau_max, size=self.n_samples),
            'sigma': np.random.uniform(self.sigma_min, self.sigma_max, size=self.n_samples),
            'amplitude': np.random.uniform(self.amp_min, self.amp_max, size=self.n_samples),
            's1': np.zeros(self.n_samples),
            's2': np.zeros(self.n_samples),
            's3': np.zeros(self.n_samples),
            's4': np.zeros(self.n_samples),
            's5': np.zeros(self.n_samples),
            's6': np.zeros(self.n_samples)
        })
        self.stockdata['normed_tau'] = self.stockdata.apply(lambda row: self.tau_coder.encode(row.tau), axis=1)
        orderedcols = ['test', 'amplitude', 't0', 'tau', 'sigma', 's1', 's2', 's3', 's4', 's5', 's6', 'normed_tau']
        self.stockdata = self.stockdata[orderedcols]
        # This is where the data are generated
        self.stockdata[['s' + str(i) for i in range(1, 7)]] = self.stockdata.apply(lambda row: pd.Series(
            gen_signal(row.amplitude, row.t0, row.tau, row.sigma, tau_sigma=self.tau_sigma, w=self.wf)), axis=1)
        # Bin t0 by percentiles, so that the bins are roughly equally populated,
        # and widen the outermost edges slightly so that all t0 values fall inside.
        self.t0_bins = np.percentile(self.stockdata.t0, np.arange(0, 101, self.bin_size))
        self.t0_bins[0] = self.t0_bins[0] - 0.1
        self.t0_bins[-1] = self.t0_bins[-1] + 0.1
        self.stockdata['t0_bin'] = np.digitize(self.stockdata.t0, self.t0_bins)
        # Mean true t0 in each classifier bin, and integer amplitude bins.
        self.t0_bin_times = self.stockdata['t0'].groupby(self.stockdata.t0_bin).aggregate(np.mean)
        abins = np.arange(self.amp_min, self.amp_max + 1, 1)
        self.stockdata['abin'] = np.digitize(self.stockdata.amplitude, abins)
        return self.stockdata

    def get_t0_array(self):
        '''
        Get array of mean t0's for classifier bins
        '''
        return self.t0_bin_times

    def get_t0_bins(self):
        '''
        Get array of t0 bin edges for classifier bins
        '''
        return pd.Series(self.t0_bins)


# ==============================================================================
# Tau (scale) conversion and encoding
# ------------------------------------------------------------------------------
'''
Empirical ranges of raw waveform width values from February 2017 testbeam data.
These values are only used for scaling and not critical.
'''
raw_tau_min = 27
raw_tau_max = 45


def tau_hao2real(hao_tau):
    '''
    Convert Hao's raw tau (integral, in latency units) to correct betaprime scale. Includes scaling and fit adjustment.
    '''
    return 3.93 / 0.50305 * hao_tau


# ------------------------------------------------------------------------------

if __name__ == '__main__':
    print('Basic functions for simulation of SVD strip data.\nImport to use.')
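
A minimal usage sketch (illustrative only: it assumes the module is importable as SVDSimBase, and the parameter values below are arbitrary placeholders, not recommended settings):

    from SVDSimBase import SampleGenerator, betaprime_wave

    generator = SampleGenerator(
        t0_bounds=(-50, 50),          # ns
        tau_bounds=(200, 350),        # ns
        amplitude_bounds=(3, 100),    # S/N units
        sigma_bounds=(1, 8),
        tau_sigma=0.0,
        bin_size=2.0,                 # percent of the t0 range per classifier bin
        wf=betaprime_wave)
    data = generator.generate(10000)  # pandas DataFrame with t0, tau, sigma, amplitude, s1..s6, ...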