Belle II Software  release-05-01-25
SVDSimBase.py
1 #!/usr/bin/env python
2 # coding: utf8
3 
4 """
5 SVDSimBase.py
6 ==========
7 The module contains basic functions for generation and analysis of SVD strip data.
8 
9 :author: Peter Kvasnicka
10 
11 Description:
12 This is a set of utility functions and constant declarations for simulation of SVD strip data
13 to use in training timing networks or for other purposes. These include wave functions (gamma,
14 cubic polynomial and beta-prime).
15 
16 Usage:
17 Just import the module to use it.
18 """
19 
20 import math
21 import numpy as np
22 import pandas as pd
23 from scipy.stats import norm, uniform
24 
25 dt = 31.44 # Time interval between APV25 samples
26 threshold_cut = 3 # Samples with signal-to-noise less than 3 are zero.
27 
28 wexp_default_tau = 55 # ns
29 
30 
31 def wexp(t, tau=wexp_default_tau):
32  '''
33  Gamma waveform
34  wexp(t, tau) = t/tau * exp(1-t/tau) if t > 0 else 0,
35  normalized to peak value 1.
36  t - nummpy vector of times
37  tau - (scalar) waveform decay time
38  return: numpy vector of wexp(t, tau) values at times t
39  '''
40  z = t / tau
41  return np.clip(z * np.exp(1.0 - z), 0.0, 100.0)
42 
43 
44 w3_default_tau = 200 # ns
45 
46 
47 def w3(t, tau=w3_default_tau):
48  ''' Cubic waveform
49  w3(t, tau) = 27/4 * t/tau * (1-t/tau)**2 if 0 < t < tau else 0,
50  normalized to peak value 1.
51  t - numpy vector of times
52  tau - (scalar) width of the distribution
53  return - numpy vector of w3(t,tau) values at times t
54  '''
55  z = np.clip(t / tau, 0, 1)
56  return 27 / 4 * z * (z - 1) * (z - 1)
57 
58 
59 bp_default_tau = 250 # ns
60 
61 
62 def betaprime_wave(t, tau=bp_default_tau):
63  ''' Beta-prime waveform
64  betaprime_wave(t, tau) = 149.012 * (t/tau)**2 * (1 + t/tau)**10 if t > 0 else 0
65  t - numpy vector of times
66  tau - (scalar) width of the distribution
67  return - numpy vector of betaprime_wave values of betaprime_wave at times t
68  '''
69  z = np.clip(t / tau, 0, 1000)
70  return 149.012 * z**2 / (1 + z)**10
71 
72 
73 def test3(s, threshold):
74  '''Test for 3 consecutive non-zero APV samples
75  A six-tuple of APV25 samples is only accepted if it has 3 consecutive samples over threshold.
76  s - 6 apv samples
77  return - True if there are 3 consecutive samples over threshold, otherwise False.
78  '''
79  cons_before = np.zeros(len(s))
80  for i in range(3):
81  cons_before[i:] += (s > threshold)[0:6 - i]
82  return np.max(cons_before) >= 3
83 
84 
85 '''
86 This is the list of all configurations of over-the-threhsold samples that pass the 3-over-the-threshold-samples test.
87 '''
88 residual_configs = [
89  [3, 4, 5],
90  [2, 3, 4],
91  [2, 3, 4, 5],
92  [1, 3, 4, 5],
93  [1, 2, 3],
94  [1, 2, 3, 5],
95  [1, 2, 3, 4],
96  [1, 2, 3, 4, 5],
97  [0, 3, 4, 5],
98  [0, 2, 3, 4],
99  [0, 2, 3, 4, 5],
100  [0, 1, 3, 4, 5],
101  [0, 1, 2],
102  [0, 1, 2, 5],
103  [0, 1, 2, 4],
104  [0, 1, 2, 4, 5],
105  [0, 1, 2, 3],
106  [0, 1, 2, 3, 5],
107  [0, 1, 2, 3, 4],
108  [0, 1, 2, 3, 4, 5]
109 ]
110 
111 
112 def gen_signal(ampl, t0, tau, sigma=1, w=betaprime_wave, tau_sigma=0.0):
113  '''Generate random sample of 6 APV signals, properly censored.
114  The produced data are normalized to sigma = 1 and the given amplitude.
115  The procedure is:
116  - real_amplitude = sigma * amlitude
117  - generate waveform, add noise, and convert to integer values
118  - divide signals by sigma
119  For large amplitudes, the function adds gaussian noise to generated waveform.
120  For amplitudes around and below the threshold, the function randomly selects a
121  subset of samples over threshold and generates over-threhold noises from left-censored
122  gaussian distributions.
123  ampl - amplitude
124  t0 - time shift
125  tau - decay time or width of the waveform
126  sigma - nosie (default 1: we work with S/N rather than raw signals)
127  tau_sigma - width of tau jitter
128  '''
129  # reconstruct amplitude
130  gen_amplitude = sigma * ampl
131  # threshold for data generration from cdf
132  gen_thr = int(sigma * threshold_cut + 1.0 - 1.0e-9) - 0.5
133  res = np.zeros(6)
134  # Add tau jitter, if desired.
135  if tau_sigma > 0:
136  tau += np.random.randn() * tau_sigma
137  res0 = gen_amplitude * w(np.linspace(-dt - t0, 4 * dt - t0, 6, endpoint=True), tau)
138  if test3(res0, threshold_cut * sigma):
139  res = (res0 + sigma * np.random.randn(6) + 0.5).astype(int)
140  # We just repeat if the configuration doesn't pass (should happen rarely)
141  while not test3(res, threshold_cut * sigma):
142  res = (res0 + sigma * np.random.randn(6) + 0.5).astype(int)
143  res[res < threshold_cut * sigma] = 0
144  else: # low-amp mode
145  # calculate probabilities of saamples above threhold
146  p_over = 1.0 - norm.cdf((gen_thr - res0) / sigma)
147  # calculate probabilities of over-threhold configurations
148  pconfs = np.array([np.prod(p_over[conf]) for conf in residual_configs])
149  pconfs /= np.sum(pconfs)
150  cconfs = np.cumsum(pconfs)
151  # select random configuration
152  u_conf = uniform.rvs()
153  i_conf = 0 # meaning configuration 0
154  while u_conf > cconfs[i_conf]:
155  i_conf += 1
156  u_res = uniform.rvs(0, 1, len(residual_configs[i_conf]))
157  res = np.zeros(6)
158  res[residual_configs[i_conf]] = res0[residual_configs[i_conf]] + \
159  sigma * norm.ppf(1 - u_res * p_over[residual_configs[i_conf]])
160  res = (res + 0.5).astype(int)
161  return res / sigma
162 
163 # ==============================================================================
164 
165 
167  # ------------------------------------------------------------------------------
168  '''
169  A simple class to encode and decode tau values for network training
170  based on amplitude and tau ranges.
171  '''
172 
173  def __init__(self, amp_range, tau_range):
174  """
175  creates the class
176  """
177  self.amp_min, self.amp_max = amp_range
178  self.tau_min, self.tau_max = tau_range
179  self.at_ratio = (self.amp_max - self.amp_min) / (self.tau_max - self.tau_min)
180 
181  def encode(self, tau):
182  """
183  encoder of tau values for network training
184  """
185  return (self.amp_min + self.at_ratio * (tau - self.tau_min))
186 
187  def decode(self, etau):
188  """
189  decoder of tau values for network training
190  """
191  return (self.tau_min + 1.0 / self.at_ratio * (etau - self.amp_min))
192 
193 
195  """
196  This class generates a Pandas dataframe with a random sample of SVD strip signals with specified size and parameters.
197  NB:
198  1. We generate time bins from quantiles, do we want a regular grid?
199  2. Have to think of possible irregular grid.
200  """
201 
202  def __init__(self, t0_bounds, tau_bounds, amplitude_bounds, sigma_bounds, tau_sigma, bin_size, wf=betaprime_wave):
203  """
204  The constructor takes the following parameters:
205  t0_bounds is a tuple, (t0_min, t0_max)
206  tau_bounds is a tuple (tau_min, tau_max)
207  amplitude_bounds is a tuple (amp_min, amp_max)
208  sigma_bounds is a tuple (sigma_min, sigma_max)
209  bin_size is the % fraction of t0_min, t0_max interval corresponding to a single output t0 bin.
210  """
211 
212  self.t0_min, self.t0_max = t0_bounds
213  self.tau_min, self.tau_max = tau_bounds
214  self.amp_min, self.amp_max = amplitude_bounds
215  self.sigma_min, self.sigma_max = sigma_bounds
216  self.tau_coder = tau_encoder(amplitude_bounds, tau_bounds)
217  self.tau_sigma = tau_sigma
218  self.bin_size = bin_size
219  self.wf = wf
220 
221  def get_t0_bounds(self):
222  '''
223  Get t0 bounds of sampling space
224  '''
225  return (self.t0_min, self.t0_max)
226 
227  def get_amp_bounds(self):
228  '''
229  Get amplitude bounds of sampling space
230  '''
231  return (self.amp_min, self.amp_max)
232 
233  def get_tau_bounds(self):
234  '''
235  Get tau bounds of sampling space
236  '''
237  return (self.tau_min, self.tau_max)
238 
239  def set_tau_bounds(self, tau_min, tau_max):
240  '''
241  Set width limits for the simulation.
242  '''
243  self.tau_min = tau_min
244  self.tau_max = tau_max
245 
246  def set_tau_sigma(self, tau_sigma):
247  '''
248  Set width jitter for the simulation.
249  '''
250  self.tau_sigma = tau_sigma
251 
252  def get_tau_sigma(self):
253  '''
254  Get width jitter for the simulation.
255  '''
256  return self.tau_sigma
257 
258  def get_sigma_bounds(self):
259  '''
260  Get sigma bounds
261  '''
262  return (self.sigma_min, self.sigma_max)
263 
264  def generate(self, sample_size):
265  '''
266  Generate sample_size samples.
267  '''
268  self.n_samples = sample_size
269  self.stockdata = pd.DataFrame({
270  'test': np.random.uniform(size=self.n_samples),
271  't0': np.random.uniform(self.t0_min, self.t0_max, size=self.n_samples),
272  'tau': np.random.uniform(self.tau_min, self.tau_max, size=self.n_samples),
273  'sigma': np.random.uniform(self.sigma_min, self.sigma_max, size=self.n_samples),
274  'amplitude': np.random.uniform(self.amp_min, self.amp_max, size=self.n_samples),
275  's1': np.zeros(self.n_samples),
276  's2': np.zeros(self.n_samples),
277  's3': np.zeros(self.n_samples),
278  's4': np.zeros(self.n_samples),
279  's5': np.zeros(self.n_samples),
280  's6': np.zeros(self.n_samples)
281  })
282  self.stockdata['normed_tau'] = self.stockdata.apply(lambda row: self.tau_coder.encode(row.tau), axis=1)
283  orderedcols = ['test', 'amplitude', 't0', 'tau', 'sigma', 's1', 's2', 's3', 's4', 's5', 's6', 'normed_tau']
284  self.stockdata = self.stockdata[orderedcols]
285  # This is where the data are generated
286  self.stockdata[['s' + str(i) for i in range(1, 7)]] = self.stockdata.apply(lambda row: pd.Series(
287  gen_signal(row.amplitude, row.t0, row.tau, row.sigma, tau_sigma=self.tau_sigma, w=self.wf)), axis=1)
288 
289  self.t0_bins = np.percentile(self.stockdata.t0, np.arange(0, 101, self.bin_size))
290  self.t0_bins[0] = self.t0_bins[0] - 0.1
291  self.t0_bins[-1] = self.t0_bins[-1] + 0.1
292  self.stockdata['t0_bin'] = np.digitize(self.stockdata.t0, self.t0_bins)
293 
294  self.t0_bin_times = self.stockdata['t0'].groupby(self.stockdata.t0_bin).aggregate(np.mean)
295  abins = np.arange(self.amp_min, self.amp_max + 1, 1)
296  self.stockdata['abin'] = np.digitize(self.stockdata.amplitude, abins)
297  return self.stockdata
298 
299  def get_t0_array(self):
300  '''
301  Get array of mean t0's for classifier bins
302  '''
303  return self.t0_bin_times
304 
305  def get_t0_bins(self):
306  '''
307  Get array of mean t0's for classifier bins
308  '''
309  return pd.Series(self.t0_bins)
310 
311 
312 # ==============================================================================
313 # Tau (scale) conversion and encoding
314 # ------------------------------------------------------------------------------
315 '''
316 Empirical ranges of raw waveform width values from February 2017 testbeam data.
317 These values are only used for scaling and not critical.
318 '''
319 raw_tau_min = 27
320 raw_tau_max = 45
321 
322 
323 def tau_hao2real(hao_tau):
324  '''
325  Convert Hao's raw tau (integral, in latency units) to correct betaprime scale. Includes scaling and fit adjustment.
326  '''
327  return 3.93 / 0.50305 * hao_tau
328 
329 
330 # ------------------------------------------------------------------------------
331 
332 if __name__ == '__main__':
333  print('Basic functions for simulation of SVD strip data.\nImport to use.')
svd.SVDSimBase.SampleGenerator.amp_max
amp_max
Definition: SVDSimBase.py:214
svd.SVDSimBase.SampleGenerator.get_amp_bounds
def get_amp_bounds(self)
Definition: SVDSimBase.py:227
svd.SVDSimBase.SampleGenerator.t0_bins
t0_bins
undocumented variable
Definition: SVDSimBase.py:289
svd.SVDSimBase.tau_encoder.encode
def encode(self, tau)
Definition: SVDSimBase.py:181
svd.SVDSimBase.SampleGenerator.set_tau_bounds
def set_tau_bounds(self, tau_min, tau_max)
Definition: SVDSimBase.py:239
svd.SVDSimBase.SampleGenerator.stockdata
stockdata
Definition: SVDSimBase.py:269
svd.SVDSimBase.tau_encoder.tau_max
tau_max
Definition: SVDSimBase.py:178
svd.SVDSimBase.SampleGenerator.set_tau_sigma
def set_tau_sigma(self, tau_sigma)
Definition: SVDSimBase.py:246
svd.SVDSimBase.SampleGenerator.generate
def generate(self, sample_size)
Definition: SVDSimBase.py:264
svd.SVDSimBase.SampleGenerator.sigma_max
sigma_max
Definition: SVDSimBase.py:215
svd.SVDSimBase.SampleGenerator
Definition: SVDSimBase.py:194
svd.SVDSimBase.SampleGenerator.get_t0_array
def get_t0_array(self)
Definition: SVDSimBase.py:299
svd.SVDSimBase.SampleGenerator.__init__
def __init__(self, t0_bounds, tau_bounds, amplitude_bounds, sigma_bounds, tau_sigma, bin_size, wf=betaprime_wave)
Definition: SVDSimBase.py:202
svd.SVDSimBase.tau_encoder.at_ratio
at_ratio
Definition: SVDSimBase.py:179
svd.SVDSimBase.SampleGenerator.get_sigma_bounds
def get_sigma_bounds(self)
Definition: SVDSimBase.py:258
svd.SVDSimBase.SampleGenerator.get_t0_bins
def get_t0_bins(self)
Definition: SVDSimBase.py:305
svd.SVDSimBase.SampleGenerator.t0_max
t0_max
Definition: SVDSimBase.py:212
svd.SVDSimBase.SampleGenerator.get_tau_bounds
def get_tau_bounds(self)
Definition: SVDSimBase.py:233
svd.SVDSimBase.tau_encoder.__init__
def __init__(self, amp_range, tau_range)
Definition: SVDSimBase.py:173
svd.SVDSimBase.tau_encoder.decode
def decode(self, etau)
Definition: SVDSimBase.py:187
svd.SVDSimBase.SampleGenerator.t0_bin_times
t0_bin_times
undocumented variable
Definition: SVDSimBase.py:294
svd.SVDSimBase.SampleGenerator.tau_coder
tau_coder
Definition: SVDSimBase.py:216
svd.SVDSimBase.tau_encoder
Definition: SVDSimBase.py:166
svd.SVDSimBase.SampleGenerator.tau_max
tau_max
Definition: SVDSimBase.py:213
svd.SVDSimBase.SampleGenerator.get_t0_bounds
def get_t0_bounds(self)
Definition: SVDSimBase.py:221
svd.SVDSimBase.SampleGenerator.wf
wf
Definition: SVDSimBase.py:219
svd.SVDSimBase.SampleGenerator.tau_sigma
tau_sigma
Definition: SVDSimBase.py:217
svd.SVDSimBase.SampleGenerator.n_samples
n_samples
Definition: SVDSimBase.py:268
svd.SVDSimBase.SampleGenerator.tau_min
tau_min
Definition: SVDSimBase.py:243
svd.SVDSimBase.SampleGenerator.get_tau_sigma
def get_tau_sigma(self)
Definition: SVDSimBase.py:252
svd.SVDSimBase.tau_encoder.amp_max
amp_max
Definition: SVDSimBase.py:177
svd.SVDSimBase.SampleGenerator.bin_size
bin_size
Definition: SVDSimBase.py:218