Belle II Software development
SVDTimeNet_Simulate.py
# ## Generate a data sample for SVDTime Neural Network training
#
# This script generates a toy data sample for neural network training.
# The result is stored as a pickle; additional pickles contain the
# parameters of the generated training sample.
#
# TO DO / FIX:
# * We can make the generation faster using C++.
# * If we stored in ROOT, we wouldn't need to keep all data in memory.
#
# Packages required:
# - pandas
# - SVDSimBase
# - argparse

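# Example invocation (illustrative; assumes a Belle II analysis environment
# where the svd.SVDSimBase module is importable). All options have defaults,
# so the script can also be run without arguments:
#
#   python3 SVDTimeNet_Simulate.py --nsamples 100000 --jitter-tau 5
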
# In[1]:

import argparse
import numpy as np
import pandas as pd

from svd.SVDSimBase import dt, SampleGenerator

# ### Sample generation
#
# Generate a pandas dataframe containing a large number of waveform samples
# and truth data, and pickle it. The data will be used as training and test
# data. Waveform widths (tau), amplitudes and time shifts are sampled
# uniformly from a large set of feasible values. Additionally, the waveform
# widths (tau) are jittered using a normal distribution with 5 ns width, i.e.
# the "true" values are slightly off. This makes the estimate robust against
# imprecise knowledge of the waveform width or shape.

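# The sampling scheme described above can be sketched in plain NumPy. This is
# an illustration only: the exact recipe lives inside SampleGenerator, and the
# helper below (name, signature and all) is a hypothetical aid that is not
# used anywhere else in this script.


def _illustrate_parameter_sampling(n, t0_bounds, tau_bounds, amp_bounds, tau_jitter=5.0):
    """Draw n toy waveform parameters as described in the comment above.

    t0, tau and amplitude are drawn uniformly within their bounds; the "true"
    tau is then smeared with a Gaussian of width tau_jitter (ns). Noise sigma
    would be drawn uniformly in the same way.
    """
    rng = np.random.default_rng()
    t0 = rng.uniform(t0_bounds[0], t0_bounds[1], size=n)
    tau = rng.uniform(tau_bounds[0], tau_bounds[1], size=n)
    amp = rng.uniform(amp_bounds[0], amp_bounds[1], size=n)
    tau_true = tau + rng.normal(0.0, tau_jitter, size=n)
    return t0, tau, amp, tau_true
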
parser = argparse.ArgumentParser(description="Simulation of toy data for training of SVD hit time estimator")

parser.add_argument(
    '--nsamples',
    dest='n_samples',
    action='store',
    default=1000000,
    type=int,
    help='Size of the sample to generate'
)
parser.add_argument(
    '--low-t0',
    dest='t0_low',
    action='store',
    default=-2.5 * dt,
    type=float,
    help='Lower bound of t0 distribution, ns')
parser.add_argument(
    '--high-t0',
    dest='t0_high',
    action='store',
    default=1.5 * dt,
    type=float,
    help='Upper bound of t0 distribution, ns')
parser.add_argument(
    '--low-tau',
    dest='tau_low',
    action='store',
    default=200,
    type=float,
    help='Lower bound of tau distribution, ns')
parser.add_argument(
    '--high-tau',
    dest='tau_high',
    action='store',
    default=350,
    type=float,
    help='Upper bound of tau distribution, ns')
parser.add_argument(
    '--jitter-tau',
    dest='tau_jitter',
    action='store',
    default=5,
    type=float,
    help='RMS of tau jitter, 0 - no jitter, ns')
parser.add_argument(
    '--low-amp',
    dest='amp_low',
    action='store',
    default=3,
    type=float,
    help='Lower bound of amplitude distribution, S/N units')
parser.add_argument(
    '--high-amp',
    dest='amp_high',
    action='store',
    default=100,
    type=float,
    help='Upper bound of amplitude distribution, S/N units')
parser.add_argument(
    '--low-sigma',
    dest='sigma_low',
    action='store',
    default=1,
    type=float,
    help='Lower bound of sigma distribution, S/N units')
parser.add_argument(
    '--high-sigma',
    dest='sigma_high',
    action='store',
    default=5,
    type=float,
    help='Upper bound of sigma distribution, S/N units')
parser.add_argument(
    '--bin_size',
    dest='bin_size',
    action='store',
    default=3,
    type=float,
    help='Size of t0 PDF bin, ns')

args = parser.parse_args()

generator = SampleGenerator(
    (args.t0_low, args.t0_high),
    (args.tau_low, args.tau_high),
    (args.amp_low, args.amp_high),
    (args.sigma_low, args.sigma_high),
    args.tau_jitter,
    args.bin_size)

print(f'Generating {args.n_samples} samples...')
sample = generator.generate(args.n_samples)

# Create a bin table: midpoints and lower/upper edges of the t0 PDF bins
timearray = generator.get_t0_array()
timebins = generator.get_t0_bins()
bins = pd.DataFrame({
    'midpoint': timearray,
    'lower': timebins.values[:-1],
    'upper': timebins.values[1:]
})

# Create a table of simulation bounds
bounds = pd.DataFrame({
    'value': np.array(['t0', 'amplitude', 'tau', 'sigma']),
    'sampling': np.array(['uniform', 'uniform', 'uniform', 'uniform']),
    'low': [
        generator.get_t0_bounds()[0],
        generator.get_amp_bounds()[0],
        args.tau_low,
        generator.get_sigma_bounds()[0]
    ],
    'high': [
        generator.get_t0_bounds()[1],
        generator.get_amp_bounds()[1],
        args.tau_high,
        generator.get_sigma_bounds()[1]
    ]
})
orderedcols = ['value', 'sampling', 'low', 'high']
bounds = bounds[orderedcols]

print('Samples created.')

output_name = 'SVDTime_Training{0}_{1}.pkl'

# There will be three pickle files: sample, bins, bounds.

sample.to_pickle(output_name.format('Sample', args.n_samples))
bins.to_pickle(output_name.format('Bins', args.n_samples))
bounds.to_pickle(output_name.format('Bounds', args.n_samples))

print('Done.\nResults saved to {}.'.format(output_name.format(
    '{Sample, Bins, Bounds}', args.n_samples)))
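
# Quick sanity check (illustrative addition, not part of the original
# workflow): read the pickles back the same way a downstream training script
# would, using pandas.read_pickle. This only re-reads the files written just
# above and can be removed without affecting the generated output.
reloaded_sample = pd.read_pickle(output_name.format('Sample', args.n_samples))
reloaded_bins = pd.read_pickle(output_name.format('Bins', args.n_samples))
print('Sanity check: reloaded {} sample rows and {} t0 bins.'.format(
    len(reloaded_sample), len(reloaded_bins)))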