Belle II Software  release-08-01-10
SVDTimeNet_Simulate.py
1 
8 
9 # coding: utf-8
10 
11 # ## Generate a data sample for SVDTime Neural Network training
12 #
13 # This script generates a toy data sample for neural network training.
14 # The result is stored as pickle and additional pickles contain parameters
15 # of training.
16 #
17 # TO DO / FIX:
18 # * We can make the generation faster using C++.
19 # * If we stored in ROOT, we wouldn't need to keep all data in memory.
20 #
21 # Packages required:
22 # - pandas
23 # - SVDSimBase
24 # - argparse
25 
26 # In[1]:
27 
28 import argparse
29 import numpy as np
30 import pandas as pd
31 
32 from svd.SVDSimBase import dt, SampleGenerator
33 
34 # ### Sample generation
35 #
36 # Generate a pandas dataframe containing a large number of waveform samples and truth data, and pickle it.
37 # The data will be used as training and test data.
38 # Waveform widths (tau), amplitudes and time shifts are sampled uniformly from a large set of feasible values.
39 # Additionally, waveform widths (tau) are jittered using a normal
40 # distribution with 5 ns width: that is, the "true" values are slightly
41 # off. This is to robustify against imprecise knowledge of waveform width
42 # or shape.
43 
# Command-line interface of the script. Every option is optional; the values
# end up as attributes of ``args`` under the ``dest`` names listed below.
parser = argparse.ArgumentParser(
    description="Simulation of toy data for training of SVD hit time estimator")

# One row per option: (option string, dest, default, type, help text).
# The default t0 window is expressed in multiples of ``dt`` (imported from
# svd.SVDSimBase).
# NOTE: '--bin_size' is spelled with an underscore, unlike the other options;
# the spelling is kept so that existing command lines continue to work.
option_table = (
    ('--nsamples', 'n_samples', 1000000, int,
     'Size of the sample to generate'),
    ('--low-t0', 't0_low', -2.5 * dt, float,
     'Lower bound of t0 distribution, ns'),
    ('--high-t0', 't0_high', 1.5 * dt, float,
     'Upper bound of t0 distribution, ns'),
    ('--low-tau', 'tau_low', 200, float,
     'Lower bound of tau distribution, ns'),
    ('--high-tau', 'tau_high', 350, float,
     'Upper bound of tau distribution, ns'),
    ('--jitter-tau', 'tau_jitter', 5, float,
     'RMS of tau jitter, 0 - no jitter, ns'),
    ('--low-amp', 'amp_low', 3, float,
     'Lower bound of amplitude distribution, S/N units'),
    ('--high-amp', 'amp_high', 100, float,
     'Upper bound of amplitude distribution, S/N units'),
    ('--low-sigma', 'sigma_low', 1, float,
     'Lower bound of sigma distribution, S/N units'),
    ('--high-sigma', 'sigma_high', 5, float,
     'Upper bound of sigma distribution, S/N units'),
    ('--bin_size', 'bin_size', 3, float,
     'Size of t0 PDF bin, ns'),
)

# Register the options in table order, so the --help listing keeps its order.
for option, dest, default, value_type, help_text in option_table:
    parser.add_argument(
        option,
        dest=dest,
        action='store',
        default=default,
        type=value_type,
        help=help_text,
    )

# Reads the options from sys.argv.
args = parser.parse_args()
126 
# Set up the toy generator. Its positional arguments are the (low, high)
# ranges for t0, tau, amplitude and sigma, followed by the tau jitter and
# the t0 bin size, all taken from the command line.
t0_range = (args.t0_low, args.t0_high)
tau_range = (args.tau_low, args.tau_high)
amp_range = (args.amp_low, args.amp_high)
sigma_range = (args.sigma_low, args.sigma_high)
generator = SampleGenerator(
    t0_range, tau_range, amp_range, sigma_range,
    args.tau_jitter, args.bin_size)

# Produce the requested number of samples; the result is pickled at the end
# of the script (presumably a pandas DataFrame - confirm in SVDSimBase).
progress_message = 'Generating {0} samples...'
print(progress_message.format(args.n_samples))
sample = generator.generate(args.n_samples)
137 
# Tabulate the t0 bins, one row per bin: its midpoint plus lower and upper
# edge. Consecutive entries of the edge array delimit one bin, so the edge
# array is sliced twice, once without its last and once without its first
# entry (assumes it holds one more entry than the midpoint array).
timearray = generator.get_t0_array()
timebins = generator.get_t0_bins()
bin_edges = timebins.values
bins = pd.DataFrame({
    'midpoint': timearray,
    'lower': bin_edges[:-1],
    'upper': bin_edges[1:],
})
146 
# Record the range each simulated quantity was drawn from, one row per
# quantity. The tau limits are the command-line values; the limits of the
# other quantities are read back from the generator.
orderedcols = ['value', 'sampling', 'low', 'high']
quantities = np.array(['t0', 'amplitude', 'tau', 'sigma'])
sampling_kinds = np.array(['uniform'] * quantities.size)
lower_limits = [
    generator.get_t0_bounds()[0],
    generator.get_amp_bounds()[0],
    args.tau_low,
    generator.get_sigma_bounds()[0],
]
upper_limits = [
    generator.get_t0_bounds()[1],
    generator.get_amp_bounds()[1],
    args.tau_high,
    generator.get_sigma_bounds()[1],
]
# Passing ``columns`` fixes the column order explicitly.
bounds = pd.DataFrame(
    {
        'value': quantities,
        'sampling': sampling_kinds,
        'low': lower_limits,
        'high': upper_limits,
    },
    columns=orderedcols,
)
166 
print('Samples created.')

# File name pattern: {0} is the table label, {1} the number of samples.
output_name = 'SVDTime_Training{0}_{1}.pkl'

# Three pickle files are written, one per table: sample, bins, bounds.
# The names are relative, so the files land in the current working directory.
for label, table in (('Sample', sample), ('Bins', bins), ('Bounds', bounds)):
    table.to_pickle(output_name.format(label, args.n_samples))

# Report all three file names at once using a brace-list placeholder label.
summary_name = output_name.format('{Sample, Bins, Bounds}', args.n_samples)
print('Done.\nResults saved to {0}.'.format(summary_name))