Belle II Software  release-05-02-19
SVDTimeNet_Simulate.py
1 
2 # coding: utf-8
3 
4 # ## Generate a data sample for SVDTime Neural Network training
5 #
6 # This script generates a toy data sample for neural network training.
7 # The result is stored as pickle and additional pickles contain parameters
8 # of training.
9 #
10 # TO DO / FIX:
11 # * We can make the generation faster using C++.
12 # * If we stored in ROOT, we wouldn't need to keep all data in memory.
13 #
14 # Packages required:
15 # - pandas
16 # - SVDSimBase
17 # - argparse
18 
19 # In[1]:
20 
21 import pandas as pd
22 from svd.SVDSimBase import *
23 import argparse
24 
25 # ### Sample generation
26 #
27 # Generate a pandas dataframe containing a large number of waveform samples and truth data, and pickle it.
28 # The data will be used as training and test data.
29 # Waveform widths (tau), amplitudes and time shifts are sampled uniformly from a large set of feasible values.
30 # Additionally, waveform widths (tau) are jittered using a normal
31 # distribution with 5 ns width: that is, the "true" values are slightly
32 # off. This is to robustify against imprecise knowledge of waveform width
33 # or shape.
34 
# Command-line interface. Every option has a default, so the script can be
# run without any arguments.
# NOTE(review): `dt` is not defined in this script; it is presumably provided
# by the wildcard import from svd.SVDSimBase -- confirm.
parser = argparse.ArgumentParser(
    description="Simulation of toy data for training of SVD hit time estimator")

# Number of samples to generate; also used in the output file names.
parser.add_argument(
    '--nsamples',
    dest='n_samples',
    default=1000000,
    type=int,
    help='Size of the sample to generate'
)

# Bounds of the t0 distribution; the defaults are expressed as multiples of dt.
parser.add_argument(
    '--low-t0',
    dest='t0_low',
    default=-2.5 * dt,
    type=float,
    help='Lower bound of t0 distribution, ns')
parser.add_argument(
    '--high-t0',
    dest='t0_high',
    default=1.5 * dt,
    type=float,
    help='Upper bound of t0 distribution, ns')

# Bounds of the tau distribution and the RMS of the jitter applied to tau.
parser.add_argument(
    '--low-tau',
    dest='tau_low',
    default=200,
    type=float,
    help='Lower bound of tau distribution, ns')
parser.add_argument(
    '--high-tau',
    dest='tau_high',
    default=350,
    type=float,
    help='Upper bound of tau distribution, ns')
parser.add_argument(
    '--jitter-tau',
    dest='tau_jitter',
    default=5,
    type=float,
    help='RMS of tau jitter, 0 - no jitter, ns')

# Bounds of the amplitude distribution.
parser.add_argument(
    '--low-amp',
    dest='amp_low',
    default=3,
    type=float,
    help='Lower bound of amplitude distribution, S/N units')
parser.add_argument(
    '--high-amp',
    dest='amp_high',
    default=100,
    type=float,
    help='Upper bound of amplitude distribution, S/N units')

# Bounds of the sigma distribution.
parser.add_argument(
    '--low-sigma',
    dest='sigma_low',
    default=1,
    type=float,
    help='Lower bound of sigma distribution, S/N units')
parser.add_argument(
    '--high-sigma',
    dest='sigma_high',
    default=5,
    type=float,
    help='Upper bound of sigma distribution, S/N units')

# Bin size of the t0 PDF. The option keeps its historical underscore
# spelling so that existing command lines continue to work.
parser.add_argument(
    '--bin_size',
    dest='bin_size',
    default=3,
    type=float,
    help='Size of t0 PDF bin, ns')

args = parser.parse_args()
117 
# Parameter ranges handed to SampleGenerator, each as a (low, high) pair
# taken from the command-line arguments.
t0_range = (args.t0_low, args.t0_high)
tau_range = (args.tau_low, args.tau_high)
amp_range = (args.amp_low, args.amp_high)
sigma_range = (args.sigma_low, args.sigma_high)

# Arguments are passed positionally: t0, tau, amplitude and sigma ranges,
# then the tau jitter and the t0 bin size.
generator = SampleGenerator(
    t0_range,
    tau_range,
    amp_range,
    sigma_range,
    args.tau_jitter,
    args.bin_size,
)

print('Generating {0} samples...'.format(args.n_samples))
sample = generator.generate(args.n_samples)
128 
# Build the bin table: one row per t0 bin, holding the value reported by
# get_t0_array() (stored as 'midpoint') together with the bin's lower and
# upper edges, taken as consecutive entries of the edge array.
timearray = generator.get_t0_array()
timebins = generator.get_t0_bins()
bin_edges = timebins.values
bins = pd.DataFrame(
    {
        'midpoint': timearray,
        'lower': bin_edges[:-1],
        'upper': bin_edges[1:],
    }
)
137 
# Build the table of simulation bounds: one row per simulated quantity with
# its sampling scheme and (low, high) limits.
# The t0, amplitude and sigma limits are read back from the generator; the
# tau limits are taken directly from the command-line arguments.
t0_bounds = generator.get_t0_bounds()
amp_bounds = generator.get_amp_bounds()
sigma_bounds = generator.get_sigma_bounds()

# Fix the column order explicitly instead of relying on dict ordering.
orderedcols = ['value', 'sampling', 'low', 'high']
bounds = pd.DataFrame(
    {
        'value': np.array(['t0', 'amplitude', 'tau', 'sigma']),
        'sampling': np.array(['uniform', 'uniform', 'uniform', 'uniform']),
        'low': [t0_bounds[0], amp_bounds[0], args.tau_low, sigma_bounds[0]],
        'high': [t0_bounds[1], amp_bounds[1], args.tau_high, sigma_bounds[1]],
    },
    columns=orderedcols,
)
157 
print('Samples created.')

# File name template: {0} is the table label, {1} the number of samples.
output_name = 'SVDTime_Training{0}_{1}.pkl'

# Three pickles are written, one per table: sample, bins, bounds.
for label, frame in (('Sample', sample), ('Bins', bins), ('Bounds', bounds)):
    frame.to_pickle(output_name.format(label, args.n_samples))

# Summarise the three file names in a single brace-style pattern.
saved_pattern = output_name.format('{Sample, Bins, Bounds}', args.n_samples)
print('Done.\nResults saved to {0}.'.format(saved_pattern))
svd.SVDSimBase
Definition: SVDSimBase.py:1