Belle II Software  release-05-01-25
DiscreteCosineTransform_31points.cc
Go to the documentation of this file.
1 
// Scalar types used by the generated transform code in this file:
// R is the type of the input/output samples, E the type of every intermediate.
28 typedef double R;
29 typedef R E; /* internal precision of codelets. */
// K(x) casts a numeric literal to E; DK(name, value) declares a named
// constant of type E initialised with that literal.
30 #define K(x) ((E) x)
31 #define DK(name, value) const E name = K(value)
// WS(x,y) expands to its second argument only. The first argument (written
// as `is` / `os` at the call sites) is discarded and never evaluated, so the
// arrays are indexed directly by y.
32 #define WS(x,y) (y)
// Multiply-add helpers, spelled out as ordinary arithmetic:
//   FMA(a,b,c)  =   a*b + c        FMS(a,b,c)  =  a*b - c
//   FNMA(a,b,c) = -(a*b + c)       FNMS(a,b,c) =  c - a*b
33 #define FMA(a, b, c) (((a) * (b)) + (c))
34 #define FMS(a, b, c) (((a) * (b)) - (c))
35 #define FNMA(a, b, c) (- (((a) * (b)) + (c)))
36 #define FNMS(a, b, c) ((c) - ((a) * (b)))
37 
// Declare both transform entry points with C linkage. Each takes a read-only
// input array I and an output array O; the definitions below access indices
// 0..30 of both.
38 extern "C" {
39  void e10_31(const R* I, R* O);
40  void e01_31(const R* I, R* O);
41 }
42 
// Numeric constants shared by e10_31 and e01_31, declared through DK as
// `const E` objects inside an unnamed namespace (file-local).
// Naming: KP<digits> holds 0.<digits>..., KP<n>_<digits> holds <n>.<digits>...
// The literals carry more digits than the `double` type behind E can
// represent; the compiler rounds them.
// Closed forms are given only where the value confirms them.
43 namespace {
44  DK(KP000412259, +0.000412259418562871938380998445334699821046170);
45  DK(KP015708004, +0.015708004810545652602792720509151343637341044);
46  DK(KP019941366, +0.019941366459822654495429853833981662789297058);
47  DK(KP025400502, +0.025400502734294785428452465754780624947550210);
48  DK(KP028866483, +0.028866483847295741954970658534562655092360970);
49  DK(KP029606561, +0.029606561198652297994480542192674295188153583);
50  DK(KP045346848, +0.045346848173899962231923625268893718434894172);
51  DK(KP066666666, +0.066666666666666666666666666666666666666666667); // 1/15
52  DK(KP092681288, +0.092681288904379450142256318609598804525206067);
53  DK(KP102097497, +0.102097497864916063688242067516611448492966715);
54  DK(KP112172063, +0.112172063906358903891072106654229302317378058);
55  DK(KP122761339, +0.122761339421712417807572754092970003891850465);
56  DK(KP127938670, +0.127938670558678996799573548799714027347967121);
57  DK(KP147857608, +0.147857608946689579852313890437569859447378619);
58  DK(KP155909426, +0.155909426230360388401557646847789940246255225);
59  DK(KP160793728, +0.160793728520323189459149287981372086275142541);
60  DK(KP183215435, +0.183215435972067868363105533577134775661644325);
61  DK(KP183333495, +0.183333495452244782819904055070309212901710558);
62  DK(KP183845747, +0.183845747585549357937166766576821269206738322);
63  DK(KP184517712, +0.184517712830393344154095734604975602001386285);
64  DK(KP184926209, +0.184926209687313710109434775837815115985225567);
65  DK(KP185591687, +0.185591687547196603013206497513733070197662098);
66  DK(KP202100941, +0.202100941504002851338890151760897670549611107);
67  DK(KP213702830, +0.213702830714905671421951896566570134065492948);
68  DK(KP245522678, +0.245522678843424835615145508185940007783700930);
69  DK(KP250000000, +0.250000000000000000000000000000000000000000000); // 1/4
70  DK(KP251026872, +0.251026872929094175322677333303375485053014277);
71  DK(KP255877341, +0.255877341117357993599147097599428054695934242);
72  DK(KP258006924, +0.258006924095276452089799714364401388739221940);
73  DK(KP293892626, +0.293892626146236564584352977319536384298826219); // sin(pi/5)/2
74  DK(KP296373721, +0.296373721102994137554600958572269203487908448);
75  DK(KP303494444, +0.303494444631551941253967923387361806243372364);
76  DK(KP311340628, +0.311340628927503445870467381445371310537082980);
77  DK(KP341720569, +0.341720569276894099786524583841162921922802620);
78  DK(KP348438623, +0.348438623509873804361149807347644092563702804);
79  DK(KP350296205, +0.350296205119560058350720150718018638663792697);
80  DK(KP360104421, +0.360104421960192515778041781881012837232188655);
81  DK(KP371184290, +0.371184290855334794807964753261236634698426225);
82  DK(KP387067417, +0.387067417450794062018448209110056640357696792);
83  DK(KP404201883, +0.404201883008005702677780303521795341099222215);
84  DK(KP427405661, +0.427405661429811342843903793133140268130985897);
85  DK(KP433012701, +0.433012701892219323381861585376468091735701313); // sqrt(3)/4
86  DK(KP462201919, +0.462201919825108579466283849397624285725155370);
87  DK(KP475528258, +0.475528258147576786058219666689691071702849317); // cos(pi/10)/2
88  DK(KP500000000, +0.500000000000000000000000000000000000000000000); // 1/2
89  DK(KP559016994, +0.559016994374947424102293417182819058860154590); // sqrt(5)/4
90  DK(KP587785252, +0.587785252292473129168705954639072768597652438); // sin(pi/5)
91  DK(KP606988889, +0.606988889263103882507935846774723612486744729);
92  DK(KP618111346, +0.618111346055468967867841496245414225971410594);
93  DK(KP622681257, +0.622681257855006891740934762890742621074165960);
94  DK(KP638094290, +0.638094290379888237341125542413432125410711069);
95  DK(KP696877247, +0.696877247019747608722299614695288185127405608);
96  DK(KP700592410, +0.700592410239120116701440301436037277327585395);
97  DK(KP951056516, +0.951056516295153572116439333379382143405698634);// cos(pi/10)
98  DK(KP968245836, +0.968245836551854221294816349945599902708230426);// sqrt(5)*sqrt(3)/4 = sqrt(15)/4
99  DK(KP1_018073920, +1.018073920910254366901961726787815297021466329);// sin(pi/5)*sqrt(3)
100  DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);// sqrt(5)/2
101  DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);// 2*sin(pi/5)
102  DK(KP1_647278207, +1.647278207092663851754840078556380006059321028);// cos(pi/15)+cos(4*pi/15) = 2*cos(pi/6)*cos(pi/10)
103  DK(KP1_732050807, +1.732050807568877293527446341505872366942805254);// sqrt(3)
104  DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); // 2*cos(pi/10)
105  DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
106  DK(KP3_464101615, +3.464101615137754587054892683011744733885610508); // 2*sqrt(3)
107  DK(KP4_000000000, +4.000000000000000000000000000000000000000000000);
108 }
109 
/*
 * e10_31 -- 31-point transform written as fully unrolled straight-line code
 * (no loops, no branches). The generated documentation summary for this
 * function describes it as the DCT-II ("the" DCT) of a 31-point signal.
 *
 * I: input array; each index 0..30 is read exactly once.
 * O: output array; each index 0..30 is written exactly once.
 *
 * Both arrays are indexed contiguously, because WS() drops its stride
 * argument. Every load from I happens before the first store to O.
 * O[0] is twice the plain sum of the 31 inputs, so the result is not
 * normalised.
 *
 * The T* locals are generated temporaries; their names carry no meaning.
 */
117 void e10_31(const R* I, R* O)
118 {
119  E T2J, T1H, T1J, T1I, T1K, T1T, T2S, T3a, T31, T3b, T3M, T1M, T1P, T1N, T1Q;
120  E T1i, T1l, T1A, T1D, T1b, T1k, T1v, T1C, T3n, T3I, T3y, T3D, T3S, T3V, T43;
121  E T46, T3P, T3U, T40, T45;
  // Middle sample: the only input that has no mirror partner.
122  T2J = I[WS(is, 15)];
  // Load stage. Inputs are taken in mirrored pairs I[k], I[30-k]; each pair
  // produces one difference and one sum. Differences feed the odd-indexed
  // outputs, sums feed the even-indexed outputs.
123  {
124  E TN, T2K, TW, T32, T16, T2T, T1d, T1e, T3e, T3d, T2R, T3p, TK, TO, Tu;
125  E T1p, T3i, T3F, T12, T17, T30, T3s, Tf, T1o, T3l, T3G, TT, TX, T39, T3v;
126  E TL, TM;
127  TL = I[WS(is, 17)];
128  TM = I[WS(is, 13)];
129  TN = TL - TM;
130  T2K = TL + TM;
131  {
132  E TU, TV, T14, T15;
133  TU = I[WS(is, 25)];
134  TV = I[WS(is, 5)];
135  TW = TU - TV;
136  T32 = TU + TV;
137  T14 = I[WS(is, 3)];
138  T15 = I[WS(is, 27)];
139  T16 = T14 - T15;
140  T2T = T14 + T15;
141  }
142  {
143  E Ty, T2L, TI, T2P, TB, T2M, TF, T2O;
144  {
145  E Tw, Tx, TG, TH;
146  Tw = I[WS(is, 14)];
147  Tx = I[WS(is, 16)];
148  Ty = Tw - Tx;
149  T2L = Tw + Tx;
150  TG = I[WS(is, 23)];
151  TH = I[WS(is, 7)];
152  TI = TG - TH;
153  T2P = TG + TH;
154  }
155  {
156  E Tz, TA, TD, TE;
157  Tz = I[WS(is, 19)];
158  TA = I[WS(is, 11)];
159  TB = Tz - TA;
160  T2M = Tz + TA;
161  TD = I[WS(is, 30)];
162  TE = I[0];
163  TF = TD - TE;
164  T2O = TD + TE;
165  }
166  T1d = Ty - TB;
167  T1e = TF - TI;
168  T3e = T2O - T2P;
169  T3d = T2M - T2L;
170  {
171  E T2N, T2Q, TC, TJ;
172  T2N = T2L + T2M;
173  T2Q = T2O + T2P;
174  T2R = T2N + T2Q;
175  T3p = KP559016994 * (T2Q - T2N);
176  TC = Ty + TB;
177  TJ = TF + TI;
178  TK = KP559016994 * (TC - TJ);
179  TO = TC + TJ;
180  }
181  }
182  {
183  E Ti, T2U, Ts, T2Y, Tl, T2V, Tp, T2X;
184  {
185  E Tg, Th, Tq, Tr;
186  Tg = I[WS(is, 9)];
187  Th = I[WS(is, 21)];
188  Ti = Tg - Th;
189  T2U = Tg + Th;
190  Tq = I[WS(is, 29)];
191  Tr = I[WS(is, 1)];
192  Ts = Tq - Tr;
193  T2Y = Tq + Tr;
194  }
195  {
196  E Tj, Tk, Tn, To;
197  Tj = I[WS(is, 8)];
198  Tk = I[WS(is, 22)];
199  Tl = Tj - Tk;
200  T2V = Tj + Tk;
201  Tn = I[WS(is, 18)];
202  To = I[WS(is, 12)];
203  Tp = Tn - To;
204  T2X = Tn + To;
205  }
206  {
207  E Tm, Tt, T3g, T3h;
208  Tm = Ti - Tl;
209  Tt = Tp - Ts;
210  Tu = FMA(KP475528258, Tm, KP293892626 * Tt);
211  T1p = FNMS(KP475528258, Tt, KP293892626 * Tm);
212  T3g = T2V - T2U;
213  T3h = T2X - T2Y;
214  T3i = FMA(KP293892626, T3g, KP475528258 * T3h);
215  T3F = FNMS(KP293892626, T3h, KP475528258 * T3g);
216  }
217  {
218  E T10, T11, T2W, T2Z;
219  T10 = Ti + Tl;
220  T11 = Tp + Ts;
221  T12 = T10 - T11;
222  T17 = T10 + T11;
223  T2W = T2U + T2V;
224  T2Z = T2X + T2Y;
225  T30 = T2W + T2Z;
226  T3s = KP559016994 * (T2Z - T2W);
227  }
228  }
229  {
230  E T3, T33, Td, T37, T6, T34, Ta, T36;
231  {
232  E T1, T2, Tb, Tc;
233  T1 = I[WS(is, 10)];
234  T2 = I[WS(is, 20)];
235  T3 = T1 - T2;
236  T33 = T1 + T2;
237  Tb = I[WS(is, 6)];
238  Tc = I[WS(is, 24)];
239  Td = Tb - Tc;
240  T37 = Tb + Tc;
241  }
242  {
243  E T4, T5, T8, T9;
244  T4 = I[WS(is, 26)];
245  T5 = I[WS(is, 4)];
246  T6 = T4 - T5;
247  T34 = T4 + T5;
248  T8 = I[WS(is, 28)];
249  T9 = I[WS(is, 2)];
250  Ta = T8 - T9;
251  T36 = T8 + T9;
252  }
253  {
254  E T7, Te, T3j, T3k;
255  T7 = T3 - T6;
256  Te = Ta - Td;
257  Tf = FMA(KP475528258, T7, KP293892626 * Te);
258  T1o = FNMS(KP475528258, Te, KP293892626 * T7);
259  T3j = T34 - T33;
260  T3k = T36 - T37;
261  T3l = FMA(KP293892626, T3j, KP475528258 * T3k);
262  T3G = FNMS(KP293892626, T3k, KP475528258 * T3j);
263  }
264  {
265  E TR, TS, T35, T38;
266  TR = T3 + T6;
267  TS = Ta + Td;
268  TT = KP559016994 * (TR - TS);
269  TX = TR + TS;
270  T35 = T33 + T34;
271  T38 = T36 + T37;
272  T39 = T35 + T38;
273  T3v = T38 - T35;
274  }
275  }
  // Three group totals of the pair differences (T1H, T1J, T1I) ...
276  T1H = TN + TO;
277  T1J = T16 + T17;
278  T1I = TW + TX;
279  T1K = FMA(KP258006924, T1H, KP102097497 * T1I) - (KP360104421 * T1J);
280  T1T = KP371184290 * (T1H + T1J + T1I);
  // ... and of the pair sums (T2S, T31, T3a). T3b is the sum of all 30
  // paired inputs, i.e. every input except I[15].
281  T2S = T2K + T2R;
282  T3a = T32 + T39;
283  T31 = T2T + T30;
284  T3b = T2S + T31 + T3a;
285  T3M = FMA(KP045346848, T31, KP296373721 * T3a) - (KP341720569 * T2S);
  // Intermediate terms built only from the pair differences.
286  {
287  E Tv, T1q, TQ, T1r, TZ, T1s, T1c, T1f, T1g, T19, T1t, T1w, T1x, T1y;
288  {
289  E TP, TY, T13, T18;
290  Tv = KP3_464101615 * (Tf - Tu);
291  T1q = KP3_464101615 * (T1o - T1p);
292  TP = FNMS(KP250000000, TO, TN);
293  TQ = TK + TP;
294  T1r = TP - TK;
295  TY = FNMS(KP250000000, TX, TW);
296  TZ = TT + TY;
297  T1s = TY - TT;
298  T1c = Tu + Tf;
299  T1f = FMA(KP475528258, T1d, KP293892626 * T1e);
300  T1g = FNMS(KP4_000000000, T1f, KP2_000000000 * T1c);
301  T13 = KP559016994 * T12;
302  T18 = FNMS(KP250000000, T17, T16);
303  T19 = T13 + T18;
304  T1t = T18 - T13;
305  T1w = T1p + T1o;
306  T1x = FNMS(KP475528258, T1e, KP293892626 * T1d);
307  T1y = FNMS(KP4_000000000, T1x, KP2_000000000 * T1w);
308  }
309  T1M = T1f + T1c;
310  T1P = T1x + T1w;
311  T1N = TQ + T19 + TZ;
312  T1Q = T1r + T1t + T1s;
313  {
314  E T1h, T1z, T1a, T1u;
315  T1h = KP1_732050807 * (TZ - T19);
316  T1i = T1g + T1h;
317  T1l = T1h - T1g;
318  T1z = KP1_732050807 * (T1s - T1t);
319  T1A = T1y + T1z;
320  T1D = T1z - T1y;
321  T1a = FMS(KP2_000000000, TQ, TZ) - T19;
322  T1b = Tv + T1a;
323  T1k = T1a - Tv;
324  T1u = FMS(KP2_000000000, T1r, T1s) - T1t;
325  T1v = T1q + T1u;
326  T1C = T1u - T1q;
327  }
328  }
  // Intermediate terms built only from the pair sums.
329  {
330  E T3O, T3Z, T3q, T3A, T3t, T3B, T3x, T3C, T3f, T3m, T3R, T3E, T3H, T42;
331  {
332  E T3o, T3r, T3w, T3u;
333  T3O = KP3_464101615 * (T3l - T3i);
334  T3Z = KP3_464101615 * (T3G - T3F);
335  T3o = FMS(KP250000000, T2R, T2K);
336  T3q = T3o - T3p;
337  T3A = T3o + T3p;
338  T3r = FMS(KP250000000, T30, T2T);
339  T3t = T3r - T3s;
340  T3B = T3r + T3s;
341  T3w = KP559016994 * T3v;
342  T3u = FMS(KP250000000, T39, T32);
343  T3x = T3u - T3w;
344  T3C = T3u + T3w;
345  T3f = FMA(KP293892626, T3d, KP475528258 * T3e);
346  T3m = T3i + T3l;
347  T3R = FNMS(KP2_000000000, T3m, KP4_000000000 * T3f);
348  T3E = FNMS(KP293892626, T3e, KP475528258 * T3d);
349  T3H = T3F + T3G;
350  T42 = FNMS(KP2_000000000, T3H, KP4_000000000 * T3E);
351  }
352  T3n = T3f + T3m;
353  T3I = T3E + T3H;
354  T3y = T3q + T3t + T3x;
355  T3D = T3A + T3B + T3C;
356  {
357  E T3Q, T41, T3N, T3Y;
358  T3Q = KP1_732050807 * (T3t - T3x);
359  T3S = T3Q - T3R;
360  T3V = T3R + T3Q;
361  T41 = KP1_732050807 * (T3B - T3C);
362  T43 = T41 - T42;
363  T46 = T42 + T41;
364  T3N = FMS(KP2_000000000, T3q, T3x) - T3t;
365  T3P = T3N - T3O;
366  T3U = T3O + T3N;
367  T3Y = FMS(KP2_000000000, T3A, T3C) - T3B;
368  T40 = T3Y - T3Z;
369  T45 = T3Z + T3Y;
370  }
371  }
372  }
  // O[0] = 2 * (I[15] + sum of all other inputs) = 2 * sum of all 31 inputs.
373  O[0] = KP2_000000000 * (T2J + T3b);
  // Odd-indexed outputs O[1], O[3], ..., O[29], from the difference terms.
374  {
375  E T2l, T2B, T1U, T2o, T2C, T2a, T2w, T2c, T1G, T2d, T23, T25, T2y, T2h, T22;
376  E T26, T2j, T2k, T24, T1L, T1V;
377  T2j = FNMS(KP700592410, T1M, KP122761339 * T1N);
378  T2k = FMA(KP404201883, T1P, KP311340628 * T1Q);
379  T2l = FMA(KP1_902113032, T2j, KP1_175570504 * T2k);
380  T2B = FNMS(KP1_902113032, T2k, KP1_175570504 * T2j);
381  {
382  E T1S, T2m, T1O, T1R, T2n;
383  T1O = FMA(KP245522678, T1M, KP350296205 * T1N);
384  T1R = FNMS(KP202100941, T1Q, KP622681257 * T1P);
385  T1S = T1O + T1R;
386  T2m = KP1_118033988 * (T1R - T1O);
387  T1U = FMS(KP2_000000000, T1S, T1T);
388  T2n = FMA(KP500000000, T1S, T1T);
389  T2o = T2m + T2n;
390  T2C = T2n - T2m;
391  }
392  {
393  E T1n, T29, T1F, T28;
394  {
395  E T1j, T1m, T1B, T1E;
396  T1j = FNMS(KP184517712, T1i, KP019941366 * T1b);
397  T1m = FNMS(KP183845747, T1l, KP025400502 * T1k);
398  T1n = T1j + T1m;
399  T29 = T1m - T1j;
400  T1B = FMA(KP184926209, T1v, KP015708004 * T1A);
401  T1E = FMA(KP183215435, T1C, KP029606561 * T1D);
402  T1F = T1B - T1E;
403  T28 = T1B + T1E;
404  }
405  T2a = FNMS(KP1_647278207, T29, KP1_018073920 * T28);
406  T2w = FMA(KP1_018073920, T29, KP1_647278207 * T28);
407  T2c = KP559016994 * (T1F - T1n);
408  T1G = T1n + T1F;
409  T2d = FMA(KP250000000, T1G, T1K);
410  }
411  {
412  E T21, T2g, T1Y, T2f;
413  T23 = FMA(KP462201919, T1H, KP155909426 * T1J) - (KP618111346 * T1I);
414  {
415  E T1Z, T20, T1W, T1X;
416  T1Z = FNMS(KP015708004, T1v, KP184926209 * T1A);
417  T20 = FNMS(KP183215435, T1D, KP029606561 * T1C);
418  T21 = T1Z + T20;
419  T2g = T1Z - T20;
420  T1W = FMA(KP184517712, T1b, KP019941366 * T1i);
421  T1X = FMA(KP183845747, T1k, KP025400502 * T1l);
422  T1Y = T1W + T1X;
423  T2f = T1W - T1X;
424  }
425  T25 = KP968245836 * (T1Y - T21);
426  T2y = FNMS(KP587785252, T2f, KP951056516 * T2g);
427  T2h = FMA(KP951056516, T2f, KP587785252 * T2g);
428  T22 = T1Y + T21;
429  T26 = FNMS(KP433012701, T22, T23);
430  }
431  T24 = FMA(KP1_732050807, T22, T23);
432  T1L = T1G - T1K;
433  T1V = T1U - T1L;
434  O[WS(os, 1)] = FMA(KP2_000000000, T1L, T1U);
435  O[WS(os, 25)] = T24 + T1V;
436  O[WS(os, 5)] = T1V - T24;
437  {
438  E T2D, T2G, T2x, T2H, T2A, T2F, T2v, T2z, T2E, T2I;
439  T2D = T2B - T2C;
440  T2G = T2B + T2C;
441  T2v = T25 - T26;
442  T2x = T2v - T2w;
443  T2H = T2w + T2v;
444  T2z = T2d - T2c;
445  T2A = T2y + T2z;
446  T2F = T2y - T2z;
447  O[WS(os, 23)] = FNMS(KP2_000000000, T2A, T2D);
448  O[WS(os, 27)] = FMS(KP2_000000000, T2F, T2G);
449  T2E = T2A + T2D;
450  O[WS(os, 17)] = T2x - T2E;
451  O[WS(os, 9)] = T2x + T2E;
452  T2I = T2F + T2G;
453  O[WS(os, 11)] = T2H - T2I;
454  O[WS(os, 7)] = T2H + T2I;
455  }
456  {
457  E T2p, T2r, T2b, T2u, T2i, T2s, T27, T2e, T2q, T2t;
458  T2p = T2l - T2o;
459  T2r = T2l + T2o;
460  T27 = T25 + T26;
461  T2b = T27 - T2a;
462  T2u = T2a + T27;
463  T2e = T2c + T2d;
464  T2i = T2e - T2h;
465  T2s = T2h + T2e;
466  O[WS(os, 15)] = FNMS(KP2_000000000, T2i, T2p);
467  O[WS(os, 29)] = FMA(KP2_000000000, T2s, T2r);
468  T2q = T2i + T2p;
469  O[WS(os, 13)] = T2b - T2q;
470  O[WS(os, 3)] = T2b + T2q;
471  T2t = T2r - T2s;
472  O[WS(os, 19)] = T2t - T2u;
473  O[WS(os, 21)] = T2u + T2t;
474  }
475  }
  // Even-indexed outputs O[2], O[4], ..., O[30], from the sum terms and I[15].
476  {
477  E T4x, T4P, T3L, T4u, T4O, T4p, T4T, T4E, T49, T4o, T4c, T4n, T4M, T4A, T4j;
478  E T4z, T4v, T4w, T4k, T4a, T4b;
479  T4v = FNMS(KP127938670, T3y, KP696877247 * T3n);
480  T4w = FMA(KP606988889, T3I, KP213702830 * T3D);
481  T4x = FMA(KP1_175570504, T4v, KP1_902113032 * T4w);
482  T4P = FNMS(KP1_902113032, T4v, KP1_175570504 * T4w);
483  {
484  E T3c, T3K, T4t, T3z, T3J, T4s;
485  T3c = FNMS(KP2_000000000, T2J, KP066666666 * T3b);
486  T3z = FMA(KP255877341, T3n, KP348438623 * T3y);
487  T3J = FNMS(KP427405661, T3I, KP303494444 * T3D);
488  T3K = T3z + T3J;
489  T4t = KP1_118033988 * (T3J - T3z);
490  T3L = FMA(KP2_000000000, T3K, T3c);
491  T4s = FNMS(KP500000000, T3K, T3c);
492  T4u = T4s + T4t;
493  T4O = T4t - T4s;
494  }
495  {
496  E T3X, T4C, T48, T4D;
497  {
498  E T3T, T3W, T44, T47;
499  T3T = FMA(KP185591687, T3P, KP000412259 * T3S);
500  T3W = FMA(KP112172063, T3U, KP147857608 * T3V);
501  T3X = T3T - T3W;
502  T4C = T3T + T3W;
503  T44 = FMA(KP028866483, T40, KP183333495 * T43);
504  T47 = FMA(KP092681288, T45, KP160793728 * T46);
505  T48 = T44 - T47;
506  T4D = T44 + T47;
507  }
508  T4p = KP559016994 * (T48 - T3X);
509  T4T = FNMS(KP1_647278207, T4C, KP1_018073920 * T4D);
510  T4E = FMA(KP1_018073920, T4C, KP1_647278207 * T4D);
511  T49 = T3X + T48;
512  T4o = FNMS(KP250000000, T49, T3M);
513  }
514  {
515  E T4i, T4m, T4f, T4l;
516  T4c = FMA(KP251026872, T2S, KP387067417 * T3a) - (KP638094290 * T31);
517  {
518  E T4g, T4h, T4d, T4e;
519  T4g = FNMS(KP183333495, T40, KP028866483 * T43);
520  T4h = FNMS(KP092681288, T46, KP160793728 * T45);
521  T4i = T4g + T4h;
522  T4m = T4h - T4g;
523  T4d = FNMS(KP000412259, T3P, KP185591687 * T3S);
524  T4e = FNMS(KP112172063, T3V, KP147857608 * T3U);
525  T4f = T4d + T4e;
526  T4l = T4e - T4d;
527  }
528  T4n = FMA(KP587785252, T4l, KP951056516 * T4m);
529  T4M = FNMS(KP951056516, T4l, KP587785252 * T4m);
530  T4A = KP968245836 * (T4i - T4f);
531  T4j = T4f + T4i;
532  T4z = FNMS(KP433012701, T4j, T4c);
533  }
534  T4k = FMA(KP1_732050807, T4j, T4c);
535  T4a = T3M + T49;
536  T4b = T3L - T4a;
537  O[WS(os, 6)] = T4b - T4k;
538  O[WS(os, 30)] = FMA(KP2_000000000, T4a, T3L);
539  O[WS(os, 26)] = T4k + T4b;
540  {
541  E T4Q, T4W, T4N, T4V, T4U, T4Y, T4L, T4S, T4R, T4X;
542  T4Q = T4O - T4P;
543  T4W = T4O + T4P;
544  T4L = T4p - T4o;
545  T4N = T4L - T4M;
546  T4V = T4M + T4L;
547  T4S = T4z - T4A;
548  T4U = T4S + T4T;
549  T4Y = T4S - T4T;
550  O[WS(os, 4)] = FMA(KP2_000000000, T4N, T4Q);
551  O[WS(os, 8)] = FMA(KP2_000000000, T4V, T4W);
552  T4R = T4Q - T4N;
553  O[WS(os, 20)] = T4R - T4U;
554  O[WS(os, 24)] = T4U + T4R;
555  T4X = T4V - T4W;
556  O[WS(os, 14)] = T4X - T4Y;
557  O[WS(os, 22)] = T4Y + T4X;
558  }
559  {
560  E T4y, T4J, T4r, T4I, T4F, T4H, T4q, T4B, T4G, T4K;
561  T4y = T4u + T4x;
562  T4J = T4x - T4u;
563  T4q = T4o + T4p;
564  T4r = T4n + T4q;
565  T4I = T4n - T4q;
566  T4B = T4z + T4A;
567  T4F = T4B + T4E;
568  T4H = T4B - T4E;
569  O[WS(os, 2)] = FMA(KP2_000000000, T4r, T4y);
570  O[WS(os, 16)] = FMA(KP2_000000000, T4I, T4J);
571  T4G = T4y - T4r;
572  O[WS(os, 12)] = T4F - T4G;
573  O[WS(os, 10)] = T4F + T4G;
574  T4K = T4I - T4J;
575  O[WS(os, 28)] = T4H - T4K;
576  O[WS(os, 18)] = T4H + T4K;
577  }
578  }
579 }
580 
581 
/*
 * e01_31 -- 31-point transform written as fully unrolled straight-line code
 * (no loops, no branches). The generated documentation summary for this
 * function describes it as the DCT-III ("the inverse" DCT) of a 31-point
 * signal.
 *
 * I: input array; each index 0..30 is read exactly once.
 * O: output array; each index 0..30 is written exactly once.
 *
 * Both arrays are indexed contiguously, because WS() drops its stride
 * argument. Every load from I happens before the first store to O.
 *
 * The T* locals are generated temporaries; their names carry no meaning.
 */
589 void e01_31(const R* I, R* O)
590 {
591  E T22, T4l, T2R, T1S, T1W, T1X, T2O, T3t, T2L, T3s, T1M, T1B, T1N, T2D, T3q;
592  E T2A, T3p, T1, Tt, Tu, T47, T25, T10, TV, T11, T2q, T3h, T2n, T3i, TQ;
593  E TF, TR, T2f, T3e, T2c, T3f;
  // Stage 1: terms built from the odd-indexed inputs I[1], I[3], ..., I[29].
594  {
595  E T1c, T1a, T1O, T1L, T1b, T1d, T1f, T1y, T1o, T1w, T1m, T1n, T1x, T1z, T1P;
596  E T1Q, T1R, T1E, T1H, T1I, T1Z, T21, T20;
597  {
598  E T19, T1K, T16, T1J;
599  T1c = I[WS(is, 27)];
600  {
601  E T17, T18, T14, T15;
602  T17 = I[WS(is, 1)];
603  T18 = I[WS(is, 15)];
604  T19 = T17 + T18;
605  T1K = T18 - T17;
606  T14 = I[WS(is, 29)];
607  T15 = I[WS(is, 23)];
608  T16 = T14 - T15;
609  T1J = T14 + T15;
610  }
611  T1a = KP559016994 * (T16 + T19);
612  T1O = FNMS(KP475528258, T1K, KP293892626 * T1J);
613  T1L = FMA(KP475528258, T1J, KP293892626 * T1K);
614  T1b = T16 - T19;
615  T1d = FMA(KP250000000, T1b, T1c);
616  }
617  {
618  E T1i, T1F, T1v, T1C, T1s, T1D, T1l, T1G;
619  {
620  E T1g, T1h, T1t, T1u;
621  T1f = I[WS(is, 7)];
622  T1y = I[WS(is, 11)];
623  T1g = I[WS(is, 19)];
624  T1h = I[WS(is, 17)];
625  T1i = T1g + T1h;
626  T1F = T1g - T1h;
627  T1t = I[WS(is, 13)];
628  T1u = I[WS(is, 5)];
629  T1v = T1t - T1u;
630  T1C = T1t + T1u;
631  {
632  E T1q, T1r, T1j, T1k;
633  T1q = I[WS(is, 21)];
634  T1r = I[WS(is, 9)];
635  T1s = T1q - T1r;
636  T1D = T1q + T1r;
637  T1j = I[WS(is, 25)];
638  T1k = I[WS(is, 3)];
639  T1l = T1j + T1k;
640  T1G = T1k - T1j;
641  }
642  }
643  T1o = KP559016994 * (T1i + T1l);
644  T1w = KP559016994 * (T1s - T1v);
645  T1m = T1i - T1l;
646  T1n = FNMS(KP250000000, T1m, T1f);
647  T1x = T1s + T1v;
648  T1z = FMA(KP250000000, T1x, T1y);
649  T1P = FNMS(KP475528258, T1G, KP293892626 * T1F);
650  T1Q = FMA(KP293892626, T1D, KP475528258 * T1C);
651  T1R = T1P + T1Q;
652  T1E = FNMS(KP475528258, T1D, KP293892626 * T1C);
653  T1H = FMA(KP475528258, T1F, KP293892626 * T1G);
654  T1I = T1E - T1H;
655  }
656  T1Z = T1b - T1c;
657  T21 = T1x - T1y;
658  T20 = T1f + T1m;
659  T22 = KP371184290 * (T1Z + T20 + T21);
660  T4l = FMA(KP462201919, T1Z, KP155909426 * T20) - (KP618111346 * T21);
661  T2R = FMA(KP258006924, T1Z, KP102097497 * T21) - (KP360104421 * T20);
662  {
663  E T2I, T2F, T2G, T2J, T1T, T1V, T1U;
664  T1S = T1O + T1R;
665  T2I = KP3_464101615 * (T1Q - T1P);
666  T2F = FNMS(KP4_000000000, T1O, KP2_000000000 * T1R);
667  T1T = T1n - T1o;
668  T1V = T1a + T1d;
669  T1U = T1w + T1z;
670  T2G = KP1_732050807 * (T1T + T1U);
671  T2J = T1U + FNMA(KP2_000000000, T1V, T1T);
672  T1W = T1T - T1U - T1V;
673  T1X = FNMS(KP202100941, T1W, KP622681257 * T1S);
674  {
675  E T2M, T2N, T2H, T2K;
676  T2M = T2F + T2G;
677  T2N = T2J - T2I;
678  T2O = FNMS(KP183215435, T2N, KP029606561 * T2M);
679  T3t = FMA(KP183215435, T2M, KP029606561 * T2N);
680  T2H = T2F - T2G;
681  T2K = T2I + T2J;
682  T2L = FMA(KP015708004, T2H, KP184926209 * T2K);
683  T3s = FNMS(KP015708004, T2K, KP184926209 * T2H);
684  }
685  }
686  {
687  E T2y, T2v, T2x, T2u, T1e, T1A, T1p;
688  T1M = T1I - T1L;
689  T2y = FMA(KP4_000000000, T1L, KP2_000000000 * T1I);
690  T2v = KP3_464101615 * (T1H + T1E);
691  T1e = T1a - T1d;
692  T1A = T1w - T1z;
693  T1p = T1n + T1o;
694  T2x = KP1_732050807 * (T1A - T1p);
695  T2u = FMS(KP2_000000000, T1e, T1A) - T1p;
696  T1B = T1e + T1p + T1A;
697  T1N = FNMS(KP245522678, T1M, KP350296205 * T1B);
698  {
699  E T2B, T2C, T2w, T2z;
700  T2B = T2v + T2u;
701  T2C = T2y + T2x;
702  T2D = FNMS(KP183845747, T2C, KP025400502 * T2B);
703  T3q = FMA(KP183845747, T2B, KP025400502 * T2C);
704  T2w = T2u - T2v;
705  T2z = T2x - T2y;
706  T2A = FNMS(KP184517712, T2z, KP019941366 * T2w);
707  T3p = FMA(KP184517712, T2w, KP019941366 * T2z);
708  }
709  }
710  }
  // Stage 2: terms built from I[0] and the even-indexed inputs I[2], ..., I[30].
711  {
712  E T2, Tw, TZ, TI, T9, Tv, Tb, Tk, Tz, TD, Ti, Ty, Tr, TB, TW;
713  E TX, TY, TL, TO, TP, Ta, Ts, Tj;
714  T1 = I[0];
715  {
716  E T8, TG, T5, TH;
717  T2 = I[WS(is, 4)];
718  {
719  E T6, T7, T3, T4;
720  T6 = I[WS(is, 16)];
721  T7 = I[WS(is, 30)];
722  T8 = T6 - T7;
723  TG = T6 + T7;
724  T3 = I[WS(is, 8)];
725  T4 = I[WS(is, 2)];
726  T5 = T3 - T4;
727  TH = T3 + T4;
728  }
729  Tw = KP559016994 * (T5 - T8);
730  TZ = FMA(KP475528258, TH, KP293892626 * TG);
731  TI = FNMS(KP293892626, TH, KP475528258 * TG);
732  T9 = T5 + T8;
733  Tv = FNMS(KP250000000, T9, T2);
734  }
735  {
736  E Te, TJ, Tq, TN, Tn, TM, Th, TK, TC;
737  {
738  E Tc, Td, To, Tp;
739  Tb = I[WS(is, 24)];
740  Tk = I[WS(is, 20)];
741  Tc = I[WS(is, 12)];
742  Td = I[WS(is, 14)];
743  Te = Tc - Td;
744  TJ = Tc + Td;
745  To = I[WS(is, 26)];
746  Tp = I[WS(is, 18)];
747  Tq = To + Tp;
748  TN = Tp - To;
749  {
750  E Tl, Tm, Tf, Tg;
751  Tl = I[WS(is, 10)];
752  Tm = I[WS(is, 22)];
753  Tn = Tl + Tm;
754  TM = Tm - Tl;
755  Tf = I[WS(is, 28)];
756  Tg = I[WS(is, 6)];
757  Th = Tf - Tg;
758  TK = Tf + Tg;
759  }
760  }
761  Tz = KP559016994 * (Te - Th);
762  TC = Tq - Tn;
763  TD = KP559016994 * TC;
764  Ti = Te + Th;
765  Ty = FNMS(KP250000000, Ti, Tb);
766  Tr = Tn + Tq;
767  TB = FMA(KP250000000, Tr, Tk);
768  TW = FNMS(KP293892626, TK, KP475528258 * TJ);
769  TX = FMA(KP475528258, TM, KP293892626 * TN);
770  TY = TW + TX;
771  TL = FMA(KP293892626, TJ, KP475528258 * TK);
772  TO = FNMS(KP475528258, TN, KP293892626 * TM);
773  TP = TL + TO;
774  }
  // Tt is a signed sum of the fifteen even-indexed inputs I[2]..I[30].
775  Ta = T2 + T9;
776  Ts = Tk - Tr;
777  Tj = Tb + Ti;
778  Tt = Ta + Tj + Ts;
779  Tu = FNMS(KP066666666, Tt, T1);
780  T47 = FNMS(KP387067417, Ts, KP638094290 * Tj) - (KP251026872 * Ta);
781  T25 = FNMS(KP296373721, Ts, KP341720569 * Ta) - (KP045346848 * Tj);
782  {
783  E T2l, T2i, T2h, T2k, TS, TU, TT;
784  T10 = TY - TZ;
785  T2l = KP3_464101615 * (TX - TW);
786  T2i = FMA(KP4_000000000, TZ, KP2_000000000 * TY);
787  TS = Tw + Tv;
788  TU = TB + TD;
789  TT = Tz + Ty;
790  T2h = KP1_732050807 * (TT - TU);
791  T2k = FMS(KP2_000000000, TS, TU) - TT;
792  TV = TS + TT + TU;
793  T11 = FNMS(KP427405661, T10, KP303494444 * TV);
794  {
795  E T2o, T2p, T2j, T2m;
796  T2o = T2h - T2i;
797  T2p = T2k + T2l;
798  T2q = FMA(KP160793728, T2o, KP092681288 * T2p);
799  T3h = FNMS(KP092681288, T2o, KP160793728 * T2p);
800  T2j = T2h + T2i;
801  T2m = T2k - T2l;
802  T2n = FMA(KP183333495, T2j, KP028866483 * T2m);
803  T3i = FNMS(KP183333495, T2m, KP028866483 * T2j);
804  }
805  }
806  {
807  E T2a, T27, T26, T29, Tx, TE, TA;
808  TQ = TI + TP;
809  T2a = KP3_464101615 * (TO - TL);
810  T27 = FNMS(KP2_000000000, TP, KP4_000000000 * TI);
811  Tx = Tv - Tw;
812  TE = TB - TD;
813  TA = Ty - Tz;
814  T26 = KP1_732050807 * (TA - TE);
815  T29 = FMS(KP2_000000000, Tx, TE) - TA;
816  TF = Tx + TA + TE;
817  TR = FMA(KP348438623, TF, KP255877341 * TQ);
818  {
819  E T2d, T2e, T28, T2b;
820  T2d = T26 + T27;
821  T2e = T29 + T2a;
822  T2f = FMA(KP147857608, T2d, KP112172063 * T2e);
823  T3e = FNMS(KP112172063, T2d, KP147857608 * T2e);
824  T28 = T26 - T27;
825  T2b = T29 - T2a;
826  T2c = FMA(KP000412259, T28, KP185591687 * T2b);
827  T3f = FNMS(KP000412259, T2b, KP185591687 * T28);
828  }
829  }
830  }
  // Middle output: O[15] = I[0] + 2*Tt. It uses no odd-indexed input.
831  O[WS(os, 15)] = FMA(KP2_000000000, Tt, T1);
  // Combine the stage-1 and stage-2 terms into the remaining 30 outputs.
832  {
833  E T3k, T3Q, T4g, T4D, T4r, T4H, T3v, T3T, T2t, T3d, T3P, T4T, T4d, T4E, T4U;
834  E T4o, T4G, T2S, T3o, T3S, T24, T2U, T32, T3A, T3N, T3Z, T3K, T3Y, T39, T3B;
835  {
836  E T3g, T3j, T4e, T4f;
837  T3g = T3e - T3f;
838  T3j = T3h - T3i;
839  T3k = FMA(KP587785252, T3g, KP951056516 * T3j);
840  T3Q = FNMS(KP587785252, T3j, KP951056516 * T3g);
841  T4e = T2c + T2f;
842  T4f = T2n + T2q;
843  T4g = FMA(KP1_018073920, T4e, KP1_647278207 * T4f);
844  T4D = FNMS(KP1_647278207, T4e, KP1_018073920 * T4f);
845  }
846  {
847  E T4p, T4q, T3r, T3u;
848  T4p = T2L - T2O;
849  T4q = T2D - T2A;
850  T4r = FNMS(KP1_647278207, T4q, KP1_018073920 * T4p);
851  T4H = FMA(KP1_018073920, T4q, KP1_647278207 * T4p);
852  T3r = T3p - T3q;
853  T3u = T3s - T3t;
854  T3v = FMA(KP951056516, T3r, KP587785252 * T3u);
855  T3T = FNMS(KP587785252, T3r, KP951056516 * T3u);
856  }
857  {
858  E T2s, T3c, T2g, T2r, T3b;
859  T2g = T2c - T2f;
860  T2r = T2n - T2q;
861  T2s = T2g + T2r;
862  T3c = KP559016994 * (T2r - T2g);
863  T2t = T25 + T2s;
864  T3b = FNMS(KP250000000, T2s, T25);
865  T3d = T3b + T3c;
866  T3P = T3b - T3c;
867  }
868  {
869  E T4a, T4c, T48, T49, T4b;
870  T48 = T3f + T3e;
871  T49 = T3i + T3h;
872  T4a = T48 + T49;
873  T4c = KP968245836 * (T49 - T48);
874  T4T = FMA(KP1_732050807, T4a, T47);
875  T4b = FNMS(KP433012701, T4a, T47);
876  T4d = T4b + T4c;
877  T4E = T4b - T4c;
878  }
879  {
880  E T4k, T4m, T4i, T4j, T4n;
881  T4i = T3p + T3q;
882  T4j = T3s + T3t;
883  T4k = KP968245836 * (T4i - T4j);
884  T4m = T4i + T4j;
885  T4U = FMA(KP1_732050807, T4m, T4l);
886  T4n = FNMS(KP433012701, T4m, T4l);
887  T4o = T4k + T4n;
888  T4G = T4n - T4k;
889  }
890  {
891  E T2Q, T3n, T2E, T2P, T3m;
892  T2E = T2A + T2D;
893  T2P = T2L + T2O;
894  T2Q = T2E + T2P;
895  T3n = KP559016994 * (T2P - T2E);
896  T2S = T2Q - T2R;
897  T3m = FMA(KP250000000, T2Q, T2R);
898  T3o = T3m + T3n;
899  T3S = T3m - T3n;
900  }
901  {
902  E T2X, T34, T13, T2W, T23, T33, T38, T3M, T31, T3J, T12, T1Y;
903  T2X = KP1_118033988 * (T11 - TR);
904  T34 = KP1_118033988 * (T1X - T1N);
905  T12 = TR + T11;
906  T13 = FMA(KP2_000000000, T12, Tu);
907  T2W = FNMS(KP500000000, T12, Tu);
908  T1Y = T1N + T1X;
909  T23 = FMS(KP2_000000000, T1Y, T22);
910  T33 = FMA(KP500000000, T1Y, T22);
911  {
912  E T36, T37, T2Z, T30;
913  T36 = FMA(KP700592410, T1M, KP122761339 * T1B);
914  T37 = FMA(KP404201883, T1S, KP311340628 * T1W);
915  T38 = FMA(KP1_902113032, T36, KP1_175570504 * T37);
916  T3M = FNMS(KP1_175570504, T36, KP1_902113032 * T37);
917  T2Z = FNMS(KP127938670, TF, KP696877247 * TQ);
918  T30 = FMA(KP213702830, TV, KP606988889 * T10);
919  T31 = FMA(KP1_175570504, T2Z, KP1_902113032 * T30);
920  T3J = FNMS(KP1_175570504, T30, KP1_902113032 * T2Z);
921  }
922  {
923  E T2Y, T3L, T3I, T35;
924  T24 = T13 - T23;
925  T2U = T13 + T23;
926  T2Y = T2W + T2X;
927  T32 = T2Y - T31;
928  T3A = T2Y + T31;
929  T3L = T33 - T34;
930  T3N = T3L - T3M;
931  T3Z = T3L + T3M;
932  T3I = T2W - T2X;
933  T3K = T3I - T3J;
934  T3Y = T3I + T3J;
935  T35 = T33 + T34;
936  T39 = T35 - T38;
937  T3B = T35 + T38;
938  }
939  }
  // The five blocks below each store six outputs.
940  {
941  E T4X, T2T, T4W, T4V, T2V, T4S;
942  T4X = T4T + T4U;
943  T2T = T2t - T2S;
944  T4W = T24 - T2T;
945  O[WS(os, 30)] = FMA(KP2_000000000, T2T, T24);
946  O[WS(os, 28)] = T4W + T4X;
947  O[WS(os, 18)] = T4W - T4X;
948  T4V = T4T - T4U;
949  T2V = T2t + T2S;
950  T4S = T2U - T2V;
951  O[0] = FMA(KP2_000000000, T2V, T2U);
952  O[WS(os, 2)] = T4S + T4V;
953  O[WS(os, 12)] = T4S - T4V;
954  {
955  E T3a, T3y, T3x, T3z, T4t, T4v, T46, T4u;
956  T3a = T32 - T39;
957  T3y = T32 + T39;
958  {
959  E T3l, T3w, T4h, T4s;
960  T3l = T3d - T3k;
961  T3w = T3o - T3v;
962  T3x = T3l - T3w;
963  T3z = T3l + T3w;
964  T4h = T4d - T4g;
965  T4s = T4o - T4r;
966  T4t = T4h - T4s;
967  T4v = T4h + T4s;
968  }
969  O[WS(os, 7)] = FMA(KP2_000000000, T3x, T3a);
970  O[WS(os, 23)] = FMA(KP2_000000000, T3z, T3y);
971  T46 = T3a - T3x;
972  O[WS(os, 1)] = T46 - T4t;
973  O[WS(os, 24)] = T46 + T4t;
974  T4u = T3y - T3z;
975  O[WS(os, 29)] = T4u - T4v;
976  O[WS(os, 6)] = T4u + T4v;
977  }
978  }
979  {
980  E T40, T44, T43, T45, T4P, T4R, T4M, T4Q;
981  T40 = T3Y - T3Z;
982  T44 = T3Y + T3Z;
983  {
984  E T41, T42, T4N, T4O;
985  T41 = T3P + T3Q;
986  T42 = T3S + T3T;
987  T43 = T41 - T42;
988  T45 = T41 + T42;
989  T4N = T4E - T4D;
990  T4O = T4H + T4G;
991  T4P = T4N - T4O;
992  T4R = T4N + T4O;
993  }
994  O[WS(os, 11)] = FMA(KP2_000000000, T43, T40);
995  O[WS(os, 19)] = FMA(KP2_000000000, T45, T44);
996  T4M = T40 - T43;
997  O[WS(os, 22)] = T4M - T4P;
998  O[WS(os, 4)] = T4M + T4P;
999  T4Q = T44 - T45;
1000  O[WS(os, 8)] = T4Q - T4R;
1001  O[WS(os, 26)] = T4Q + T4R;
1002  }
1003  {
1004  E T3O, T3W, T3V, T3X, T4J, T4L, T4C, T4K;
1005  T3O = T3K - T3N;
1006  T3W = T3K + T3N;
1007  {
1008  E T3R, T3U, T4F, T4I;
1009  T3R = T3P - T3Q;
1010  T3U = T3S - T3T;
1011  T3V = T3R - T3U;
1012  T3X = T3R + T3U;
1013  T4F = T4D + T4E;
1014  T4I = T4G - T4H;
1015  T4J = T4F - T4I;
1016  T4L = T4F + T4I;
1017  }
1018  O[WS(os, 13)] = FMA(KP2_000000000, T3V, T3O);
1019  O[WS(os, 17)] = FMA(KP2_000000000, T3X, T3W);
1020  T4C = T3O - T3V;
1021  O[WS(os, 27)] = T4C - T4J;
1022  O[WS(os, 5)] = T4C + T4J;
1023  T4K = T3W - T3X;
1024  O[WS(os, 3)] = T4K - T4L;
1025  O[WS(os, 25)] = T4K + T4L;
1026  }
1027  {
1028  E T3C, T3G, T3F, T3H, T4z, T4B, T4w, T4A;
1029  T3C = T3A - T3B;
1030  T3G = T3A + T3B;
1031  {
1032  E T3D, T3E, T4x, T4y;
1033  T3D = T3d + T3k;
1034  T3E = T3o + T3v;
1035  T3F = T3D - T3E;
1036  T3H = T3D + T3E;
1037  T4x = T4g + T4d;
1038  T4y = T4r + T4o;
1039  T4z = T4x - T4y;
1040  T4B = T4x + T4y;
1041  }
1042  O[WS(os, 16)] = FMA(KP2_000000000, T3F, T3C);
1043  O[WS(os, 14)] = FMA(KP2_000000000, T3H, T3G);
1044  T4w = T3C - T3F;
1045  O[WS(os, 21)] = T4w - T4z;
1046  O[WS(os, 20)] = T4w + T4z;
1047  T4A = T3G - T3H;
1048  O[WS(os, 9)] = T4A - T4B;
1049  O[WS(os, 10)] = T4A + T4B;
1050  }
1051  }
1052 }
e10_31
void e10_31(const R *I, R *O)
DCT-II or "the" DCT transformation of a 31-point signal. This function contains 320 FP additions,...
Definition: DiscreteCosineTransform_31points.cc:117
e01_31
void e01_31(const R *I, R *O)
DCT-III or "the inverse" DCT transformation of a 31-point signal. This function contains 320 FP addition...
Definition: DiscreteCosineTransform_31points.cc:589