Belle II Software  release-05-01-25
DiscreteCosineTransform_31points.cc File Reference

Go to the source code of this file.

Macros

#define K(x)   ((E) x)
 
#define DK(name, value)   const E name = K(value)
 
#define WS(x, y)   (y)
 
#define FMA(a, b, c)   (((a) * (b)) + (c))
 
#define FMS(a, b, c)   (((a) * (b)) - (c))
 
#define FNMA(a, b, c)   (- (((a) * (b)) + (c)))
 
#define FNMS(a, b, c)   ((c) - ((a) * (b)))
 

Typedefs

typedef double R
 
typedef R E
 

Functions

void e10_31 (const R *I, R *O)
 DCT-II or "the" DCT transformation of a 31-point signal. This function contains 320 FP additions, 170 FP multiplications (or 229 additions, 79 multiplications, 91 fused multiply/add), 150 stack variables, 64 constants, and 62 memory accesses. More...
 
void e01_31 (const R *I, R *O)
 DCT-III or "the inverse" DCT transformation of a 31-point signal. This function contains 320 FP additions, 169 FP multiplications (or 228 additions, 77 multiplications, 92 fused multiply/add), 149 stack variables, 64 constants, and 62 memory accesses. More...
 

Detailed Description

e10_31 generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft10 -n 31 -name e10_31 -include rdft/scalar/r2r.h
e01_31 generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft01 -n 31 -name e01_31 -include rdft/scalar/r2r.h

This file was autogenerated by the FFTW package explicitly for the 31-point DCT transformation. It is an optimized version of the following functions:

DCT-II or "the" DCT transformation of a 31-point signal:

void e10_31(const double *A, double *C){
  for(int i=0;i<31;i++){
    double c = 0;
    for(int j=0;j<31;j++) c += A[j]*cos(M_PI/31*(j+0.5)*i);
    C[i] = 2*c;
  }
}

and DCT-III or "the inverse" DCT transformation of a 31-point signal:

void e01_31(const double *C, double *A){
  for(int i=0;i<31;i++){
    double a = 0.5*C[0];
    for(int j=1;j<31;j++) a += C[j]*cos(M_PI/31*j*(i+0.5));
    A[i] = 2*a;
  }
}

The reference functions above and the optimized functions defined below give the same results, up to floating-point rounding errors.

Definition in file DiscreteCosineTransform_31points.cc.

Function Documentation

◆ e01_31()

void e01_31 ( const R *  I,
R *  O 
)

DCT-III or "the inverse" DCT transformation of a 31-point signal. This function contains 320 FP additions, 169 FP multiplications (or 228 additions, 77 multiplications, 92 fused multiply/add), 149 stack variables, 64 constants, and 62 memory accesses.

DCT-III or "the inverse" DCT transformation of 31-point signal.

Parameters
I: input coefficients
O: output signal amplitudes

Definition at line 589 of file DiscreteCosineTransform_31points.cc.

/* Body of e01_31: straight-line, fully unrolled code with no loops or
 * branches; every statement assigns to a local of type E or to an element
 * of O.
 *
 * WS(x, y) expands to (y), so its first argument (is / os) is discarded by
 * the preprocessor and I[WS(is, k)] is simply I[k].
 *
 * Each of I[0]..I[30] is read exactly once and each of O[0]..O[30] is
 * written exactly once.  The KP... constants are not declared in this
 * listing; they come from elsewhere in the file. */
590 {
591  E T22, T4l, T2R, T1S, T1W, T1X, T2O, T3t, T2L, T3s, T1M, T1B, T1N, T2D, T3q;
592  E T2A, T3p, T1, Tt, Tu, T47, T25, T10, TV, T11, T2q, T3h, T2n, T3i, TQ;
593  E TF, TR, T2f, T3e, T2c, T3f;
/* Stage 1: load the 15 odd-indexed inputs I[1], I[3], ..., I[29] and reduce
 * them to the outer-scope terms T22 ... T3p (the first 17 variables
 * declared above). */
594  {
595  E T1c, T1a, T1O, T1L, T1b, T1d, T1f, T1y, T1o, T1w, T1m, T1n, T1x, T1z, T1P;
596  E T1Q, T1R, T1E, T1H, T1I, T1Z, T21, T20;
597  {
598  E T19, T1K, T16, T1J;
599  T1c = I[WS(is, 27)];
600  {
601  E T17, T18, T14, T15;
602  T17 = I[WS(is, 1)];
603  T18 = I[WS(is, 15)];
604  T19 = T17 + T18;
605  T1K = T18 - T17;
606  T14 = I[WS(is, 29)];
607  T15 = I[WS(is, 23)];
608  T16 = T14 - T15;
609  T1J = T14 + T15;
610  }
611  T1a = KP559016994 * (T16 + T19);
612  T1O = FNMS(KP475528258, T1K, KP293892626 * T1J);
613  T1L = FMA(KP475528258, T1J, KP293892626 * T1K);
614  T1b = T16 - T19;
615  T1d = FMA(KP250000000, T1b, T1c);
616  }
617  {
618  E T1i, T1F, T1v, T1C, T1s, T1D, T1l, T1G;
619  {
620  E T1g, T1h, T1t, T1u;
621  T1f = I[WS(is, 7)];
622  T1y = I[WS(is, 11)];
623  T1g = I[WS(is, 19)];
624  T1h = I[WS(is, 17)];
625  T1i = T1g + T1h;
626  T1F = T1g - T1h;
627  T1t = I[WS(is, 13)];
628  T1u = I[WS(is, 5)];
629  T1v = T1t - T1u;
630  T1C = T1t + T1u;
631  {
632  E T1q, T1r, T1j, T1k;
633  T1q = I[WS(is, 21)];
634  T1r = I[WS(is, 9)];
635  T1s = T1q - T1r;
636  T1D = T1q + T1r;
637  T1j = I[WS(is, 25)];
638  T1k = I[WS(is, 3)];
639  T1l = T1j + T1k;
640  T1G = T1k - T1j;
641  }
642  }
643  T1o = KP559016994 * (T1i + T1l);
644  T1w = KP559016994 * (T1s - T1v);
645  T1m = T1i - T1l;
646  T1n = FNMS(KP250000000, T1m, T1f);
647  T1x = T1s + T1v;
648  T1z = FMA(KP250000000, T1x, T1y);
649  T1P = FNMS(KP475528258, T1G, KP293892626 * T1F);
650  T1Q = FMA(KP293892626, T1D, KP475528258 * T1C);
651  T1R = T1P + T1Q;
652  T1E = FNMS(KP475528258, T1D, KP293892626 * T1C);
653  T1H = FMA(KP475528258, T1F, KP293892626 * T1G);
654  T1I = T1E - T1H;
655  }
656  T1Z = T1b - T1c;
657  T21 = T1x - T1y;
658  T20 = T1f + T1m;
659  T22 = KP371184290 * (T1Z + T20 + T21);
660  T4l = FMA(KP462201919, T1Z, KP155909426 * T20) - (KP618111346 * T21);
661  T2R = FMA(KP258006924, T1Z, KP102097497 * T21) - (KP360104421 * T20);
662  {
663  E T2I, T2F, T2G, T2J, T1T, T1V, T1U;
664  T1S = T1O + T1R;
665  T2I = KP3_464101615 * (T1Q - T1P);
666  T2F = FNMS(KP4_000000000, T1O, KP2_000000000 * T1R);
667  T1T = T1n - T1o;
668  T1V = T1a + T1d;
669  T1U = T1w + T1z;
670  T2G = KP1_732050807 * (T1T + T1U);
671  T2J = T1U + FNMA(KP2_000000000, T1V, T1T);
672  T1W = T1T - T1U - T1V;
673  T1X = FNMS(KP202100941, T1W, KP622681257 * T1S);
674  {
675  E T2M, T2N, T2H, T2K;
676  T2M = T2F + T2G;
677  T2N = T2J - T2I;
678  T2O = FNMS(KP183215435, T2N, KP029606561 * T2M);
679  T3t = FMA(KP183215435, T2M, KP029606561 * T2N);
680  T2H = T2F - T2G;
681  T2K = T2I + T2J;
682  T2L = FMA(KP015708004, T2H, KP184926209 * T2K);
683  T3s = FNMS(KP015708004, T2K, KP184926209 * T2H);
684  }
685  }
686  {
687  E T2y, T2v, T2x, T2u, T1e, T1A, T1p;
688  T1M = T1I - T1L;
689  T2y = FMA(KP4_000000000, T1L, KP2_000000000 * T1I);
690  T2v = KP3_464101615 * (T1H + T1E);
691  T1e = T1a - T1d;
692  T1A = T1w - T1z;
693  T1p = T1n + T1o;
694  T2x = KP1_732050807 * (T1A - T1p);
695  T2u = FMS(KP2_000000000, T1e, T1A) - T1p;
696  T1B = T1e + T1p + T1A;
697  T1N = FNMS(KP245522678, T1M, KP350296205 * T1B);
698  {
699  E T2B, T2C, T2w, T2z;
700  T2B = T2v + T2u;
701  T2C = T2y + T2x;
702  T2D = FNMS(KP183845747, T2C, KP025400502 * T2B);
703  T3q = FMA(KP183845747, T2B, KP025400502 * T2C);
704  T2w = T2u - T2v;
705  T2z = T2x - T2y;
706  T2A = FNMS(KP184517712, T2z, KP019941366 * T2w);
707  T3p = FMA(KP184517712, T2w, KP019941366 * T2z);
708  }
709  }
710  }
/* Stage 2: load the 16 even-indexed inputs I[0], I[2], ..., I[30] and reduce
 * them to the outer-scope terms T1 ... T3f (the remaining variables
 * declared above). */
711  {
712  E T2, Tw, TZ, TI, T9, Tv, Tb, Tk, Tz, TD, Ti, Ty, Tr, TB, TW;
713  E TX, TY, TL, TO, TP, Ta, Ts, Tj;
714  T1 = I[0];
715  {
716  E T8, TG, T5, TH;
717  T2 = I[WS(is, 4)];
718  {
719  E T6, T7, T3, T4;
720  T6 = I[WS(is, 16)];
721  T7 = I[WS(is, 30)];
722  T8 = T6 - T7;
723  TG = T6 + T7;
724  T3 = I[WS(is, 8)];
725  T4 = I[WS(is, 2)];
726  T5 = T3 - T4;
727  TH = T3 + T4;
728  }
729  Tw = KP559016994 * (T5 - T8);
730  TZ = FMA(KP475528258, TH, KP293892626 * TG);
731  TI = FNMS(KP293892626, TH, KP475528258 * TG);
732  T9 = T5 + T8;
733  Tv = FNMS(KP250000000, T9, T2);
734  }
735  {
736  E Te, TJ, Tq, TN, Tn, TM, Th, TK, TC;
737  {
738  E Tc, Td, To, Tp;
739  Tb = I[WS(is, 24)];
740  Tk = I[WS(is, 20)];
741  Tc = I[WS(is, 12)];
742  Td = I[WS(is, 14)];
743  Te = Tc - Td;
744  TJ = Tc + Td;
745  To = I[WS(is, 26)];
746  Tp = I[WS(is, 18)];
747  Tq = To + Tp;
748  TN = Tp - To;
749  {
750  E Tl, Tm, Tf, Tg;
751  Tl = I[WS(is, 10)];
752  Tm = I[WS(is, 22)];
753  Tn = Tl + Tm;
754  TM = Tm - Tl;
755  Tf = I[WS(is, 28)];
756  Tg = I[WS(is, 6)];
757  Th = Tf - Tg;
758  TK = Tf + Tg;
759  }
760  }
761  Tz = KP559016994 * (Te - Th);
762  TC = Tq - Tn;
763  TD = KP559016994 * TC;
764  Ti = Te + Th;
765  Ty = FNMS(KP250000000, Ti, Tb);
766  Tr = Tn + Tq;
767  TB = FMA(KP250000000, Tr, Tk);
768  TW = FNMS(KP293892626, TK, KP475528258 * TJ);
769  TX = FMA(KP475528258, TM, KP293892626 * TN);
770  TY = TW + TX;
771  TL = FMA(KP293892626, TJ, KP475528258 * TK);
772  TO = FNMS(KP475528258, TN, KP293892626 * TM);
773  TP = TL + TO;
774  }
775  Ta = T2 + T9;
776  Ts = Tk - Tr;
777  Tj = Tb + Ti;
778  Tt = Ta + Tj + Ts;
779  Tu = FNMS(KP066666666, Tt, T1);
780  T47 = FNMS(KP387067417, Ts, KP638094290 * Tj) - (KP251026872 * Ta);
781  T25 = FNMS(KP296373721, Ts, KP341720569 * Ta) - (KP045346848 * Tj);
782  {
783  E T2l, T2i, T2h, T2k, TS, TU, TT;
784  T10 = TY - TZ;
785  T2l = KP3_464101615 * (TX - TW);
786  T2i = FMA(KP4_000000000, TZ, KP2_000000000 * TY);
787  TS = Tw + Tv;
788  TU = TB + TD;
789  TT = Tz + Ty;
790  T2h = KP1_732050807 * (TT - TU);
791  T2k = FMS(KP2_000000000, TS, TU) - TT;
792  TV = TS + TT + TU;
793  T11 = FNMS(KP427405661, T10, KP303494444 * TV);
794  {
795  E T2o, T2p, T2j, T2m;
796  T2o = T2h - T2i;
797  T2p = T2k + T2l;
798  T2q = FMA(KP160793728, T2o, KP092681288 * T2p);
799  T3h = FNMS(KP092681288, T2o, KP160793728 * T2p);
800  T2j = T2h + T2i;
801  T2m = T2k - T2l;
802  T2n = FMA(KP183333495, T2j, KP028866483 * T2m);
803  T3i = FNMS(KP183333495, T2m, KP028866483 * T2j);
804  }
805  }
806  {
807  E T2a, T27, T26, T29, Tx, TE, TA;
808  TQ = TI + TP;
809  T2a = KP3_464101615 * (TO - TL);
810  T27 = FNMS(KP2_000000000, TP, KP4_000000000 * TI);
811  Tx = Tv - Tw;
812  TE = TB - TD;
813  TA = Ty - Tz;
814  T26 = KP1_732050807 * (TA - TE);
815  T29 = FMS(KP2_000000000, Tx, TE) - TA;
816  TF = Tx + TA + TE;
817  TR = FMA(KP348438623, TF, KP255877341 * TQ);
818  {
819  E T2d, T2e, T28, T2b;
820  T2d = T26 + T27;
821  T2e = T29 + T2a;
822  T2f = FMA(KP147857608, T2d, KP112172063 * T2e);
823  T3e = FNMS(KP112172063, T2d, KP147857608 * T2e);
824  T28 = T26 - T27;
825  T2b = T29 - T2a;
826  T2c = FMA(KP000412259, T28, KP185591687 * T2b);
827  T3f = FNMS(KP000412259, T2b, KP185591687 * T28);
828  }
829  }
830  }
/* First store: O[15] = KP2_000000000 * Tt + T1, where T1 holds I[0] and Tt
 * was built from even-indexed inputs only. */
831  O[WS(os, 15)] = FMA(KP2_000000000, Tt, T1);
/* Stage 3: combine the stage-1 and stage-2 terms and store the remaining
 * 30 outputs. */
832  {
833  E T3k, T3Q, T4g, T4D, T4r, T4H, T3v, T3T, T2t, T3d, T3P, T4T, T4d, T4E, T4U;
834  E T4o, T4G, T2S, T3o, T3S, T24, T2U, T32, T3A, T3N, T3Z, T3K, T3Y, T39, T3B;
835  {
836  E T3g, T3j, T4e, T4f;
837  T3g = T3e - T3f;
838  T3j = T3h - T3i;
839  T3k = FMA(KP587785252, T3g, KP951056516 * T3j);
840  T3Q = FNMS(KP587785252, T3j, KP951056516 * T3g);
841  T4e = T2c + T2f;
842  T4f = T2n + T2q;
843  T4g = FMA(KP1_018073920, T4e, KP1_647278207 * T4f);
844  T4D = FNMS(KP1_647278207, T4e, KP1_018073920 * T4f);
845  }
846  {
847  E T4p, T4q, T3r, T3u;
848  T4p = T2L - T2O;
849  T4q = T2D - T2A;
850  T4r = FNMS(KP1_647278207, T4q, KP1_018073920 * T4p);
851  T4H = FMA(KP1_018073920, T4q, KP1_647278207 * T4p);
852  T3r = T3p - T3q;
853  T3u = T3s - T3t;
854  T3v = FMA(KP951056516, T3r, KP587785252 * T3u);
855  T3T = FNMS(KP587785252, T3r, KP951056516 * T3u);
856  }
857  {
858  E T2s, T3c, T2g, T2r, T3b;
859  T2g = T2c - T2f;
860  T2r = T2n - T2q;
861  T2s = T2g + T2r;
862  T3c = KP559016994 * (T2r - T2g);
863  T2t = T25 + T2s;
864  T3b = FNMS(KP250000000, T2s, T25);
865  T3d = T3b + T3c;
866  T3P = T3b - T3c;
867  }
868  {
869  E T4a, T4c, T48, T49, T4b;
870  T48 = T3f + T3e;
871  T49 = T3i + T3h;
872  T4a = T48 + T49;
873  T4c = KP968245836 * (T49 - T48);
874  T4T = FMA(KP1_732050807, T4a, T47);
875  T4b = FNMS(KP433012701, T4a, T47);
876  T4d = T4b + T4c;
877  T4E = T4b - T4c;
878  }
879  {
880  E T4k, T4m, T4i, T4j, T4n;
881  T4i = T3p + T3q;
882  T4j = T3s + T3t;
883  T4k = KP968245836 * (T4i - T4j);
884  T4m = T4i + T4j;
885  T4U = FMA(KP1_732050807, T4m, T4l);
886  T4n = FNMS(KP433012701, T4m, T4l);
887  T4o = T4k + T4n;
888  T4G = T4n - T4k;
889  }
890  {
891  E T2Q, T3n, T2E, T2P, T3m;
892  T2E = T2A + T2D;
893  T2P = T2L + T2O;
894  T2Q = T2E + T2P;
895  T3n = KP559016994 * (T2P - T2E);
896  T2S = T2Q - T2R;
897  T3m = FMA(KP250000000, T2Q, T2R);
898  T3o = T3m + T3n;
899  T3S = T3m - T3n;
900  }
901  {
902  E T2X, T34, T13, T2W, T23, T33, T38, T3M, T31, T3J, T12, T1Y;
903  T2X = KP1_118033988 * (T11 - TR);
904  T34 = KP1_118033988 * (T1X - T1N);
905  T12 = TR + T11;
906  T13 = FMA(KP2_000000000, T12, Tu);
907  T2W = FNMS(KP500000000, T12, Tu);
908  T1Y = T1N + T1X;
909  T23 = FMS(KP2_000000000, T1Y, T22);
910  T33 = FMA(KP500000000, T1Y, T22);
911  {
912  E T36, T37, T2Z, T30;
913  T36 = FMA(KP700592410, T1M, KP122761339 * T1B);
914  T37 = FMA(KP404201883, T1S, KP311340628 * T1W);
915  T38 = FMA(KP1_902113032, T36, KP1_175570504 * T37);
916  T3M = FNMS(KP1_175570504, T36, KP1_902113032 * T37);
917  T2Z = FNMS(KP127938670, TF, KP696877247 * TQ);
918  T30 = FMA(KP213702830, TV, KP606988889 * T10);
919  T31 = FMA(KP1_175570504, T2Z, KP1_902113032 * T30);
920  T3J = FNMS(KP1_175570504, T30, KP1_902113032 * T2Z);
921  }
922  {
923  E T2Y, T3L, T3I, T35;
924  T24 = T13 - T23;
925  T2U = T13 + T23;
926  T2Y = T2W + T2X;
927  T32 = T2Y - T31;
928  T3A = T2Y + T31;
929  T3L = T33 - T34;
930  T3N = T3L - T3M;
931  T3Z = T3L + T3M;
932  T3I = T2W - T2X;
933  T3K = T3I - T3J;
934  T3Y = T3I + T3J;
935  T35 = T33 + T34;
936  T39 = T35 - T38;
937  T3B = T35 + T38;
938  }
939  }
940  {
941  E T4X, T2T, T4W, T4V, T2V, T4S;
942  T4X = T4T + T4U;
943  T2T = T2t - T2S;
944  T4W = T24 - T2T;
945  O[WS(os, 30)] = FMA(KP2_000000000, T2T, T24);
946  O[WS(os, 28)] = T4W + T4X;
947  O[WS(os, 18)] = T4W - T4X;
948  T4V = T4T - T4U;
949  T2V = T2t + T2S;
950  T4S = T2U - T2V;
951  O[0] = FMA(KP2_000000000, T2V, T2U);
952  O[WS(os, 2)] = T4S + T4V;
953  O[WS(os, 12)] = T4S - T4V;
954  {
955  E T3a, T3y, T3x, T3z, T4t, T4v, T46, T4u;
956  T3a = T32 - T39;
957  T3y = T32 + T39;
958  {
959  E T3l, T3w, T4h, T4s;
960  T3l = T3d - T3k;
961  T3w = T3o - T3v;
962  T3x = T3l - T3w;
963  T3z = T3l + T3w;
964  T4h = T4d - T4g;
965  T4s = T4o - T4r;
966  T4t = T4h - T4s;
967  T4v = T4h + T4s;
968  }
969  O[WS(os, 7)] = FMA(KP2_000000000, T3x, T3a);
970  O[WS(os, 23)] = FMA(KP2_000000000, T3z, T3y);
971  T46 = T3a - T3x;
972  O[WS(os, 1)] = T46 - T4t;
973  O[WS(os, 24)] = T46 + T4t;
974  T4u = T3y - T3z;
975  O[WS(os, 29)] = T4u - T4v;
976  O[WS(os, 6)] = T4u + T4v;
977  }
978  }
979  {
980  E T40, T44, T43, T45, T4P, T4R, T4M, T4Q;
981  T40 = T3Y - T3Z;
982  T44 = T3Y + T3Z;
983  {
984  E T41, T42, T4N, T4O;
985  T41 = T3P + T3Q;
986  T42 = T3S + T3T;
987  T43 = T41 - T42;
988  T45 = T41 + T42;
989  T4N = T4E - T4D;
990  T4O = T4H + T4G;
991  T4P = T4N - T4O;
992  T4R = T4N + T4O;
993  }
994  O[WS(os, 11)] = FMA(KP2_000000000, T43, T40);
995  O[WS(os, 19)] = FMA(KP2_000000000, T45, T44);
996  T4M = T40 - T43;
997  O[WS(os, 22)] = T4M - T4P;
998  O[WS(os, 4)] = T4M + T4P;
999  T4Q = T44 - T45;
1000  O[WS(os, 8)] = T4Q - T4R;
1001  O[WS(os, 26)] = T4Q + T4R;
1002  }
1003  {
1004  E T3O, T3W, T3V, T3X, T4J, T4L, T4C, T4K;
1005  T3O = T3K - T3N;
1006  T3W = T3K + T3N;
1007  {
1008  E T3R, T3U, T4F, T4I;
1009  T3R = T3P - T3Q;
1010  T3U = T3S - T3T;
1011  T3V = T3R - T3U;
1012  T3X = T3R + T3U;
1013  T4F = T4D + T4E;
1014  T4I = T4G - T4H;
1015  T4J = T4F - T4I;
1016  T4L = T4F + T4I;
1017  }
1018  O[WS(os, 13)] = FMA(KP2_000000000, T3V, T3O);
1019  O[WS(os, 17)] = FMA(KP2_000000000, T3X, T3W);
1020  T4C = T3O - T3V;
1021  O[WS(os, 27)] = T4C - T4J;
1022  O[WS(os, 5)] = T4C + T4J;
1023  T4K = T3W - T3X;
1024  O[WS(os, 3)] = T4K - T4L;
1025  O[WS(os, 25)] = T4K + T4L;
1026  }
1027  {
1028  E T3C, T3G, T3F, T3H, T4z, T4B, T4w, T4A;
1029  T3C = T3A - T3B;
1030  T3G = T3A + T3B;
1031  {
1032  E T3D, T3E, T4x, T4y;
1033  T3D = T3d + T3k;
1034  T3E = T3o + T3v;
1035  T3F = T3D - T3E;
1036  T3H = T3D + T3E;
1037  T4x = T4g + T4d;
1038  T4y = T4r + T4o;
1039  T4z = T4x - T4y;
1040  T4B = T4x + T4y;
1041  }
1042  O[WS(os, 16)] = FMA(KP2_000000000, T3F, T3C);
1043  O[WS(os, 14)] = FMA(KP2_000000000, T3H, T3G);
1044  T4w = T3C - T3F;
1045  O[WS(os, 21)] = T4w - T4z;
1046  O[WS(os, 20)] = T4w + T4z;
1047  T4A = T3G - T3H;
1048  O[WS(os, 9)] = T4A - T4B;
1049  O[WS(os, 10)] = T4A + T4B;
1050  }
1051  }
1052 }

◆ e10_31()

void e10_31 ( const R *  I,
R *  O 
)

DCT-II or "the" DCT transformation of a 31-point signal. This function contains 320 FP additions, 170 FP multiplications (or 229 additions, 79 multiplications, 91 fused multiply/add), 150 stack variables, 64 constants, and 62 memory accesses.

DCT-II or "the" DCT transformation of 31-point signal.

Parameters
I: input signal array of 31 values
O: output array with 31 DCT coefficients

Definition at line 117 of file DiscreteCosineTransform_31points.cc.