Belle II Software development
DiscreteCosineTransform_31points.cc File Reference

Generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft10 -n 31 -name e10_31 -include rdft/scalar/r2r.h Generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft01 -n 31 -name e01_31 -include rdft/scalar/r2r.h. More...

Go to the source code of this file.

Macros

#define K(x)   ((E) x)
 macro autogenerated by FFTW
 
#define DK(name, value)   const E name = K(value)
 macro autogenerated by FFTW
 
#define WS(x, y)   (y)
 macro autogenerated by FFTW
 
#define FMA(a, b, c)   (((a) * (b)) + (c))
 fused multiply add
 
#define FMS(a, b, c)   (((a) * (b)) - (c))
 fused multiply subtract
 
#define FNMA(a, b, c)   (- (((a) * (b)) + (c)))
 fused negative multiply add
 
#define FNMS(a, b, c)   ((c) - ((a) * (b)))
 fused negative multiply subtract
 

Typedefs

typedef double R
 typedef autogenerated by FFTW
 
typedef R E
 internal precision of FFTW codelets
 

Functions

void e10_31 (const R *I, R *O)
 DCT-II or "the" DCT transformation of 31-point signal This function contains 320 FP additions, 170 FP multiplications, (or, 229 additions, 79 multiplications, 91 fused multiply/add), 150 stack variables, 64 constants, and 62 memory accesses.
 
void e01_31 (const R *I, R *O)
 DCT-III or "the inverse" DCT transformation of 31-point signal This function contains 320 FP additions, 169 FP multiplications, (or, 228 additions, 77 multiplications, 92 fused multiply/add), 149 stack variables, 64 constants, and 62 memory accesses.
 

Detailed Description

Generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft10 -n 31 -name e10_31 -include rdft/scalar/r2r.h Generated by: ../../../genfft/gen_r2r.native -compact -variables 4 -pipeline-latency 4 -redft01 -n 31 -name e01_31 -include rdft/scalar/r2r.h.

This file was autogenerated by the FFTW package explicitly for 31-point DCT transformation. It is optimized version of the following functions:

DCT-II or "the" DCT transformation of 31-point signal void e10_31(const double *A, double *C){ for(int i=0;i<31;i++){ double c = 0; for(int j=0;j<31;j++) c += A[j]*cos(M_PI/31 * (j+0.5) * i); C[i] = 2*c; } }

and DCT-III or "the inverse" DCT transformation of 31-point signal void e01_31(const double *C, double *A){ for(int i=0;i<31;i++){ double a = 0.5*C[0]; for(int j=1;j<31;j++) a += C[j]*cos(M_PI/31*j * (i+0.5)); A[i] = 2*a; } }

Those functions and the functions defined below give exactly the same results (up to floating point rounding errors).

Definition in file DiscreteCosineTransform_31points.cc.

Macro Definition Documentation

◆ DK

#define DK (   name,
  value 
)    const E name = K(value)

macro autogenerated by FFTW

Definition at line 38 of file DiscreteCosineTransform_31points.cc.

◆ FMA

#define FMA (   a,
  b,
 
)    (((a) * (b)) + (c))

fused multiply add

Definition at line 40 of file DiscreteCosineTransform_31points.cc.

◆ FMS

#define FMS (   a,
  b,
 
)    (((a) * (b)) - (c))

fused multiply subtract

Definition at line 41 of file DiscreteCosineTransform_31points.cc.

◆ FNMA

#define FNMA (   a,
  b,
 
)    (- (((a) * (b)) + (c)))

fused negative multiply add

Definition at line 42 of file DiscreteCosineTransform_31points.cc.

◆ FNMS

#define FNMS (   a,
  b,
 
)    ((c) - ((a) * (b)))

fused negative multiply subtract

Definition at line 43 of file DiscreteCosineTransform_31points.cc.

◆ K

#define K (   x)    ((E) x)

macro autogenerated by FFTW

Definition at line 37 of file DiscreteCosineTransform_31points.cc.

◆ WS

#define WS (   x,
 
)    (y)

macro autogenerated by FFTW

Definition at line 39 of file DiscreteCosineTransform_31points.cc.

Typedef Documentation

◆ E

typedef R E

internal precision of FFTW codelets

Definition at line 36 of file DiscreteCosineTransform_31points.cc.

◆ R

typedef double R

typedef autogenerated by FFTW

Definition at line 35 of file DiscreteCosineTransform_31points.cc.

Function Documentation

◆ e01_31()

void e01_31 ( const R I,
R O 
)

DCT-III or "the inverse" DCT transformation of 31-point signal This function contains 320 FP additions, 169 FP multiplications, (or, 228 additions, 77 multiplications, 92 fused multiply/add), 149 stack variables, 64 constants, and 62 memory accesses.

DCT-III or "the inverse" DCT transformation of 31-point signal.

Parameters
Iinput coefficients
Ooutput signal amplitudes

Definition at line 596 of file DiscreteCosineTransform_31points.cc.

597{
598 E T22, T4l, T2R, T1S, T1W, T1X, T2O, T3t, T2L, T3s, T1M, T1B, T1N, T2D, T3q;
599 E T2A, T3p, T1, Tt, Tu, T47, T25, T10, TV, T11, T2q, T3h, T2n, T3i, TQ;
600 E TF, TR, T2f, T3e, T2c, T3f;
601 {
602 E T1c, T1a, T1O, T1L, T1b, T1d, T1f, T1y, T1o, T1w, T1m, T1n, T1x, T1z, T1P;
603 E T1Q, T1R, T1E, T1H, T1I, T1Z, T21, T20;
604 {
605 E T19, T1K, T16, T1J;
606 T1c = I[WS(is, 27)];
607 {
608 E T17, T18, T14, T15;
609 T17 = I[WS(is, 1)];
610 T18 = I[WS(is, 15)];
611 T19 = T17 + T18;
612 T1K = T18 - T17;
613 T14 = I[WS(is, 29)];
614 T15 = I[WS(is, 23)];
615 T16 = T14 - T15;
616 T1J = T14 + T15;
617 }
618 T1a = KP559016994 * (T16 + T19);
619 T1O = FNMS(KP475528258, T1K, KP293892626 * T1J);
620 T1L = FMA(KP475528258, T1J, KP293892626 * T1K);
621 T1b = T16 - T19;
622 T1d = FMA(KP250000000, T1b, T1c);
623 }
624 {
625 E T1i, T1F, T1v, T1C, T1s, T1D, T1l, T1G;
626 {
627 E T1g, T1h, T1t, T1u;
628 T1f = I[WS(is, 7)];
629 T1y = I[WS(is, 11)];
630 T1g = I[WS(is, 19)];
631 T1h = I[WS(is, 17)];
632 T1i = T1g + T1h;
633 T1F = T1g - T1h;
634 T1t = I[WS(is, 13)];
635 T1u = I[WS(is, 5)];
636 T1v = T1t - T1u;
637 T1C = T1t + T1u;
638 {
639 E T1q, T1r, T1j, T1k;
640 T1q = I[WS(is, 21)];
641 T1r = I[WS(is, 9)];
642 T1s = T1q - T1r;
643 T1D = T1q + T1r;
644 T1j = I[WS(is, 25)];
645 T1k = I[WS(is, 3)];
646 T1l = T1j + T1k;
647 T1G = T1k - T1j;
648 }
649 }
650 T1o = KP559016994 * (T1i + T1l);
651 T1w = KP559016994 * (T1s - T1v);
652 T1m = T1i - T1l;
653 T1n = FNMS(KP250000000, T1m, T1f);
654 T1x = T1s + T1v;
655 T1z = FMA(KP250000000, T1x, T1y);
656 T1P = FNMS(KP475528258, T1G, KP293892626 * T1F);
657 T1Q = FMA(KP293892626, T1D, KP475528258 * T1C);
658 T1R = T1P + T1Q;
659 T1E = FNMS(KP475528258, T1D, KP293892626 * T1C);
660 T1H = FMA(KP475528258, T1F, KP293892626 * T1G);
661 T1I = T1E - T1H;
662 }
663 T1Z = T1b - T1c;
664 T21 = T1x - T1y;
665 T20 = T1f + T1m;
666 T22 = KP371184290 * (T1Z + T20 + T21);
667 T4l = FMA(KP462201919, T1Z, KP155909426 * T20) - (KP618111346 * T21);
668 T2R = FMA(KP258006924, T1Z, KP102097497 * T21) - (KP360104421 * T20);
669 {
670 E T2I, T2F, T2G, T2J, T1T, T1V, T1U;
671 T1S = T1O + T1R;
672 T2I = KP3_464101615 * (T1Q - T1P);
673 T2F = FNMS(KP4_000000000, T1O, KP2_000000000 * T1R);
674 T1T = T1n - T1o;
675 T1V = T1a + T1d;
676 T1U = T1w + T1z;
677 T2G = KP1_732050807 * (T1T + T1U);
678 T2J = T1U + FNMA(KP2_000000000, T1V, T1T);
679 T1W = T1T - T1U - T1V;
680 T1X = FNMS(KP202100941, T1W, KP622681257 * T1S);
681 {
682 E T2M, T2N, T2H, T2K;
683 T2M = T2F + T2G;
684 T2N = T2J - T2I;
685 T2O = FNMS(KP183215435, T2N, KP029606561 * T2M);
686 T3t = FMA(KP183215435, T2M, KP029606561 * T2N);
687 T2H = T2F - T2G;
688 T2K = T2I + T2J;
689 T2L = FMA(KP015708004, T2H, KP184926209 * T2K);
690 T3s = FNMS(KP015708004, T2K, KP184926209 * T2H);
691 }
692 }
693 {
694 E T2y, T2v, T2x, T2u, T1e, T1A, T1p;
695 T1M = T1I - T1L;
696 T2y = FMA(KP4_000000000, T1L, KP2_000000000 * T1I);
697 T2v = KP3_464101615 * (T1H + T1E);
698 T1e = T1a - T1d;
699 T1A = T1w - T1z;
700 T1p = T1n + T1o;
701 T2x = KP1_732050807 * (T1A - T1p);
702 T2u = FMS(KP2_000000000, T1e, T1A) - T1p;
703 T1B = T1e + T1p + T1A;
704 T1N = FNMS(KP245522678, T1M, KP350296205 * T1B);
705 {
706 E T2B, T2C, T2w, T2z;
707 T2B = T2v + T2u;
708 T2C = T2y + T2x;
709 T2D = FNMS(KP183845747, T2C, KP025400502 * T2B);
710 T3q = FMA(KP183845747, T2B, KP025400502 * T2C);
711 T2w = T2u - T2v;
712 T2z = T2x - T2y;
713 T2A = FNMS(KP184517712, T2z, KP019941366 * T2w);
714 T3p = FMA(KP184517712, T2w, KP019941366 * T2z);
715 }
716 }
717 }
718 {
719 E T2, Tw, TZ, TI, T9, Tv, Tb, Tk, Tz, TD, Ti, Ty, Tr, TB, TW;
720 E TX, TY, TL, TO, TP, Ta, Ts, Tj;
721 T1 = I[0];
722 {
723 E T8, TG, T5, TH;
724 T2 = I[WS(is, 4)];
725 {
726 E T6, T7, T3, T4;
727 T6 = I[WS(is, 16)];
728 T7 = I[WS(is, 30)];
729 T8 = T6 - T7;
730 TG = T6 + T7;
731 T3 = I[WS(is, 8)];
732 T4 = I[WS(is, 2)];
733 T5 = T3 - T4;
734 TH = T3 + T4;
735 }
736 Tw = KP559016994 * (T5 - T8);
737 TZ = FMA(KP475528258, TH, KP293892626 * TG);
738 TI = FNMS(KP293892626, TH, KP475528258 * TG);
739 T9 = T5 + T8;
740 Tv = FNMS(KP250000000, T9, T2);
741 }
742 {
743 E Te, TJ, Tq, TN, Tn, TM, Th, TK, TC;
744 {
745 E Tc, Td, To, Tp;
746 Tb = I[WS(is, 24)];
747 Tk = I[WS(is, 20)];
748 Tc = I[WS(is, 12)];
749 Td = I[WS(is, 14)];
750 Te = Tc - Td;
751 TJ = Tc + Td;
752 To = I[WS(is, 26)];
753 Tp = I[WS(is, 18)];
754 Tq = To + Tp;
755 TN = Tp - To;
756 {
757 E Tl, Tm, Tf, Tg;
758 Tl = I[WS(is, 10)];
759 Tm = I[WS(is, 22)];
760 Tn = Tl + Tm;
761 TM = Tm - Tl;
762 Tf = I[WS(is, 28)];
763 Tg = I[WS(is, 6)];
764 Th = Tf - Tg;
765 TK = Tf + Tg;
766 }
767 }
768 Tz = KP559016994 * (Te - Th);
769 TC = Tq - Tn;
770 TD = KP559016994 * TC;
771 Ti = Te + Th;
772 Ty = FNMS(KP250000000, Ti, Tb);
773 Tr = Tn + Tq;
774 TB = FMA(KP250000000, Tr, Tk);
775 TW = FNMS(KP293892626, TK, KP475528258 * TJ);
776 TX = FMA(KP475528258, TM, KP293892626 * TN);
777 TY = TW + TX;
778 TL = FMA(KP293892626, TJ, KP475528258 * TK);
779 TO = FNMS(KP475528258, TN, KP293892626 * TM);
780 TP = TL + TO;
781 }
782 Ta = T2 + T9;
783 Ts = Tk - Tr;
784 Tj = Tb + Ti;
785 Tt = Ta + Tj + Ts;
786 Tu = FNMS(KP066666666, Tt, T1);
787 T47 = FNMS(KP387067417, Ts, KP638094290 * Tj) - (KP251026872 * Ta);
788 T25 = FNMS(KP296373721, Ts, KP341720569 * Ta) - (KP045346848 * Tj);
789 {
790 E T2l, T2i, T2h, T2k, TS, TU, TT;
791 T10 = TY - TZ;
792 T2l = KP3_464101615 * (TX - TW);
793 T2i = FMA(KP4_000000000, TZ, KP2_000000000 * TY);
794 TS = Tw + Tv;
795 TU = TB + TD;
796 TT = Tz + Ty;
797 T2h = KP1_732050807 * (TT - TU);
798 T2k = FMS(KP2_000000000, TS, TU) - TT;
799 TV = TS + TT + TU;
800 T11 = FNMS(KP427405661, T10, KP303494444 * TV);
801 {
802 E T2o, T2p, T2j, T2m;
803 T2o = T2h - T2i;
804 T2p = T2k + T2l;
805 T2q = FMA(KP160793728, T2o, KP092681288 * T2p);
806 T3h = FNMS(KP092681288, T2o, KP160793728 * T2p);
807 T2j = T2h + T2i;
808 T2m = T2k - T2l;
809 T2n = FMA(KP183333495, T2j, KP028866483 * T2m);
810 T3i = FNMS(KP183333495, T2m, KP028866483 * T2j);
811 }
812 }
813 {
814 E T2a, T27, T26, T29, Tx, TE, TA;
815 TQ = TI + TP;
816 T2a = KP3_464101615 * (TO - TL);
817 T27 = FNMS(KP2_000000000, TP, KP4_000000000 * TI);
818 Tx = Tv - Tw;
819 TE = TB - TD;
820 TA = Ty - Tz;
821 T26 = KP1_732050807 * (TA - TE);
822 T29 = FMS(KP2_000000000, Tx, TE) - TA;
823 TF = Tx + TA + TE;
824 TR = FMA(KP348438623, TF, KP255877341 * TQ);
825 {
826 E T2d, T2e, T28, T2b;
827 T2d = T26 + T27;
828 T2e = T29 + T2a;
829 T2f = FMA(KP147857608, T2d, KP112172063 * T2e);
830 T3e = FNMS(KP112172063, T2d, KP147857608 * T2e);
831 T28 = T26 - T27;
832 T2b = T29 - T2a;
833 T2c = FMA(KP000412259, T28, KP185591687 * T2b);
834 T3f = FNMS(KP000412259, T2b, KP185591687 * T28);
835 }
836 }
837 }
838 O[WS(os, 15)] = FMA(KP2_000000000, Tt, T1);
839 {
840 E T3k, T3Q, T4g, T4D, T4r, T4H, T3v, T3T, T2t, T3d, T3P, T4T, T4d, T4E, T4U;
841 E T4o, T4G, T2S, T3o, T3S, T24, T2U, T32, T3A, T3N, T3Z, T3K, T3Y, T39, T3B;
842 {
843 E T3g, T3j, T4e, T4f;
844 T3g = T3e - T3f;
845 T3j = T3h - T3i;
846 T3k = FMA(KP587785252, T3g, KP951056516 * T3j);
847 T3Q = FNMS(KP587785252, T3j, KP951056516 * T3g);
848 T4e = T2c + T2f;
849 T4f = T2n + T2q;
850 T4g = FMA(KP1_018073920, T4e, KP1_647278207 * T4f);
851 T4D = FNMS(KP1_647278207, T4e, KP1_018073920 * T4f);
852 }
853 {
854 E T4p, T4q, T3r, T3u;
855 T4p = T2L - T2O;
856 T4q = T2D - T2A;
857 T4r = FNMS(KP1_647278207, T4q, KP1_018073920 * T4p);
858 T4H = FMA(KP1_018073920, T4q, KP1_647278207 * T4p);
859 T3r = T3p - T3q;
860 T3u = T3s - T3t;
861 T3v = FMA(KP951056516, T3r, KP587785252 * T3u);
862 T3T = FNMS(KP587785252, T3r, KP951056516 * T3u);
863 }
864 {
865 E T2s, T3c, T2g, T2r, T3b;
866 T2g = T2c - T2f;
867 T2r = T2n - T2q;
868 T2s = T2g + T2r;
869 T3c = KP559016994 * (T2r - T2g);
870 T2t = T25 + T2s;
871 T3b = FNMS(KP250000000, T2s, T25);
872 T3d = T3b + T3c;
873 T3P = T3b - T3c;
874 }
875 {
876 E T4a, T4c, T48, T49, T4b;
877 T48 = T3f + T3e;
878 T49 = T3i + T3h;
879 T4a = T48 + T49;
880 T4c = KP968245836 * (T49 - T48);
881 T4T = FMA(KP1_732050807, T4a, T47);
882 T4b = FNMS(KP433012701, T4a, T47);
883 T4d = T4b + T4c;
884 T4E = T4b - T4c;
885 }
886 {
887 E T4k, T4m, T4i, T4j, T4n;
888 T4i = T3p + T3q;
889 T4j = T3s + T3t;
890 T4k = KP968245836 * (T4i - T4j);
891 T4m = T4i + T4j;
892 T4U = FMA(KP1_732050807, T4m, T4l);
893 T4n = FNMS(KP433012701, T4m, T4l);
894 T4o = T4k + T4n;
895 T4G = T4n - T4k;
896 }
897 {
898 E T2Q, T3n, T2E, T2P, T3m;
899 T2E = T2A + T2D;
900 T2P = T2L + T2O;
901 T2Q = T2E + T2P;
902 T3n = KP559016994 * (T2P - T2E);
903 T2S = T2Q - T2R;
904 T3m = FMA(KP250000000, T2Q, T2R);
905 T3o = T3m + T3n;
906 T3S = T3m - T3n;
907 }
908 {
909 E T2X, T34, T13, T2W, T23, T33, T38, T3M, T31, T3J, T12, T1Y;
910 T2X = KP1_118033988 * (T11 - TR);
911 T34 = KP1_118033988 * (T1X - T1N);
912 T12 = TR + T11;
913 T13 = FMA(KP2_000000000, T12, Tu);
914 T2W = FNMS(KP500000000, T12, Tu);
915 T1Y = T1N + T1X;
916 T23 = FMS(KP2_000000000, T1Y, T22);
917 T33 = FMA(KP500000000, T1Y, T22);
918 {
919 E T36, T37, T2Z, T30;
920 T36 = FMA(KP700592410, T1M, KP122761339 * T1B);
921 T37 = FMA(KP404201883, T1S, KP311340628 * T1W);
922 T38 = FMA(KP1_902113032, T36, KP1_175570504 * T37);
923 T3M = FNMS(KP1_175570504, T36, KP1_902113032 * T37);
924 T2Z = FNMS(KP127938670, TF, KP696877247 * TQ);
925 T30 = FMA(KP213702830, TV, KP606988889 * T10);
926 T31 = FMA(KP1_175570504, T2Z, KP1_902113032 * T30);
927 T3J = FNMS(KP1_175570504, T30, KP1_902113032 * T2Z);
928 }
929 {
930 E T2Y, T3L, T3I, T35;
931 T24 = T13 - T23;
932 T2U = T13 + T23;
933 T2Y = T2W + T2X;
934 T32 = T2Y - T31;
935 T3A = T2Y + T31;
936 T3L = T33 - T34;
937 T3N = T3L - T3M;
938 T3Z = T3L + T3M;
939 T3I = T2W - T2X;
940 T3K = T3I - T3J;
941 T3Y = T3I + T3J;
942 T35 = T33 + T34;
943 T39 = T35 - T38;
944 T3B = T35 + T38;
945 }
946 }
947 {
948 E T4X, T2T, T4W, T4V, T2V, T4S;
949 T4X = T4T + T4U;
950 T2T = T2t - T2S;
951 T4W = T24 - T2T;
952 O[WS(os, 30)] = FMA(KP2_000000000, T2T, T24);
953 O[WS(os, 28)] = T4W + T4X;
954 O[WS(os, 18)] = T4W - T4X;
955 T4V = T4T - T4U;
956 T2V = T2t + T2S;
957 T4S = T2U - T2V;
958 O[0] = FMA(KP2_000000000, T2V, T2U);
959 O[WS(os, 2)] = T4S + T4V;
960 O[WS(os, 12)] = T4S - T4V;
961 {
962 E T3a, T3y, T3x, T3z, T4t, T4v, T46, T4u;
963 T3a = T32 - T39;
964 T3y = T32 + T39;
965 {
966 E T3l, T3w, T4h, T4s;
967 T3l = T3d - T3k;
968 T3w = T3o - T3v;
969 T3x = T3l - T3w;
970 T3z = T3l + T3w;
971 T4h = T4d - T4g;
972 T4s = T4o - T4r;
973 T4t = T4h - T4s;
974 T4v = T4h + T4s;
975 }
976 O[WS(os, 7)] = FMA(KP2_000000000, T3x, T3a);
977 O[WS(os, 23)] = FMA(KP2_000000000, T3z, T3y);
978 T46 = T3a - T3x;
979 O[WS(os, 1)] = T46 - T4t;
980 O[WS(os, 24)] = T46 + T4t;
981 T4u = T3y - T3z;
982 O[WS(os, 29)] = T4u - T4v;
983 O[WS(os, 6)] = T4u + T4v;
984 }
985 }
986 {
987 E T40, T44, T43, T45, T4P, T4R, T4M, T4Q;
988 T40 = T3Y - T3Z;
989 T44 = T3Y + T3Z;
990 {
991 E T41, T42, T4N, T4O;
992 T41 = T3P + T3Q;
993 T42 = T3S + T3T;
994 T43 = T41 - T42;
995 T45 = T41 + T42;
996 T4N = T4E - T4D;
997 T4O = T4H + T4G;
998 T4P = T4N - T4O;
999 T4R = T4N + T4O;
1000 }
1001 O[WS(os, 11)] = FMA(KP2_000000000, T43, T40);
1002 O[WS(os, 19)] = FMA(KP2_000000000, T45, T44);
1003 T4M = T40 - T43;
1004 O[WS(os, 22)] = T4M - T4P;
1005 O[WS(os, 4)] = T4M + T4P;
1006 T4Q = T44 - T45;
1007 O[WS(os, 8)] = T4Q - T4R;
1008 O[WS(os, 26)] = T4Q + T4R;
1009 }
1010 {
1011 E T3O, T3W, T3V, T3X, T4J, T4L, T4C, T4K;
1012 T3O = T3K - T3N;
1013 T3W = T3K + T3N;
1014 {
1015 E T3R, T3U, T4F, T4I;
1016 T3R = T3P - T3Q;
1017 T3U = T3S - T3T;
1018 T3V = T3R - T3U;
1019 T3X = T3R + T3U;
1020 T4F = T4D + T4E;
1021 T4I = T4G - T4H;
1022 T4J = T4F - T4I;
1023 T4L = T4F + T4I;
1024 }
1025 O[WS(os, 13)] = FMA(KP2_000000000, T3V, T3O);
1026 O[WS(os, 17)] = FMA(KP2_000000000, T3X, T3W);
1027 T4C = T3O - T3V;
1028 O[WS(os, 27)] = T4C - T4J;
1029 O[WS(os, 5)] = T4C + T4J;
1030 T4K = T3W - T3X;
1031 O[WS(os, 3)] = T4K - T4L;
1032 O[WS(os, 25)] = T4K + T4L;
1033 }
1034 {
1035 E T3C, T3G, T3F, T3H, T4z, T4B, T4w, T4A;
1036 T3C = T3A - T3B;
1037 T3G = T3A + T3B;
1038 {
1039 E T3D, T3E, T4x, T4y;
1040 T3D = T3d + T3k;
1041 T3E = T3o + T3v;
1042 T3F = T3D - T3E;
1043 T3H = T3D + T3E;
1044 T4x = T4g + T4d;
1045 T4y = T4r + T4o;
1046 T4z = T4x - T4y;
1047 T4B = T4x + T4y;
1048 }
1049 O[WS(os, 16)] = FMA(KP2_000000000, T3F, T3C);
1050 O[WS(os, 14)] = FMA(KP2_000000000, T3H, T3G);
1051 T4w = T3C - T3F;
1052 O[WS(os, 21)] = T4w - T4z;
1053 O[WS(os, 20)] = T4w + T4z;
1054 T4A = T3G - T3H;
1055 O[WS(os, 9)] = T4A - T4B;
1056 O[WS(os, 10)] = T4A + T4B;
1057 }
1058 }
1059}
R E
internal precision of FFTW codelets
#define FMA(a, b, c)
fused multiply add
#define FNMA(a, b, c)
fused negative multiply add
#define FMS(a, b, c)
fused multiply subtract
#define WS(x, y)
macro autogenerated by FFTW
#define FNMS(a, b, c)
fused negative multiply subtract

◆ e10_31()

void e10_31 ( const R I,
R O 
)

DCT-II or "the" DCT transformation of 31-point signal This function contains 320 FP additions, 170 FP multiplications, (or, 229 additions, 79 multiplications, 91 fused multiply/add), 150 stack variables, 64 constants, and 62 memory accesses.

DCT-II or "the" DCT transformation of 31-point signal.

Parameters
Iinput signal array of 31 values
Ooutput array with 31 DCT coefficients

Definition at line 124 of file DiscreteCosineTransform_31points.cc.

125{
126 E T2J, T1H, T1J, T1I, T1K, T1T, T2S, T3a, T31, T3b, T3M, T1M, T1P, T1N, T1Q;
127 E T1i, T1l, T1A, T1D, T1b, T1k, T1v, T1C, T3n, T3I, T3y, T3D, T3S, T3V, T43;
128 E T46, T3P, T3U, T40, T45;
129 T2J = I[WS(is, 15)];
130 {
131 E TN, T2K, TW, T32, T16, T2T, T1d, T1e, T3e, T3d, T2R, T3p, TK, TO, Tu;
132 E T1p, T3i, T3F, T12, T17, T30, T3s, Tf, T1o, T3l, T3G, TT, TX, T39, T3v;
133 E TL, TM;
134 TL = I[WS(is, 17)];
135 TM = I[WS(is, 13)];
136 TN = TL - TM;
137 T2K = TL + TM;
138 {
139 E TU, TV, T14, T15;
140 TU = I[WS(is, 25)];
141 TV = I[WS(is, 5)];
142 TW = TU - TV;
143 T32 = TU + TV;
144 T14 = I[WS(is, 3)];
145 T15 = I[WS(is, 27)];
146 T16 = T14 - T15;
147 T2T = T14 + T15;
148 }
149 {
150 E Ty, T2L, TI, T2P, TB, T2M, TF, T2O;
151 {
152 E Tw, Tx, TG, TH;
153 Tw = I[WS(is, 14)];
154 Tx = I[WS(is, 16)];
155 Ty = Tw - Tx;
156 T2L = Tw + Tx;
157 TG = I[WS(is, 23)];
158 TH = I[WS(is, 7)];
159 TI = TG - TH;
160 T2P = TG + TH;
161 }
162 {
163 E Tz, TA, TD, TE;
164 Tz = I[WS(is, 19)];
165 TA = I[WS(is, 11)];
166 TB = Tz - TA;
167 T2M = Tz + TA;
168 TD = I[WS(is, 30)];
169 TE = I[0];
170 TF = TD - TE;
171 T2O = TD + TE;
172 }
173 T1d = Ty - TB;
174 T1e = TF - TI;
175 T3e = T2O - T2P;
176 T3d = T2M - T2L;
177 {
178 E T2N, T2Q, TC, TJ;
179 T2N = T2L + T2M;
180 T2Q = T2O + T2P;
181 T2R = T2N + T2Q;
182 T3p = KP559016994 * (T2Q - T2N);
183 TC = Ty + TB;
184 TJ = TF + TI;
185 TK = KP559016994 * (TC - TJ);
186 TO = TC + TJ;
187 }
188 }
189 {
190 E Ti, T2U, Ts, T2Y, Tl, T2V, Tp, T2X;
191 {
192 E Tg, Th, Tq, Tr;
193 Tg = I[WS(is, 9)];
194 Th = I[WS(is, 21)];
195 Ti = Tg - Th;
196 T2U = Tg + Th;
197 Tq = I[WS(is, 29)];
198 Tr = I[WS(is, 1)];
199 Ts = Tq - Tr;
200 T2Y = Tq + Tr;
201 }
202 {
203 E Tj, Tk, Tn, To;
204 Tj = I[WS(is, 8)];
205 Tk = I[WS(is, 22)];
206 Tl = Tj - Tk;
207 T2V = Tj + Tk;
208 Tn = I[WS(is, 18)];
209 To = I[WS(is, 12)];
210 Tp = Tn - To;
211 T2X = Tn + To;
212 }
213 {
214 E Tm, Tt, T3g, T3h;
215 Tm = Ti - Tl;
216 Tt = Tp - Ts;
217 Tu = FMA(KP475528258, Tm, KP293892626 * Tt);
218 T1p = FNMS(KP475528258, Tt, KP293892626 * Tm);
219 T3g = T2V - T2U;
220 T3h = T2X - T2Y;
221 T3i = FMA(KP293892626, T3g, KP475528258 * T3h);
222 T3F = FNMS(KP293892626, T3h, KP475528258 * T3g);
223 }
224 {
225 E T10, T11, T2W, T2Z;
226 T10 = Ti + Tl;
227 T11 = Tp + Ts;
228 T12 = T10 - T11;
229 T17 = T10 + T11;
230 T2W = T2U + T2V;
231 T2Z = T2X + T2Y;
232 T30 = T2W + T2Z;
233 T3s = KP559016994 * (T2Z - T2W);
234 }
235 }
236 {
237 E T3, T33, Td, T37, T6, T34, Ta, T36;
238 {
239 E T1, T2, Tb, Tc;
240 T1 = I[WS(is, 10)];
241 T2 = I[WS(is, 20)];
242 T3 = T1 - T2;
243 T33 = T1 + T2;
244 Tb = I[WS(is, 6)];
245 Tc = I[WS(is, 24)];
246 Td = Tb - Tc;
247 T37 = Tb + Tc;
248 }
249 {
250 E T4, T5, T8, T9;
251 T4 = I[WS(is, 26)];
252 T5 = I[WS(is, 4)];
253 T6 = T4 - T5;
254 T34 = T4 + T5;
255 T8 = I[WS(is, 28)];
256 T9 = I[WS(is, 2)];
257 Ta = T8 - T9;
258 T36 = T8 + T9;
259 }
260 {
261 E T7, Te, T3j, T3k;
262 T7 = T3 - T6;
263 Te = Ta - Td;
264 Tf = FMA(KP475528258, T7, KP293892626 * Te);
265 T1o = FNMS(KP475528258, Te, KP293892626 * T7);
266 T3j = T34 - T33;
267 T3k = T36 - T37;
268 T3l = FMA(KP293892626, T3j, KP475528258 * T3k);
269 T3G = FNMS(KP293892626, T3k, KP475528258 * T3j);
270 }
271 {
272 E TR, TS, T35, T38;
273 TR = T3 + T6;
274 TS = Ta + Td;
275 TT = KP559016994 * (TR - TS);
276 TX = TR + TS;
277 T35 = T33 + T34;
278 T38 = T36 + T37;
279 T39 = T35 + T38;
280 T3v = T38 - T35;
281 }
282 }
283 T1H = TN + TO;
284 T1J = T16 + T17;
285 T1I = TW + TX;
286 T1K = FMA(KP258006924, T1H, KP102097497 * T1I) - (KP360104421 * T1J);
287 T1T = KP371184290 * (T1H + T1J + T1I);
288 T2S = T2K + T2R;
289 T3a = T32 + T39;
290 T31 = T2T + T30;
291 T3b = T2S + T31 + T3a;
292 T3M = FMA(KP045346848, T31, KP296373721 * T3a) - (KP341720569 * T2S);
293 {
294 E Tv, T1q, TQ, T1r, TZ, T1s, T1c, T1f, T1g, T19, T1t, T1w, T1x, T1y;
295 {
296 E TP, TY, T13, T18;
297 Tv = KP3_464101615 * (Tf - Tu);
298 T1q = KP3_464101615 * (T1o - T1p);
299 TP = FNMS(KP250000000, TO, TN);
300 TQ = TK + TP;
301 T1r = TP - TK;
302 TY = FNMS(KP250000000, TX, TW);
303 TZ = TT + TY;
304 T1s = TY - TT;
305 T1c = Tu + Tf;
306 T1f = FMA(KP475528258, T1d, KP293892626 * T1e);
307 T1g = FNMS(KP4_000000000, T1f, KP2_000000000 * T1c);
308 T13 = KP559016994 * T12;
309 T18 = FNMS(KP250000000, T17, T16);
310 T19 = T13 + T18;
311 T1t = T18 - T13;
312 T1w = T1p + T1o;
313 T1x = FNMS(KP475528258, T1e, KP293892626 * T1d);
314 T1y = FNMS(KP4_000000000, T1x, KP2_000000000 * T1w);
315 }
316 T1M = T1f + T1c;
317 T1P = T1x + T1w;
318 T1N = TQ + T19 + TZ;
319 T1Q = T1r + T1t + T1s;
320 {
321 E T1h, T1z, T1a, T1u;
322 T1h = KP1_732050807 * (TZ - T19);
323 T1i = T1g + T1h;
324 T1l = T1h - T1g;
325 T1z = KP1_732050807 * (T1s - T1t);
326 T1A = T1y + T1z;
327 T1D = T1z - T1y;
328 T1a = FMS(KP2_000000000, TQ, TZ) - T19;
329 T1b = Tv + T1a;
330 T1k = T1a - Tv;
331 T1u = FMS(KP2_000000000, T1r, T1s) - T1t;
332 T1v = T1q + T1u;
333 T1C = T1u - T1q;
334 }
335 }
336 {
337 E T3O, T3Z, T3q, T3A, T3t, T3B, T3x, T3C, T3f, T3m, T3R, T3E, T3H, T42;
338 {
339 E T3o, T3r, T3w, T3u;
340 T3O = KP3_464101615 * (T3l - T3i);
341 T3Z = KP3_464101615 * (T3G - T3F);
342 T3o = FMS(KP250000000, T2R, T2K);
343 T3q = T3o - T3p;
344 T3A = T3o + T3p;
345 T3r = FMS(KP250000000, T30, T2T);
346 T3t = T3r - T3s;
347 T3B = T3r + T3s;
348 T3w = KP559016994 * T3v;
349 T3u = FMS(KP250000000, T39, T32);
350 T3x = T3u - T3w;
351 T3C = T3u + T3w;
352 T3f = FMA(KP293892626, T3d, KP475528258 * T3e);
353 T3m = T3i + T3l;
354 T3R = FNMS(KP2_000000000, T3m, KP4_000000000 * T3f);
355 T3E = FNMS(KP293892626, T3e, KP475528258 * T3d);
356 T3H = T3F + T3G;
357 T42 = FNMS(KP2_000000000, T3H, KP4_000000000 * T3E);
358 }
359 T3n = T3f + T3m;
360 T3I = T3E + T3H;
361 T3y = T3q + T3t + T3x;
362 T3D = T3A + T3B + T3C;
363 {
364 E T3Q, T41, T3N, T3Y;
365 T3Q = KP1_732050807 * (T3t - T3x);
366 T3S = T3Q - T3R;
367 T3V = T3R + T3Q;
368 T41 = KP1_732050807 * (T3B - T3C);
369 T43 = T41 - T42;
370 T46 = T42 + T41;
371 T3N = FMS(KP2_000000000, T3q, T3x) - T3t;
372 T3P = T3N - T3O;
373 T3U = T3O + T3N;
374 T3Y = FMS(KP2_000000000, T3A, T3C) - T3B;
375 T40 = T3Y - T3Z;
376 T45 = T3Z + T3Y;
377 }
378 }
379 }
380 O[0] = KP2_000000000 * (T2J + T3b);
381 {
382 E T2l, T2B, T1U, T2o, T2C, T2a, T2w, T2c, T1G, T2d, T23, T25, T2y, T2h, T22;
383 E T26, T2j, T2k, T24, T1L, T1V;
384 T2j = FNMS(KP700592410, T1M, KP122761339 * T1N);
385 T2k = FMA(KP404201883, T1P, KP311340628 * T1Q);
386 T2l = FMA(KP1_902113032, T2j, KP1_175570504 * T2k);
387 T2B = FNMS(KP1_902113032, T2k, KP1_175570504 * T2j);
388 {
389 E T1S, T2m, T1O, T1R, T2n;
390 T1O = FMA(KP245522678, T1M, KP350296205 * T1N);
391 T1R = FNMS(KP202100941, T1Q, KP622681257 * T1P);
392 T1S = T1O + T1R;
393 T2m = KP1_118033988 * (T1R - T1O);
394 T1U = FMS(KP2_000000000, T1S, T1T);
395 T2n = FMA(KP500000000, T1S, T1T);
396 T2o = T2m + T2n;
397 T2C = T2n - T2m;
398 }
399 {
400 E T1n, T29, T1F, T28;
401 {
402 E T1j, T1m, T1B, T1E;
403 T1j = FNMS(KP184517712, T1i, KP019941366 * T1b);
404 T1m = FNMS(KP183845747, T1l, KP025400502 * T1k);
405 T1n = T1j + T1m;
406 T29 = T1m - T1j;
407 T1B = FMA(KP184926209, T1v, KP015708004 * T1A);
408 T1E = FMA(KP183215435, T1C, KP029606561 * T1D);
409 T1F = T1B - T1E;
410 T28 = T1B + T1E;
411 }
412 T2a = FNMS(KP1_647278207, T29, KP1_018073920 * T28);
413 T2w = FMA(KP1_018073920, T29, KP1_647278207 * T28);
414 T2c = KP559016994 * (T1F - T1n);
415 T1G = T1n + T1F;
416 T2d = FMA(KP250000000, T1G, T1K);
417 }
418 {
419 E T21, T2g, T1Y, T2f;
420 T23 = FMA(KP462201919, T1H, KP155909426 * T1J) - (KP618111346 * T1I);
421 {
422 E T1Z, T20, T1W, T1X;
423 T1Z = FNMS(KP015708004, T1v, KP184926209 * T1A);
424 T20 = FNMS(KP183215435, T1D, KP029606561 * T1C);
425 T21 = T1Z + T20;
426 T2g = T1Z - T20;
427 T1W = FMA(KP184517712, T1b, KP019941366 * T1i);
428 T1X = FMA(KP183845747, T1k, KP025400502 * T1l);
429 T1Y = T1W + T1X;
430 T2f = T1W - T1X;
431 }
432 T25 = KP968245836 * (T1Y - T21);
433 T2y = FNMS(KP587785252, T2f, KP951056516 * T2g);
434 T2h = FMA(KP951056516, T2f, KP587785252 * T2g);
435 T22 = T1Y + T21;
436 T26 = FNMS(KP433012701, T22, T23);
437 }
438 T24 = FMA(KP1_732050807, T22, T23);
439 T1L = T1G - T1K;
440 T1V = T1U - T1L;
441 O[WS(os, 1)] = FMA(KP2_000000000, T1L, T1U);
442 O[WS(os, 25)] = T24 + T1V;
443 O[WS(os, 5)] = T1V - T24;
444 {
445 E T2D, T2G, T2x, T2H, T2A, T2F, T2v, T2z, T2E, T2I;
446 T2D = T2B - T2C;
447 T2G = T2B + T2C;
448 T2v = T25 - T26;
449 T2x = T2v - T2w;
450 T2H = T2w + T2v;
451 T2z = T2d - T2c;
452 T2A = T2y + T2z;
453 T2F = T2y - T2z;
454 O[WS(os, 23)] = FNMS(KP2_000000000, T2A, T2D);
455 O[WS(os, 27)] = FMS(KP2_000000000, T2F, T2G);
456 T2E = T2A + T2D;
457 O[WS(os, 17)] = T2x - T2E;
458 O[WS(os, 9)] = T2x + T2E;
459 T2I = T2F + T2G;
460 O[WS(os, 11)] = T2H - T2I;
461 O[WS(os, 7)] = T2H + T2I;
462 }
463 {
464 E T2p, T2r, T2b, T2u, T2i, T2s, T27, T2e, T2q, T2t;
465 T2p = T2l - T2o;
466 T2r = T2l + T2o;
467 T27 = T25 + T26;
468 T2b = T27 - T2a;
469 T2u = T2a + T27;
470 T2e = T2c + T2d;
471 T2i = T2e - T2h;
472 T2s = T2h + T2e;
473 O[WS(os, 15)] = FNMS(KP2_000000000, T2i, T2p);
474 O[WS(os, 29)] = FMA(KP2_000000000, T2s, T2r);
475 T2q = T2i + T2p;
476 O[WS(os, 13)] = T2b - T2q;
477 O[WS(os, 3)] = T2b + T2q;
478 T2t = T2r - T2s;
479 O[WS(os, 19)] = T2t - T2u;
480 O[WS(os, 21)] = T2u + T2t;
481 }
482 }
483 {
484 E T4x, T4P, T3L, T4u, T4O, T4p, T4T, T4E, T49, T4o, T4c, T4n, T4M, T4A, T4j;
485 E T4z, T4v, T4w, T4k, T4a, T4b;
486 T4v = FNMS(KP127938670, T3y, KP696877247 * T3n);
487 T4w = FMA(KP606988889, T3I, KP213702830 * T3D);
488 T4x = FMA(KP1_175570504, T4v, KP1_902113032 * T4w);
489 T4P = FNMS(KP1_902113032, T4v, KP1_175570504 * T4w);
490 {
491 E T3c, T3K, T4t, T3z, T3J, T4s;
492 T3c = FNMS(KP2_000000000, T2J, KP066666666 * T3b);
493 T3z = FMA(KP255877341, T3n, KP348438623 * T3y);
494 T3J = FNMS(KP427405661, T3I, KP303494444 * T3D);
495 T3K = T3z + T3J;
496 T4t = KP1_118033988 * (T3J - T3z);
497 T3L = FMA(KP2_000000000, T3K, T3c);
498 T4s = FNMS(KP500000000, T3K, T3c);
499 T4u = T4s + T4t;
500 T4O = T4t - T4s;
501 }
502 {
503 E T3X, T4C, T48, T4D;
504 {
505 E T3T, T3W, T44, T47;
506 T3T = FMA(KP185591687, T3P, KP000412259 * T3S);
507 T3W = FMA(KP112172063, T3U, KP147857608 * T3V);
508 T3X = T3T - T3W;
509 T4C = T3T + T3W;
510 T44 = FMA(KP028866483, T40, KP183333495 * T43);
511 T47 = FMA(KP092681288, T45, KP160793728 * T46);
512 T48 = T44 - T47;
513 T4D = T44 + T47;
514 }
515 T4p = KP559016994 * (T48 - T3X);
516 T4T = FNMS(KP1_647278207, T4C, KP1_018073920 * T4D);
517 T4E = FMA(KP1_018073920, T4C, KP1_647278207 * T4D);
518 T49 = T3X + T48;
519 T4o = FNMS(KP250000000, T49, T3M);
520 }
521 {
522 E T4i, T4m, T4f, T4l;
523 T4c = FMA(KP251026872, T2S, KP387067417 * T3a) - (KP638094290 * T31);
524 {
525 E T4g, T4h, T4d, T4e;
526 T4g = FNMS(KP183333495, T40, KP028866483 * T43);
527 T4h = FNMS(KP092681288, T46, KP160793728 * T45);
528 T4i = T4g + T4h;
529 T4m = T4h - T4g;
530 T4d = FNMS(KP000412259, T3P, KP185591687 * T3S);
531 T4e = FNMS(KP112172063, T3V, KP147857608 * T3U);
532 T4f = T4d + T4e;
533 T4l = T4e - T4d;
534 }
535 T4n = FMA(KP587785252, T4l, KP951056516 * T4m);
536 T4M = FNMS(KP951056516, T4l, KP587785252 * T4m);
537 T4A = KP968245836 * (T4i - T4f);
538 T4j = T4f + T4i;
539 T4z = FNMS(KP433012701, T4j, T4c);
540 }
541 T4k = FMA(KP1_732050807, T4j, T4c);
542 T4a = T3M + T49;
543 T4b = T3L - T4a;
544 O[WS(os, 6)] = T4b - T4k;
545 O[WS(os, 30)] = FMA(KP2_000000000, T4a, T3L);
546 O[WS(os, 26)] = T4k + T4b;
547 {
548 E T4Q, T4W, T4N, T4V, T4U, T4Y, T4L, T4S, T4R, T4X;
549 T4Q = T4O - T4P;
550 T4W = T4O + T4P;
551 T4L = T4p - T4o;
552 T4N = T4L - T4M;
553 T4V = T4M + T4L;
554 T4S = T4z - T4A;
555 T4U = T4S + T4T;
556 T4Y = T4S - T4T;
557 O[WS(os, 4)] = FMA(KP2_000000000, T4N, T4Q);
558 O[WS(os, 8)] = FMA(KP2_000000000, T4V, T4W);
559 T4R = T4Q - T4N;
560 O[WS(os, 20)] = T4R - T4U;
561 O[WS(os, 24)] = T4U + T4R;
562 T4X = T4V - T4W;
563 O[WS(os, 14)] = T4X - T4Y;
564 O[WS(os, 22)] = T4Y + T4X;
565 }
566 {
567 E T4y, T4J, T4r, T4I, T4F, T4H, T4q, T4B, T4G, T4K;
568 T4y = T4u + T4x;
569 T4J = T4x - T4u;
570 T4q = T4o + T4p;
571 T4r = T4n + T4q;
572 T4I = T4n - T4q;
573 T4B = T4z + T4A;
574 T4F = T4B + T4E;
575 T4H = T4B - T4E;
576 O[WS(os, 2)] = FMA(KP2_000000000, T4r, T4y);
577 O[WS(os, 16)] = FMA(KP2_000000000, T4I, T4J);
578 T4G = T4y - T4r;
579 O[WS(os, 12)] = T4F - T4G;
580 O[WS(os, 10)] = T4F + T4G;
581 T4K = T4I - T4J;
582 O[WS(os, 28)] = T4H - T4K;
583 O[WS(os, 18)] = T4H + T4K;
584 }
585 }
586}