#ifndef AVCODEC_X86_DSPUTIL_MMX_H
#define AVCODEC_X86_DSPUTIL_MMX_H

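/* LOAD4/STORE4 move four MMX quadwords between the registers a..d and the
 * memory locations in + 0*stride .. in + 3*stride; stride and in are pasted
 * textually into the asm template. */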
#define LOAD4(stride,in,a,b,c,d)\
    "movq 0*"#stride"+"#in", "#a"\n\t"\
    "movq 1*"#stride"+"#in", "#b"\n\t"\
    "movq 2*"#stride"+"#in", "#c"\n\t"\
    "movq 3*"#stride"+"#in", "#d"\n\t"

#define STORE4(stride,out,a,b,c,d)\
    "movq "#a", 0*"#stride"+"#out"\n\t"\
    "movq "#b", 1*"#stride"+"#out"\n\t"\
    "movq "#c", 2*"#stride"+"#out"\n\t"\
    "movq "#d", 3*"#stride"+"#out"\n\t"

#define SUMSUB_BA( a, b ) \
    "paddw "#b", "#a" \n\t"\
    "paddw "#b", "#b" \n\t"\
    "psubw "#a", "#b" \n\t"

#define SBUTTERFLY(a,b,t,n,m)\
    "mov" #m " " #a ", " #t " \n\t" \
    "punpckl" #n " " #b ", " #a " \n\t" \
    "punpckh" #n " " #b ", " #t " \n\t" \

#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd,q) \
    SBUTTERFLY(c,d,b,wd,q) \
    SBUTTERFLY(a,c,d,dq,q) \
    SBUTTERFLY(t,b,c,dq,q)

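/*
 * Illustrative sketch (not part of the original header): transpose a 4x4
 * block of 16-bit coefficients in place with LOAD4/TRANSPOSE4/STORE4.  The
 * function name and the in-place layout are assumptions for illustration
 * only.  TRANSPOSE4 leaves the transposed rows in a, d, t, c, which is why
 * the stores below are ordered mm0, mm3, mm4, mm2.
 */
static inline void transpose4x4_int16_sketch(int16_t block[4][4])
{
    __asm__ volatile(
        LOAD4(8, 0(%0), %%mm0, %%mm1, %%mm2, %%mm3)    /* load rows 0..3     */
        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)  /* 4x4 word transpose */
        STORE4(8, 0(%0), %%mm0, %%mm3, %%mm4, %%mm2)   /* store rows 0..3    */
        "emms \n\t"                                    /* leave MMX state    */
        :: "r" (block)
        : "memory"
    );
}
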
100 "movd (%1), %%mm0 \n\t"
102 "movd (%1), %%mm1 \n\t"
103 "movd (%1,%3,1), %%mm2 \n\t"
104 "movd (%1,%3,2), %%mm3 \n\t"
105 "punpcklbw %%mm1, %%mm0 \n\t"
106 "punpcklbw %%mm3, %%mm2 \n\t"
107 "movq %%mm0, %%mm1 \n\t"
108 "punpcklwd %%mm2, %%mm0 \n\t"
109 "punpckhwd %%mm2, %%mm1 \n\t"
110 "movd %%mm0, (%0) \n\t"
112 "punpckhdq %%mm0, %%mm0 \n\t"
113 "movd %%mm0, (%0) \n\t"
114 "movd %%mm1, (%0,%2,1) \n\t"
115 "punpckhdq %%mm1, %%mm1 \n\t"
116 "movd %%mm1, (%0,%2,2) \n\t"
#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
    "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\
    "punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\
    "punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 c3 g3 */\
    "punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\
    SBUTTERFLY(a, b, t, bw, q)   /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\
                                 /* t= a2 b2 e2 f2 a3 b3 e3 f3 */\
    SBUTTERFLY(c, d, b, bw, q)   /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\
                                 /* b= c2 d2 g2 h2 c3 d3 g3 h3 */\
    SBUTTERFLY(a, c, d, wd, q)   /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\
                                 /* d= a1 b1 c1 d1 e1 f1 g1 h1 */\
    SBUTTERFLY(t, b, c, wd, q)   /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\
                                 /* c= a3 b3 c3 d3 e3 f3 g3 h3 */

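/*
 * TRANSPOSE8 transposes eight SSE2 registers of 16-bit elements.  The 64-bit
 * variant uses %%xmm8 as the temporary and leaves the t argument unused; the
 * 32-bit variant spills through t, which must name 32 bytes of memory scratch
 * (it is written at offsets 0 and 16).
 */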
#if ARCH_X86_64
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    SBUTTERFLY(a,b,%%xmm8,wd,dqa)\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    SBUTTERFLY(%%xmm8,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa %%xmm8, "#g"              \n\t"
#else
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    "movdqa "#h", "#t"                \n\t"\
    SBUTTERFLY(a,b,h,wd,dqa)\
    "movdqa "#h", 16"#t"              \n\t"\
    "movdqa "#t", "#h"                \n\t"\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    "movdqa "#h", "#t"                \n\t"\
    "movdqa 16"#t", "#h"              \n\t"\
    SBUTTERFLY(h,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(h,d,e,qdq,dqa)\
    "movdqa "#h", 16"#t"              \n\t"\
    "movdqa "#t", "#h"                \n\t"\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa 16"#t", "#g"              \n\t"
#endif

#define MOVQ_WONE(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)

void ff_deinterlace_line_mmx(uint8_t *dst,
                             const uint8_t *lum_m4, const uint8_t *lum_m3,
                             const uint8_t *lum_m2, const uint8_t *lum_m1,
                             const uint8_t *lum, int size);

void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
                                     const uint8_t *lum_m3,
                                     const uint8_t *lum_m2,
                                     const uint8_t *lum_m1,
                                     const uint8_t *lum, int size);