yadif_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with Libav; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
/*
 * Instruction-set selection for this template.
 *
 * With COMPILE_TEMPLATE_SSE defined the asm fragments below operate on the
 * %xmm registers and one loop iteration covers STEP = 8 bytes; otherwise
 * they operate on the %mm registers and STEP = 4.
 *
 *   MM          register-name prefix; the register index is pasted after it.
 *   MOV         load/store of exactly STEP bytes (movq = 8, movd = 4).
 *   MOVQ        full-register move (movdqa in the SSE variant, so its memory
 *               operands must be 16-byte aligned there).
 *   MOVQU       full-register load without the alignment requirement.
 *   LOAD        loads STEP bytes from mem into dst and interleaves them with
 *               register 7 (punpcklbw); FILTER zeroes register 7 before the
 *               first LOAD, so the bytes become 16-bit words.
 *   PSRL1/PSRL2 shift reg right by one / two bytes.
 *   PSHUF       NOTE: the parameter names are inverted relative to the data
 *               flow. In AT&T operand order the register passed as the
 *               FIRST argument ("src") is the one written; it receives the
 *               second argument ("dst") moved down by two bytes. The MMX
 *               variant uses pshufw $9, which yields words 1,2,0,0 of the
 *               input, i.e. the same result in the low two words.
 */
21 #ifdef COMPILE_TEMPLATE_SSE
22 #define MM "%%xmm"
23 #define MOV "movq"
24 #define MOVQ "movdqa"
25 #define MOVQU "movdqu"
26 #define STEP 8
27 #define LOAD(mem,dst) \
28  MOV" "mem", "dst" \n\t"\
29  "punpcklbw "MM"7, "dst" \n\t"
30 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
31 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
32 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
33  "psrldq $2, "src" \n\t"
34 #else
35 #define MM "%%mm"
36 #define MOV "movd"
37 #define MOVQ "movq"
38 #define MOVQU "movq"
39 #define STEP 4
40 #define LOAD(mem,dst) \
41  MOV" "mem", "dst" \n\t"\
42  "punpcklbw "MM"7, "dst" \n\t"
43 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
44 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
45 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
46 #endif
47 
/*
 * PABS(tmp, dst): replace each signed 16-bit word of dst by its absolute
 * value.
 *
 * With COMPILE_TEMPLATE_SSSE3 this is a single pabsw and tmp is left
 * untouched. Otherwise tmp is overwritten: it is cleared, set to 0 - dst,
 * and dst becomes the signed per-word maximum of dst and tmp.
 */
48 #ifdef COMPILE_TEMPLATE_SSSE3
49 #define PABS(tmp,dst) \
50  "pabsw "dst", "dst" \n\t"
51 #else
52 #define PABS(tmp,dst) \
53  "pxor "tmp", "tmp" \n\t"\
54  "psubw "dst", "tmp" \n\t"\
55  "pmaxsw "tmp", "dst" \n\t"
56 #endif
57 
/*
 * CHECK(pj, mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are stringified and used as byte displacements: the macro loads
 * a full register (unaligned) from pj(cur + mrefs) and from mj(cur + prefs).
 *
 * Results:
 *   register 5 - per-pixel average of the two rows, rounded down, widened
 *                to words. pavgb rounds up, so the low bit of (a ^ b) is
 *                subtracted first; the value is then shifted down one byte
 *                to drop the extra leading pixel that was loaded.
 *   register 2 - score: the sum, as words, of the absolute byte differences
 *                at three neighbouring positions (shifted by 0, 1 and 2
 *                bytes).
 *
 * Registers 3 and 4 are used as scratch. Register 7 is expected to be zero.
 * MANGLE(pb_1) refers to a constant defined outside this file - presumably
 * a vector with every byte equal to 1; confirm at the include site.
 */
58 #define CHECK(pj,mj) \
59  MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
60  MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
61  MOVQ" "MM"2, "MM"4 \n\t"\
62  MOVQ" "MM"2, "MM"5 \n\t"\
63  "pxor "MM"3, "MM"4 \n\t"\
64  "pavgb "MM"3, "MM"5 \n\t"\
65  "pand "MANGLE(pb_1)", "MM"4 \n\t"\
66  "psubusb "MM"4, "MM"5 \n\t"\
67  PSRL1(MM"5") \
68  "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
69  MOVQ" "MM"2, "MM"4 \n\t"\
70  "psubusb "MM"3, "MM"2 \n\t"\
71  "psubusb "MM"4, "MM"3 \n\t"\
72  "pmaxub "MM"3, "MM"2 \n\t"\
73  MOVQ" "MM"2, "MM"3 \n\t"\
74  MOVQ" "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
75  PSRL1(MM"3") /* ABS(cur[x-refs +j] - cur[x+refs -j]) */\
76  PSRL2(MM"4") /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
77  "punpcklbw "MM"7, "MM"2 \n\t"\
78  "punpcklbw "MM"7, "MM"3 \n\t"\
79  "punpcklbw "MM"7, "MM"4 \n\t"\
80  "paddw "MM"3, "MM"2 \n\t"\
81  "paddw "MM"4, "MM"2 \n\t" /* score */
82 
/*
 * CHECK1: accept the direction just evaluated by CHECK wherever it scores
 * better than the best score so far.
 *
 * In:  register 0 = best score so far, register 1 = current prediction,
 *      register 2 = new score, register 5 = new candidate prediction.
 * Out: register 0 = per-word minimum of old and new score,
 *      register 1 = candidate where new score < old score, else unchanged,
 *      register 6 = the comparison mask (all ones where the candidate won),
 *                   kept for the following CHECK2.
 * Register 3 is scratch; register 5 is masked in place.
 */
83 #define CHECK1 \
84  MOVQ" "MM"0, "MM"3 \n\t"\
85  "pcmpgtw "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
86  "pminsw "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
87  MOVQ" "MM"3, "MM"6 \n\t"\
88  "pand "MM"3, "MM"5 \n\t"\
89  "pandn "MM"1, "MM"3 \n\t"\
90  "por "MM"5, "MM"3 \n\t"\
91  MOVQ" "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
92 
/*
 * CHECK2: like CHECK1, but only effective in lanes where the preceding
 * CHECK1 accepted its candidate.
 *
 * Register 6 holds the CHECK1 mask (all ones = accepted, zero = rejected).
 * Adding MANGLE(pw_1) and shifting left by 14 turns that into a per-word
 * penalty which is added to the new score in register 2 with signed
 * saturation. Assuming pw_1 is a vector of 16-bit ones (defined outside this
 * file - confirm), the penalty is 0 for accepted lanes and 0x4000 for
 * rejected lanes. The compare/select sequence that follows is the same as
 * in CHECK1, except that the mask is not saved again. Registers 3, 5 and 6
 * are modified.
 */
93 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
94  hurts both quality and speed, but matches the C version. */\
95  "paddw "MANGLE(pw_1)", "MM"6 \n\t"\
96  "psllw $14, "MM"6 \n\t"\
97  "paddsw "MM"6, "MM"2 \n\t"\
98  MOVQ" "MM"0, "MM"3 \n\t"\
99  "pcmpgtw "MM"2, "MM"3 \n\t"\
100  "pminsw "MM"2, "MM"0 \n\t"\
101  "pand "MM"3, "MM"5 \n\t"\
102  "pandn "MM"1, "MM"3 \n\t"\
103  "por "MM"5, "MM"3 \n\t"\
104  MOVQ" "MM"3, "MM"1 \n\t"
/**
 * Filter one line of pixels with the yadif algorithm, STEP bytes per loop
 * iteration, using MMX/SSE inline assembly.
 *
 * @param dst    output line; STEP bytes are stored per iteration
 * @param prev   byte pointer advanced in lock-step with cur/next; judging by
 *               the name, the same position in the previous picture (confirm
 *               against the caller)
 * @param cur    byte pointer into the current picture; rows at cur + mrefs
 *               and cur + prefs are read, including displacements from -3
 *               upwards relative to the current x (see the CHECK calls)
 * @param next   byte pointer, presumably into the next picture
 * @param w      loop bound: iterations run for x = 0, STEP, 2*STEP, ... while
 *               x < w. When w is not a multiple of STEP the last iteration
 *               still reads and writes a full STEP bytes.
 * @param prefs  byte offset added to reach the row referred to as "x+refs"
 *               in the comments
 * @param mrefs  byte offset added to reach the row referred to as "x-refs";
 *               presumably -prefs, but that is not enforced here
 * @param parity nonzero: prev2/next2 are prev/cur; zero: they are cur/next
 * @param mode   when mode >= 2 the additional check against the rows at
 *               2*mrefs / 2*prefs is skipped (jge 1f)
 *
 * No emms instruction is issued here in the MMX variant.
 */
106 void RENAME(ff_yadif_filter_line)(uint8_t *dst,
107  uint8_t *prev, uint8_t *cur, uint8_t *next,
108  int w, int prefs, int mrefs, int parity, int mode)
109 {
/* 16-byte aligned spill slots: tmp0 = c, tmp1 = d, tmp2 = e, tmp3 = diff. */
110  DECLARE_ALIGNED(16, uint8_t, tmp0[16]);
111  DECLARE_ALIGNED(16, uint8_t, tmp1[16]);
112  DECLARE_ALIGNED(16, uint8_t, tmp2[16]);
113  DECLARE_ALIGNED(16, uint8_t, tmp3[16]);
114  int x;
115 
/*
 * FILTER expands to the whole per-line loop. It is a macro so that the
 * string macros prev2/next2 can be pasted into the asm operand names; it is
 * expanded twice below, once per parity.
 *
 * Per iteration (all arithmetic on 16-bit words, register 7 = 0):
 *   c = cur[x-refs], e = cur[x+refs], d = (prev2[x] + next2[x]) >> 1
 *   diff = max(|prev2[x] - next2[x]| >> 1,
 *              (|prev[x-refs] - c| + |prev[x+refs] - e|) >> 1,
 *              (|next[x-refs] - c| + |next[x+refs] - e|) >> 1)
 *   spatial_pred  = (c + e) >> 1
 *   spatial_score = |c - e| + |cur[x-refs-1] - cur[x+refs-1]|
 *                           + |cur[x-refs+1] - cur[x+refs+1]| - pw_1
 *   four diagonal candidates are tried with CHECK/CHECK1/CHECK2
 *   if mode < 2: diff = MAX3(diff, min, -max) using the rows at 2*refs
 *   result = clip(spatial_pred, d - diff, d + diff), packed back to bytes
 *
 * NOTE(review): the main asm statement declares no clobbers although it
 * overwrites registers 0-7, and the result is written out by a separate asm
 * statement that expects register 1 to still hold the packed result. The
 * "=m"(*dst) operand of that store describes a single uint8_t while MOV
 * writes STEP bytes. All of this relies on the compiler not touching these
 * registers or caching dst[] across the statements.
 */
116 #define FILTER\
117  for(x=0; x<w; x+=STEP){\
118  __asm__ volatile(\
119  "pxor "MM"7, "MM"7 \n\t"\
120  LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
121  LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
122  LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
123  LOAD("(%["next2"])", MM"3") /* next2[x] */\
124  MOVQ" "MM"3, "MM"4 \n\t"\
125  "paddw "MM"2, "MM"3 \n\t"\
126  "psraw $1, "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
127  MOVQ" "MM"0, %[tmp0] \n\t" /* c */\
128  MOVQ" "MM"3, %[tmp1] \n\t" /* d */\
129  MOVQ" "MM"1, %[tmp2] \n\t" /* e */\
130  "psubw "MM"4, "MM"2 \n\t"\
131  PABS( MM"4", MM"2") /* temporal_diff0 */\
132  LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
133  LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
134  "psubw "MM"0, "MM"3 \n\t"\
135  "psubw "MM"1, "MM"4 \n\t"\
136  PABS( MM"5", MM"3")\
137  PABS( MM"5", MM"4")\
138  "paddw "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
139  "psrlw $1, "MM"2 \n\t"\
140  "psrlw $1, "MM"3 \n\t"\
141  "pmaxsw "MM"3, "MM"2 \n\t"\
142  LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
143  LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
144  "psubw "MM"0, "MM"3 \n\t"\
145  "psubw "MM"1, "MM"4 \n\t"\
146  PABS( MM"5", MM"3")\
147  PABS( MM"5", MM"4")\
148  "paddw "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
149  "psrlw $1, "MM"3 \n\t"\
150  "pmaxsw "MM"3, "MM"2 \n\t"\
151  MOVQ" "MM"2, %[tmp3] \n\t" /* diff */\
152 \
153  "paddw "MM"0, "MM"1 \n\t"\
154  "paddw "MM"0, "MM"0 \n\t"\
155  "psubw "MM"1, "MM"0 \n\t"\
156  "psrlw $1, "MM"1 \n\t" /* spatial_pred */\
157  PABS( MM"2", MM"0") /* ABS(c-e) */\
158 \
159  MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
160  MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
161  MOVQ" "MM"2, "MM"4 \n\t"\
162  "psubusb "MM"3, "MM"2 \n\t"\
163  "psubusb "MM"4, "MM"3 \n\t"\
164  "pmaxub "MM"3, "MM"2 \n\t"\
165  PSHUF(MM"3", MM"2") \
166  "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
167  "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
168  "paddw "MM"2, "MM"0 \n\t"\
169  "paddw "MM"3, "MM"0 \n\t"\
170  "psubw "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
171 \
172  CHECK(-2,0)\
173  CHECK1\
174  CHECK(-3,1)\
175  CHECK2\
176  CHECK(0,-2)\
177  CHECK1\
178  CHECK(1,-3)\
179  CHECK2\
180 \
181  /* if(p->mode<2) ... */\
182  MOVQ" %[tmp3], "MM"6 \n\t" /* diff */\
183  "cmpl $2, %[mode] \n\t"\
184  "jge 1f \n\t"\
185  LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
186  LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
187  LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
188  LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
189  "paddw "MM"4, "MM"2 \n\t"\
190  "paddw "MM"5, "MM"3 \n\t"\
191  "psrlw $1, "MM"2 \n\t" /* b */\
192  "psrlw $1, "MM"3 \n\t" /* f */\
193  MOVQ" %[tmp0], "MM"4 \n\t" /* c */\
194  MOVQ" %[tmp1], "MM"5 \n\t" /* d */\
195  MOVQ" %[tmp2], "MM"7 \n\t" /* e */\
196  "psubw "MM"4, "MM"2 \n\t" /* b-c */\
197  "psubw "MM"7, "MM"3 \n\t" /* f-e */\
198  MOVQ" "MM"5, "MM"0 \n\t"\
199  "psubw "MM"4, "MM"5 \n\t" /* d-c */\
200  "psubw "MM"7, "MM"0 \n\t" /* d-e */\
201  MOVQ" "MM"2, "MM"4 \n\t"\
202  "pminsw "MM"3, "MM"2 \n\t"\
203  "pmaxsw "MM"4, "MM"3 \n\t"\
204  "pmaxsw "MM"5, "MM"2 \n\t"\
205  "pminsw "MM"5, "MM"3 \n\t"\
206  "pmaxsw "MM"0, "MM"2 \n\t" /* max */\
207  "pminsw "MM"0, "MM"3 \n\t" /* min */\
208  "pxor "MM"4, "MM"4 \n\t"\
209  "pmaxsw "MM"3, "MM"6 \n\t"\
210  "psubw "MM"2, "MM"4 \n\t" /* -max */\
211  "pmaxsw "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
212  "1: \n\t"\
213 \
214  MOVQ" %[tmp1], "MM"2 \n\t" /* d */\
215  MOVQ" "MM"2, "MM"3 \n\t"\
216  "psubw "MM"6, "MM"2 \n\t" /* d-diff */\
217  "paddw "MM"6, "MM"3 \n\t" /* d+diff */\
218  "pmaxsw "MM"2, "MM"1 \n\t"\
219  "pminsw "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
220  "packuswb "MM"1, "MM"1 \n\t"\
221 \
222  :[tmp0]"=m"(tmp0),\
223  [tmp1]"=m"(tmp1),\
224  [tmp2]"=m"(tmp2),\
225  [tmp3]"=m"(tmp3)\
226  :[prev] "r"(prev),\
227  [cur] "r"(cur),\
228  [next] "r"(next),\
229  [prefs]"r"((x86_reg)prefs),\
230  [mrefs]"r"((x86_reg)mrefs),\
231  [mode] "g"(mode)\
232  );\
233  __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
234  dst += STEP;\
235  prev+= STEP;\
236  cur += STEP;\
237  next+= STEP;\
238  }
239 
/*
 * prev2/next2 are string macros naming the asm operands that FILTER uses
 * for the temporal pair: (prev, cur) when parity is nonzero, (cur, next)
 * otherwise. They are redefined around each expansion of FILTER.
 */
240  if (parity) {
241 #define prev2 "prev"
242 #define next2 "cur"
243  FILTER
244 #undef prev2
245 #undef next2
246  } else {
247 #define prev2 "cur"
248 #define next2 "next"
249  FILTER
250 #undef prev2
251 #undef next2
252  }
253 }
/*
 * Drop every helper macro defined by this template - presumably so the file
 * can be included again with a different COMPILE_TEMPLATE_* / RENAME
 * configuration without redefinition warnings.
 */
254 #undef STEP
255 #undef MM
256 #undef MOV
257 #undef MOVQ
258 #undef MOVQU
259 #undef PSHUF
260 #undef PSRL1
261 #undef PSRL2
262 #undef LOAD
263 #undef PABS
264 #undef CHECK
265 #undef CHECK1
266 #undef CHECK2
267 #undef FILTER
268