Main Page
Related Pages
Modules
Data Structures
Files
Examples
File List
Globals
libavfilter
x86
yadif_template.c
Go to the documentation of this file.
1
/*
2
* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
3
*
4
* This file is part of Libav.
5
*
6
* Libav is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* (at your option) any later version.
10
*
11
* Libav is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
15
*
16
* You should have received a copy of the GNU General Public License along
17
* with Libav; if not, write to the Free Software Foundation, Inc.,
18
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
*/
20
21
/*
 * Instruction-set selection for the inline-asm template below.
 *
 * With COMPILE_TEMPLATE_SSE defined the xmm registers are used and STEP is 8;
 * otherwise the mm registers are used and STEP is 4.
 *
 *   MM     register name prefix ("%%" is a literal '%' in an extended asm
 *          template); a register number is appended at the point of use
 *   MOV    narrow move used by LOAD and by the final store to dst
 *          (movq / movd)
 *   MOVQ   full-register move (movdqa / movq)
 *   MOVQU  full-register move used for loads at byte offsets such as -1 or -3
 *          (movdqu / movq)
 *   STEP   amount by which the loop counter and the dst/prev/cur/next
 *          pointers advance per iteration in ff_yadif_filter_line
 *   LOAD(mem,dst)   MOV from mem into dst, then punpcklbw with register 7;
 *                   the caller clears register 7 first, so this widens the
 *                   loaded bytes to 16-bit words
 *   PSRL1/PSRL2     shift a whole register right by one / two bytes
 *                   (psrldq counts bytes, psrlq counts bits)
 *   PSHUF(src,dst)  NOTE: despite the parameter names, the register passed
 *                   as "src" is the one written and "dst" is the one read
 *                   (AT&T operand order). SSE: copy, then shift right by two
 *                   bytes. MMX: pshufw $9, whose two low words are words 1
 *                   and 2 of the input register.
 */
#ifdef COMPILE_TEMPLATE_SSE
22
#define MM "%%xmm"
23
#define MOV "movq"
24
#define MOVQ "movdqa"
25
#define MOVQU "movdqu"
26
#define STEP 8
27
#define LOAD(mem,dst) \
28
MOV" "mem", "dst" \n\t"\
29
"punpcklbw "MM"7, "dst" \n\t"
30
#define PSRL1(reg) "psrldq $1, "reg" \n\t"
31
#define PSRL2(reg) "psrldq $2, "reg" \n\t"
32
#define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
33
"psrldq $2, "src" \n\t"
34
#else
35
#define MM "%%mm"
36
#define MOV "movd"
37
#define MOVQ "movq"
38
#define MOVQU "movq"
39
#define STEP 4
40
#define LOAD(mem,dst) \
41
MOV" "mem", "dst" \n\t"\
42
"punpcklbw "MM"7, "dst" \n\t"
43
#define PSRL1(reg) "psrlq $8, "reg" \n\t"
44
#define PSRL2(reg) "psrlq $16, "reg" \n\t"
45
#define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
46
#endif
47
48
/*
 * PABS(tmp,dst): replace each signed 16-bit word of dst by its absolute value.
 *
 * With COMPILE_TEMPLATE_SSSE3 a single pabsw is emitted and tmp is not
 * touched. Otherwise tmp is overwritten: it is cleared, dst is subtracted
 * from it (tmp = -dst), and dst becomes the signed word maximum of dst and
 * tmp. Callers must therefore treat tmp as a scratch register.
 */
#ifdef COMPILE_TEMPLATE_SSSE3
49
#define PABS(tmp,dst) \
50
"pabsw "dst", "dst" \n\t"
51
#else
52
#define PABS(tmp,dst) \
53
"pxor "tmp", "tmp" \n\t"\
54
"psubw "dst", "tmp" \n\t"\
55
"pmaxsw "tmp", "dst" \n\t"
56
#endif
57
58
/*
 * CHECK(pj,mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are stringified into byte displacements: pj is applied to the
 * (%[cur],%[mrefs]) address and mj to the (%[cur],%[prefs]) address.
 *
 * Register usage (numbers are appended to MM):
 *   in   7  must be zero (used for byte-to-word unpacking)
 *   out  5  candidate prediction as words: the byte average of the two
 *           loaded rows, shifted right by one byte. pavgb rounds up, so the
 *           low bit of (a ^ b) is subtracted to obtain (a + b) >> 1; this
 *           assumes pb_1 holds the value 1 in every byte (defined outside
 *           this file section - TODO confirm).
 *   out  2  score as words: sum of the absolute byte differences between
 *           the two rows at three consecutive byte positions (unshifted,
 *           shifted by one byte, shifted by two bytes). The absolute
 *           difference is built from two saturating subtractions and an
 *           unsigned byte maximum.
 *   clobbered: 3, 4
 */
#define CHECK(pj,mj) \
59
MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t"
/* cur[x-refs-1+j] */
\
60
MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t"
/* cur[x+refs-1-j] */
\
61
MOVQ" "MM"2, "MM"4 \n\t"\
62
MOVQ" "MM"2, "MM"5 \n\t"\
63
"pxor "MM"3, "MM"4 \n\t"\
64
"pavgb "MM"3, "MM"5 \n\t"\
65
"pand "MANGLE(pb_1)", "MM"4 \n\t"\
66
"psubusb "MM"4, "MM"5 \n\t"\
67
PSRL1(MM"5") \
68
"punpcklbw "MM"7, "MM"5 \n\t"
/* (cur[x-refs+j] + cur[x+refs-j])>>1 */
\
69
MOVQ" "MM"2, "MM"4 \n\t"\
70
"psubusb "MM"3, "MM"2 \n\t"\
71
"psubusb "MM"4, "MM"3 \n\t"\
72
"pmaxub "MM"3, "MM"2 \n\t"\
73
MOVQ" "MM"2, "MM"3 \n\t"\
74
MOVQ" "MM"2, "MM"4 \n\t"
/* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */
\
75
PSRL1(MM"3")
/* ABS(cur[x-refs +j] - cur[x+refs -j]) */
\
76
PSRL2(MM"4")
/* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */
\
77
"punpcklbw "MM"7, "MM"2 \n\t"\
78
"punpcklbw "MM"7, "MM"3 \n\t"\
79
"punpcklbw "MM"7, "MM"4 \n\t"\
80
"paddw "MM"3, "MM"2 \n\t"\
81
"paddw "MM"4, "MM"2 \n\t"
/* score */
82
83
/*
 * CHECK1: accept the direction just evaluated by CHECK wherever it scores
 * better than the best score so far.
 *
 * Register usage (numbers are appended to MM):
 *   in      2  score of the candidate, 5  candidate prediction
 *   in/out  0  best score so far; becomes the signed word minimum of 0 and 2
 *   in/out  1  current prediction; replaced by 5 in the lanes where the old
 *              best score was greater than the candidate score
 *   out     6  copy of the comparison mask (all ones in the lanes where the
 *              candidate won), consumed by a following CHECK2
 *   clobbered: 3, 5
 */
#define CHECK1 \
84
MOVQ" "MM"0, "MM"3 \n\t"\
85
"pcmpgtw "MM"2, "MM"3 \n\t"
/* if(score < spatial_score) */
\
86
"pminsw "MM"2, "MM"0 \n\t"
/* spatial_score= score; */
\
87
MOVQ" "MM"3, "MM"6 \n\t"\
88
"pand "MM"3, "MM"5 \n\t"\
89
"pandn "MM"1, "MM"3 \n\t"\
90
"por "MM"5, "MM"3 \n\t"\
91
MOVQ" "MM"3, "MM"1 \n\t"
/* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
92
93
/*
 * CHECK2: like CHECK1, but only lets the candidate win in lanes where the
 * preceding CHECK1 also won.
 *
 * Register 6 holds the CHECK1 mask (all ones or zero per word). Adding pw_1
 * (assumed to hold 1 in every word - defined outside this file section,
 * TODO confirm) turns that into 0 where CHECK1 won and 1 elsewhere; the
 * shift left by 14 scales the 1 to 0x4000, and the saturating add puts that
 * penalty on the candidate score in register 2. The remaining instructions
 * repeat the compare/min/select sequence of CHECK1 on registers 0, 1, 2
 * and 5, without saving the mask.
 *
 *   clobbered: 3, 5, 6 (register 2 is modified by the penalty)
 */
#define CHECK2
/* pretend not to have checked dir=2 if dir=1 was bad.\
94
hurts both quality and speed, but matches the C version. */
\
95
"paddw "MANGLE(pw_1)", "MM"6 \n\t"\
96
"psllw $14, "MM"6 \n\t"\
97
"paddsw "MM"6, "MM"2 \n\t"\
98
MOVQ" "MM"0, "MM"3 \n\t"\
99
"pcmpgtw "MM"2, "MM"3 \n\t"\
100
"pminsw "MM"2, "MM"0 \n\t"\
101
"pand "MM"3, "MM"5 \n\t"\
102
"pandn "MM"1, "MM"3 \n\t"\
103
"por "MM"5, "MM"3 \n\t"\
104
MOVQ" "MM"3, "MM"1 \n\t"
105
106
void
RENAME
(ff_yadif_filter_line)(uint8_t *dst,
107
uint8_t *prev, uint8_t *cur, uint8_t *next,
108
int
w,
int
prefs,
int
mrefs,
int
parity,
int
mode)
109
{
110
DECLARE_ALIGNED
(16, uint8_t, tmp0[16]);
111
DECLARE_ALIGNED
(16, uint8_t, tmp1[16]);
112
DECLARE_ALIGNED
(16, uint8_t, tmp2[16]);
113
DECLARE_ALIGNED
(16, uint8_t, tmp3[16]);
114
int
x;
115
116
#define FILTER\
117
for(x=0; x<w; x+=STEP){\
118
__asm__ volatile(\
119
"pxor "MM"7, "MM"7 \n\t"\
120
LOAD("(%[cur],%[mrefs])", MM"0")
/* c = cur[x-refs] */
\
121
LOAD("(%[cur],%[prefs])", MM"1")
/* e = cur[x+refs] */
\
122
LOAD("(%["prev2"])", MM"2")
/* prev2[x] */
\
123
LOAD("(%["next2"])", MM"3")
/* next2[x] */
\
124
MOVQ" "MM"3, "MM"4 \n\t"\
125
"paddw "MM"2, "MM"3 \n\t"\
126
"psraw $1, "MM"3 \n\t"
/* d = (prev2[x] + next2[x])>>1 */
\
127
MOVQ" "MM"0, %[tmp0] \n\t"
/* c */
\
128
MOVQ" "MM"3, %[tmp1] \n\t"
/* d */
\
129
MOVQ" "MM"1, %[tmp2] \n\t"
/* e */
\
130
"psubw "MM"4, "MM"2 \n\t"\
131
PABS( MM"4", MM"2")
/* temporal_diff0 */
\
132
LOAD("(%[prev],%[mrefs])", MM"3")
/* prev[x-refs] */
\
133
LOAD("(%[prev],%[prefs])", MM"4")
/* prev[x+refs] */
\
134
"psubw "MM"0, "MM"3 \n\t"\
135
"psubw "MM"1, "MM"4 \n\t"\
136
PABS( MM"5", MM"3")\
137
PABS( MM"5", MM"4")\
138
"paddw "MM"4, "MM"3 \n\t"
/* temporal_diff1 */
\
139
"psrlw $1, "MM"2 \n\t"\
140
"psrlw $1, "MM"3 \n\t"\
141
"pmaxsw "MM"3, "MM"2 \n\t"\
142
LOAD("(%[next],%[mrefs])", MM"3")
/* next[x-refs] */
\
143
LOAD("(%[next],%[prefs])", MM"4")
/* next[x+refs] */
\
144
"psubw "MM"0, "MM"3 \n\t"\
145
"psubw "MM"1, "MM"4 \n\t"\
146
PABS( MM"5", MM"3")\
147
PABS( MM"5", MM"4")\
148
"paddw "MM"4, "MM"3 \n\t"
/* temporal_diff2 */
\
149
"psrlw $1, "MM"3 \n\t"\
150
"pmaxsw "MM"3, "MM"2 \n\t"\
151
MOVQ" "MM"2, %[tmp3] \n\t"
/* diff */
\
152
\
153
"paddw "MM"0, "MM"1 \n\t"\
154
"paddw "MM"0, "MM"0 \n\t"\
155
"psubw "MM"1, "MM"0 \n\t"\
156
"psrlw $1, "MM"1 \n\t"
/* spatial_pred */
\
157
PABS( MM"2", MM"0")
/* ABS(c-e) */
\
158
\
159
MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t"
/* cur[x-refs-1] */
\
160
MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t"
/* cur[x+refs-1] */
\
161
MOVQ" "MM"2, "MM"4 \n\t"\
162
"psubusb "MM"3, "MM"2 \n\t"\
163
"psubusb "MM"4, "MM"3 \n\t"\
164
"pmaxub "MM"3, "MM"2 \n\t"\
165
PSHUF(MM"3", MM"2") \
166
"punpcklbw "MM"7, "MM"2 \n\t"
/* ABS(cur[x-refs-1] - cur[x+refs-1]) */
\
167
"punpcklbw "MM"7, "MM"3 \n\t"
/* ABS(cur[x-refs+1] - cur[x+refs+1]) */
\
168
"paddw "MM"2, "MM"0 \n\t"\
169
"paddw "MM"3, "MM"0 \n\t"\
170
"psubw "MANGLE(pw_1)", "MM"0 \n\t"
/* spatial_score */
\
171
\
172
CHECK(-2,0)\
173
CHECK1\
174
CHECK(-3,1)\
175
CHECK2\
176
CHECK(0,-2)\
177
CHECK1\
178
CHECK(1,-3)\
179
CHECK2\
180
\
181
/* if(p->mode<2) ... */
\
182
MOVQ" %[tmp3], "MM"6 \n\t"
/* diff */
\
183
"cmpl $2, %[mode] \n\t"\
184
"jge 1f \n\t"\
185
LOAD("(%["prev2"],%[mrefs],2)", MM"2")
/* prev2[x-2*refs] */
\
186
LOAD("(%["next2"],%[mrefs],2)", MM"4")
/* next2[x-2*refs] */
\
187
LOAD("(%["prev2"],%[prefs],2)", MM"3")
/* prev2[x+2*refs] */
\
188
LOAD("(%["next2"],%[prefs],2)", MM"5")
/* next2[x+2*refs] */
\
189
"paddw "MM"4, "MM"2 \n\t"\
190
"paddw "MM"5, "MM"3 \n\t"\
191
"psrlw $1, "MM"2 \n\t"
/* b */
\
192
"psrlw $1, "MM"3 \n\t"
/* f */
\
193
MOVQ" %[tmp0], "MM"4 \n\t"
/* c */
\
194
MOVQ" %[tmp1], "MM"5 \n\t"
/* d */
\
195
MOVQ" %[tmp2], "MM"7 \n\t"
/* e */
\
196
"psubw "MM"4, "MM"2 \n\t"
/* b-c */
\
197
"psubw "MM"7, "MM"3 \n\t"
/* f-e */
\
198
MOVQ" "MM"5, "MM"0 \n\t"\
199
"psubw "MM"4, "MM"5 \n\t"
/* d-c */
\
200
"psubw "MM"7, "MM"0 \n\t"
/* d-e */
\
201
MOVQ" "MM"2, "MM"4 \n\t"\
202
"pminsw "MM"3, "MM"2 \n\t"\
203
"pmaxsw "MM"4, "MM"3 \n\t"\
204
"pmaxsw "MM"5, "MM"2 \n\t"\
205
"pminsw "MM"5, "MM"3 \n\t"\
206
"pmaxsw "MM"0, "MM"2 \n\t"
/* max */
\
207
"pminsw "MM"0, "MM"3 \n\t"
/* min */
\
208
"pxor "MM"4, "MM"4 \n\t"\
209
"pmaxsw "MM"3, "MM"6 \n\t"\
210
"psubw "MM"2, "MM"4 \n\t"
/* -max */
\
211
"pmaxsw "MM"4, "MM"6 \n\t"
/* diff= MAX3(diff, min, -max); */
\
212
"1: \n\t"\
213
\
214
MOVQ" %[tmp1], "MM"2 \n\t"
/* d */
\
215
MOVQ" "MM"2, "MM"3 \n\t"\
216
"psubw "MM"6, "MM"2 \n\t"
/* d-diff */
\
217
"paddw "MM"6, "MM"3 \n\t"
/* d+diff */
\
218
"pmaxsw "MM"2, "MM"1 \n\t"\
219
"pminsw "MM"3, "MM"1 \n\t"
/* d = clip(spatial_pred, d-diff, d+diff); */
\
220
"packuswb "MM"1, "MM"1 \n\t"\
221
\
222
:[tmp0]"=m"(tmp0),\
223
[tmp1]"=m"(tmp1),\
224
[tmp2]"=m"(tmp2),\
225
[tmp3]"=m"(tmp3)\
226
:[prev] "r"(prev),\
227
[cur] "r"(cur),\
228
[next] "r"(next),\
229
[prefs]"r"((x86_reg)prefs),\
230
[mrefs]"r"((x86_reg)mrefs),\
231
[mode] "g"(mode)\
232
);\
233
__asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
234
dst += STEP;\
235
prev+= STEP;\
236
cur += STEP;\
237
next+= STEP;\
238
}
239
240
if
(parity) {
241
#define prev2 "prev"
242
#define next2 "cur"
243
FILTER
244
#undef prev2
245
#undef next2
246
}
else
{
247
#define prev2 "cur"
248
#define next2 "next"
249
FILTER
250
#undef prev2
251
#undef next2
252
}
253
}
254
/*
 * Drop every helper macro defined in this file section.
 * NOTE(review): presumably this lets the template be included more than once
 * with different COMPILE_TEMPLATE_* / RENAME settings - confirm against the
 * including file.
 */
#undef STEP
255
#undef MM
256
#undef MOV
257
#undef MOVQ
258
#undef MOVQU
259
#undef PSHUF
260
#undef PSRL1
261
#undef PSRL2
262
#undef LOAD
263
#undef PABS
264
#undef CHECK
265
#undef CHECK1
266
#undef CHECK2
267
#undef FILTER
#undef FILTER
268