Main Page
Related Pages
Modules
Data Structures
Files
Examples
File List
Globals
libavfilter
x86
yadif_template.c
Go to the documentation of this file.
1
/*
2
* Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
3
*
4
* This file is part of Libav.
5
*
6
* Libav is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* (at your option) any later version.
10
*
11
* Libav is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
15
*
16
* You should have received a copy of the GNU General Public License along
17
* with Libav; if not, write to the Free Software Foundation, Inc.,
18
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19
*/
20
21
/*
 * Instruction-set dependent building blocks for the asm template below.
 * All of them expand to string-literal fragments (AT&T syntax) that are
 * concatenated into an extended-asm template, hence the doubled '%' in the
 * register prefix.
 *
 *   MM     register name prefix; MM"3" names vector register number 3
 *   MOV    mnemonic used by LOAD and by the final store of the result
 *   MOVQ   register-to-register move; also used for the tmp[] spills
 *   MOVQU  move used for the loads at byte offsets such as -1 or -3
 *   STEP   number of bytes the pointers advance per loop iteration
 *   PSRL1  shift a register right by one byte
 *   PSRL2  shift a register right by two bytes
 *   PSHUF  PSHUF(out, in): fill "out" from "in" moved down by two bytes.
 *          Note the argument order: the first argument is the register
 *          that gets written. In the MMX variant only the two low words
 *          are the shifted data (pshufw selector 9 picks words 1,2,0,0).
 */
#ifdef COMPILE_TEMPLATE_SSE2
#define MM            "%%xmm"
#define MOV           "movq"
#define MOVQ          "movdqa"
#define MOVQU         "movdqu"
#define STEP          8
#define PSRL1(r)      "psrldq $1, "r" \n\t"
#define PSRL2(r)      "psrldq $2, "r" \n\t"
#define PSHUF(out,in) "movdqa "in", "out" \n\t"\
                      "psrldq $2, "out" \n\t"
#else
#define MM            "%%mm"
#define MOV           "movd"
#define MOVQ          "movq"
#define MOVQU         "movq"
#define STEP          4
#define PSRL1(r)      "psrlq $8, "r" \n\t"
#define PSRL2(r)      "psrlq $16, "r" \n\t"
#define PSHUF(out,in) "pshufw $9, "in", "out" \n\t"
#endif

/*
 * LOAD(addr, reg): load from the memory operand "addr" into "reg" with MOV
 * and interleave its low bytes with register 7 (punpcklbw). The template
 * zeroes register 7 first, which turns the loaded bytes into 16-bit words.
 * The definition is the same for both instruction sets; MOV and MM are
 * resolved when LOAD is expanded.
 */
#define LOAD(addr,reg) \
    MOV" "addr", "reg" \n\t"\
    "punpcklbw "MM"7, "reg" \n\t"
47
48
/*
 * PABS(scratch, reg): replace the signed 16-bit words in "reg" by their
 * absolute values.
 *
 * With SSSE3 this is a single pabsw and "scratch" is left untouched.
 * Otherwise "scratch" is overwritten: it is cleared, set to 0 - reg, and
 * the signed per-word maximum of reg and -reg is kept in "reg".
 */
#ifdef COMPILE_TEMPLATE_SSSE3
#define PABS(scratch,reg) \
    "pabsw "reg", "reg" \n\t"
#else
#define PABS(scratch,reg) \
    "pxor "scratch", "scratch" \n\t"\
    "psubw "reg", "scratch" \n\t"\
    "pmaxsw "scratch", "reg" \n\t"
#endif
57
58
/*
 * CHECK(mrefs_off, prefs_off): evaluate one diagonal direction.
 *
 * Both arguments are pasted (stringified) as byte displacements:
 * mrefs_off is applied to the row at cur+mrefs, prefs_off to the row at
 * cur+prefs.
 *
 * Registers on exit:
 *   register 5  byte-wise average of the two rows, shifted right by one
 *               byte and widened to words. pavgb rounds up, so the low bit
 *               of (a ^ b) is subtracted again to get a truncating
 *               average (assumes pb_1 holds 1 in every byte - TODO confirm
 *               against its definition).
 *   register 2  score: sum of the absolute byte differences at three
 *               neighbouring positions (unshifted, shifted by one byte,
 *               shifted by two bytes), as words.
 * Registers 3 and 4 are used as scratch; register 7 must be zero.
 */
#define CHECK(mrefs_off,prefs_off) \
    MOVQU" "#mrefs_off"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
    MOVQU" "#prefs_off"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
    MOVQ" "MM"2, "MM"4 \n\t"\
    MOVQ" "MM"2, "MM"5 \n\t"\
    "pxor "MM"3, "MM"4 \n\t"\
    "pavgb "MM"3, "MM"5 \n\t"\
    "pand "MANGLE(pb_1)", "MM"4 \n\t"\
    "psubusb "MM"4, "MM"5 \n\t"\
    PSRL1(MM"5") \
    "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
    MOVQ" "MM"2, "MM"4 \n\t"\
    "psubusb "MM"3, "MM"2 \n\t"\
    "psubusb "MM"4, "MM"3 \n\t"\
    "pmaxub "MM"3, "MM"2 \n\t"\
    MOVQ" "MM"2, "MM"3 \n\t"\
    MOVQ" "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
    PSRL1(MM"3") /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
    PSRL2(MM"4") /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
    "punpcklbw "MM"7, "MM"2 \n\t"\
    "punpcklbw "MM"7, "MM"3 \n\t"\
    "punpcklbw "MM"7, "MM"4 \n\t"\
    "paddw "MM"3, "MM"2 \n\t"\
    "paddw "MM"4, "MM"2 \n\t" /* score */
/* score */
82
83
/*
 * CHECK1: accept the direction just evaluated by CHECK where it improves
 * the score.
 *
 * Expects: register 0 = spatial_score, register 1 = spatial_pred,
 *          register 2 = score and register 5 = candidate prediction
 *          (both as left by CHECK).
 * Leaves:  register 0 = min(score, spatial_score),
 *          register 1 = candidate where score < spatial_score, else the
 *                       previous spatial_pred,
 *          register 6 = copy of the per-word comparison mask, consumed by
 *                       a following CHECK2.
 * Register 3 is scratch; register 5 is masked in place.
 */
#define CHECK1 \
    MOVQ" "MM"0, "MM"3 \n\t"\
    "pcmpgtw "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
    "pminsw "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
    MOVQ" "MM"3, "MM"6 \n\t"\
    "pand "MM"3, "MM"5 \n\t"\
    "pandn "MM"1, "MM"3 \n\t"\
    "por "MM"5, "MM"3 \n\t"\
    MOVQ" "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
/* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
92
93
/*
 * CHECK2: like CHECK1, but only lets the new direction win in lanes where
 * the preceding CHECK1 succeeded.
 *
 * Register 6 must still hold the comparison mask stored by CHECK1. pw_1 is
 * added to it and the result is shifted left by 14, then added with signed
 * saturation to the score in register 2 before the comparison.
 * NOTE(review): assuming pw_1 holds 1 in every word, lanes in which CHECK1
 * failed get 1<<14 added to their score, lanes in which it succeeded get
 * 0 - confirm against the definition of pw_1.
 *
 * The select that follows is the same as in CHECK1, except that register 6
 * is not rewritten.
 */
#define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
                  hurts both quality and speed, but matches the C version. */\
    "paddw "MANGLE(pw_1)", "MM"6 \n\t"\
    "psllw $14, "MM"6 \n\t"\
    "paddsw "MM"6, "MM"2 \n\t"\
    MOVQ" "MM"0, "MM"3 \n\t"\
    "pcmpgtw "MM"2, "MM"3 \n\t"\
    "pminsw "MM"2, "MM"0 \n\t"\
    "pand "MM"3, "MM"5 \n\t"\
    "pandn "MM"1, "MM"3 \n\t"\
    "por "MM"5, "MM"3 \n\t"\
    MOVQ" "MM"3, "MM"1 \n\t"
105
106
static
void
RENAME
(
yadif_filter_line
)(
uint8_t
*dst,
uint8_t
*prev,
uint8_t
*cur,
107
uint8_t
*next,
int
w,
int
prefs,
108
int
mrefs,
int
parity,
int
mode)
109
{
110
DECLARE_ALIGNED
(16,
uint8_t
, tmp)[16*4];
111
int
x;
112
113
#define FILTER\
114
for(x=0; x<w; x+=STEP){\
115
__asm__ volatile(\
116
"pxor "MM"7, "MM"7 \n\t"\
117
LOAD("(%[cur],%[mrefs])", MM"0")
/* c = cur[x-refs] */
\
118
LOAD("(%[cur],%[prefs])", MM"1")
/* e = cur[x+refs] */
\
119
LOAD("(%["prev2"])", MM"2")
/* prev2[x] */
\
120
LOAD("(%["next2"])", MM"3")
/* next2[x] */
\
121
MOVQ" "MM"3, "MM"4 \n\t"\
122
"paddw "MM"2, "MM"3 \n\t"\
123
"psraw $1, "MM"3 \n\t"
/* d = (prev2[x] + next2[x])>>1 */
\
124
MOVQ" "MM"0, (%[tmp]) \n\t"
/* c */
\
125
MOVQ" "MM"3, 16(%[tmp]) \n\t"
/* d */
\
126
MOVQ" "MM"1, 32(%[tmp]) \n\t"
/* e */
\
127
"psubw "MM"4, "MM"2 \n\t"\
128
PABS( MM"4", MM"2")
/* temporal_diff0 */
\
129
LOAD("(%[prev],%[mrefs])", MM"3")
/* prev[x-refs] */
\
130
LOAD("(%[prev],%[prefs])", MM"4")
/* prev[x+refs] */
\
131
"psubw "MM"0, "MM"3 \n\t"\
132
"psubw "MM"1, "MM"4 \n\t"\
133
PABS( MM"5", MM"3")\
134
PABS( MM"5", MM"4")\
135
"paddw "MM"4, "MM"3 \n\t"
/* temporal_diff1 */
\
136
"psrlw $1, "MM"2 \n\t"\
137
"psrlw $1, "MM"3 \n\t"\
138
"pmaxsw "MM"3, "MM"2 \n\t"\
139
LOAD("(%[next],%[mrefs])", MM"3")
/* next[x-refs] */
\
140
LOAD("(%[next],%[prefs])", MM"4")
/* next[x+refs] */
\
141
"psubw "MM"0, "MM"3 \n\t"\
142
"psubw "MM"1, "MM"4 \n\t"\
143
PABS( MM"5", MM"3")\
144
PABS( MM"5", MM"4")\
145
"paddw "MM"4, "MM"3 \n\t"
/* temporal_diff2 */
\
146
"psrlw $1, "MM"3 \n\t"\
147
"pmaxsw "MM"3, "MM"2 \n\t"\
148
MOVQ" "MM"2, 48(%[tmp]) \n\t"
/* diff */
\
149
\
150
"paddw "MM"0, "MM"1 \n\t"\
151
"paddw "MM"0, "MM"0 \n\t"\
152
"psubw "MM"1, "MM"0 \n\t"\
153
"psrlw $1, "MM"1 \n\t"
/* spatial_pred */
\
154
PABS( MM"2", MM"0")
/* ABS(c-e) */
\
155
\
156
MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t"
/* cur[x-refs-1] */
\
157
MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t"
/* cur[x+refs-1] */
\
158
MOVQ" "MM"2, "MM"4 \n\t"\
159
"psubusb "MM"3, "MM"2 \n\t"\
160
"psubusb "MM"4, "MM"3 \n\t"\
161
"pmaxub "MM"3, "MM"2 \n\t"\
162
PSHUF(MM"3", MM"2") \
163
"punpcklbw "MM"7, "MM"2 \n\t"
/* ABS(cur[x-refs-1] - cur[x+refs-1]) */
\
164
"punpcklbw "MM"7, "MM"3 \n\t"
/* ABS(cur[x-refs+1] - cur[x+refs+1]) */
\
165
"paddw "MM"2, "MM"0 \n\t"\
166
"paddw "MM"3, "MM"0 \n\t"\
167
"psubw "MANGLE(pw_1)", "MM"0 \n\t"
/* spatial_score */
\
168
\
169
CHECK(-2,0)\
170
CHECK1\
171
CHECK(-3,1)\
172
CHECK2\
173
CHECK(0,-2)\
174
CHECK1\
175
CHECK(1,-3)\
176
CHECK2\
177
\
178
/* if(p->mode<2) ... */
\
179
MOVQ" 48(%[tmp]), "MM"6 \n\t"
/* diff */
\
180
"cmpl $2, %[mode] \n\t"\
181
"jge 1f \n\t"\
182
LOAD("(%["prev2"],%[mrefs],2)", MM"2")
/* prev2[x-2*refs] */
\
183
LOAD("(%["next2"],%[mrefs],2)", MM"4")
/* next2[x-2*refs] */
\
184
LOAD("(%["prev2"],%[prefs],2)", MM"3")
/* prev2[x+2*refs] */
\
185
LOAD("(%["next2"],%[prefs],2)", MM"5")
/* next2[x+2*refs] */
\
186
"paddw "MM"4, "MM"2 \n\t"\
187
"paddw "MM"5, "MM"3 \n\t"\
188
"psrlw $1, "MM"2 \n\t"
/* b */
\
189
"psrlw $1, "MM"3 \n\t"
/* f */
\
190
MOVQ" (%[tmp]), "MM"4 \n\t"
/* c */
\
191
MOVQ" 16(%[tmp]), "MM"5 \n\t"
/* d */
\
192
MOVQ" 32(%[tmp]), "MM"7 \n\t"
/* e */
\
193
"psubw "MM"4, "MM"2 \n\t"
/* b-c */
\
194
"psubw "MM"7, "MM"3 \n\t"
/* f-e */
\
195
MOVQ" "MM"5, "MM"0 \n\t"\
196
"psubw "MM"4, "MM"5 \n\t"
/* d-c */
\
197
"psubw "MM"7, "MM"0 \n\t"
/* d-e */
\
198
MOVQ" "MM"2, "MM"4 \n\t"\
199
"pminsw "MM"3, "MM"2 \n\t"\
200
"pmaxsw "MM"4, "MM"3 \n\t"\
201
"pmaxsw "MM"5, "MM"2 \n\t"\
202
"pminsw "MM"5, "MM"3 \n\t"\
203
"pmaxsw "MM"0, "MM"2 \n\t"
/* max */
\
204
"pminsw "MM"0, "MM"3 \n\t"
/* min */
\
205
"pxor "MM"4, "MM"4 \n\t"\
206
"pmaxsw "MM"3, "MM"6 \n\t"\
207
"psubw "MM"2, "MM"4 \n\t"
/* -max */
\
208
"pmaxsw "MM"4, "MM"6 \n\t"
/* diff= MAX3(diff, min, -max); */
\
209
"1: \n\t"\
210
\
211
MOVQ" 16(%[tmp]), "MM"2 \n\t"
/* d */
\
212
MOVQ" "MM"2, "MM"3 \n\t"\
213
"psubw "MM"6, "MM"2 \n\t"
/* d-diff */
\
214
"paddw "MM"6, "MM"3 \n\t"
/* d+diff */
\
215
"pmaxsw "MM"2, "MM"1 \n\t"\
216
"pminsw "MM"3, "MM"1 \n\t"
/* d = clip(spatial_pred, d-diff, d+diff); */
\
217
"packuswb "MM"1, "MM"1 \n\t"\
218
\
219
::[prev] "r"(prev),\
220
[cur] "r"(cur),\
221
[next] "r"(next),\
222
[prefs]"r"((x86_reg)prefs),\
223
[mrefs]"r"((x86_reg)mrefs),\
224
[mode] "g"(mode),\
225
[tmp] "r"(tmp)\
226
);\
227
__asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
228
dst += STEP;\
229
prev+= STEP;\
230
cur += STEP;\
231
next+= STEP;\
232
}
233
234
if
(parity) {
235
#define prev2 "prev"
236
#define next2 "cur"
237
FILTER
238
#undef prev2
239
#undef next2
240
}
else
{
241
#define prev2 "cur"
242
#define next2 "next"
243
FILTER
244
#undef prev2
245
#undef next2
246
}
247
}
248
/*
 * Drop every helper macro defined above so that the next definition of
 * them starts from a clean state.
 */

/* instruction-set selection */
#undef MM
#undef MOV
#undef MOVQ
#undef MOVQU
#undef STEP

/* instruction helpers */
#undef LOAD
#undef PSRL1
#undef PSRL2
#undef PSHUF
#undef PABS

/* direction checks and the loop body */
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER
uint8_t
uint8_t
Definition:
audio_convert.c:194
FILTER
#define FILTER
RENAME
#define RENAME(a)
Definition:
mpegaudiodec.c:107
yadif_filter_line
static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int prefs, int mrefs, int parity, int mode)
Definition:
yadif_template.c:106
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)