swscale.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <inttypes.h>
22 #include <string.h>
23 #include <math.h>
24 #include <stdio.h>
25 #include "config.h"
26 #include <assert.h>
27 #include "swscale.h"
28 #include "swscale_internal.h"
29 #include "rgb2rgb.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/cpu.h"
32 #include "libavutil/avutil.h"
33 #include "libavutil/mathematics.h"
34 #include "libavutil/bswap.h"
35 #include "libavutil/pixdesc.h"
36 
#define DITHER1XBPP

/* RGB -> YUV conversion coefficients in RGB2YUV_SHIFT-bit fixed point.
 * The 219/255 (luma) and 224/255 (chroma) factors scale full-range RGB to
 * limited-range ("MPEG") YUV levels; +0.5 rounds to nearest. */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
49 
50 /*
51 NOTES
52 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
53 
54 TODO
55 more intelligent misalignment avoidance for the horizontal scaler
56 write special vertical cubic upscale version
57 optimize C code (YV12 / minmax)
58 add support for packed pixel YUV input & output
59 add support for Y8 output
60 optimize BGR24 & BGR32
61 add BGR4 output support
62 write special BGR->BGR scaler
63 */
64 
/* Ordered-dither matrices used when quantizing to low-bit-depth RGB/BGR
 * output. Each row is replicated/padded to 8 bytes so a row can be loaded
 * as a single aligned 8-byte vector. The _2x2/_4x4 tables are keyed by
 * output line parity; the suffix is the dither amplitude. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },
};

DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },
};

/* non-static: also referenced outside this file */
DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
{  8,   4,  11,   7,   8,   4,  11,   7, },
{  2,  14,   1,  13,   2,  14,   1,  13, },
{ 10,   6,   9,   5,  10,   6,   9,   5, },
{  0,  12,   3,  15,   0,  12,   3,  15, },
};

DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },
};

DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },
};
103 
/* 220-amplitude 8x8 dither matrix used by the 1bpp (mono) output path.
 * Only the first variant is compiled in; the disabled alternatives below
 * additionally try to compensate for display gammas of 1.5, 2.0 and 2.5. */
#if 1
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },
};
#elif 1
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#elif 1
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#else
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
/* 128-amplitude 8x8 dither matrix (even values 0..126).
 * NOTE(review): users of this table are not visible in this file — confirm
 * its consumers before changing layout or values. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
{  36,  68,  60,  92,  34,  66,  58,  90,},
{ 100,   4, 124,  28,  98,   2, 122,  26,},
{  52,  84,  44,  76,  50,  82,  42,  74,},
{ 116,  20, 108,  12, 114,  18, 106,  10,},
{  32,  64,  56,  88,  38,  70,  62,  94,},
{  96,   0, 120,  24, 102,   6, 126,  30,},
{  48,  80,  40,  72,  54,  86,  46,  78,},
{ 112,  16, 104,   8, 118,  22, 110,  14,},
};

/* eight bytes of 64 — presumably a SIMD rounding/bias constant; usage is in
 * the assembly code, not visible here. */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
{  64, 64, 64, 64, 64, 64, 64, 64 };
164 
/* Store one sample as a 16-bit value at "pos" in the requested endianness.
 * "val" is shifted down by the enclosing scope's "shift", clipped with
 * av_clip_<signedness>16() and offset by "bias" (used to map a signed
 * intermediate back to the unsigned output range). */
#define output_pixel(pos, val, bias, signedness) \
    if (big_endian) { \
        AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    } else { \
        AV_WL16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \
    }

/* Unscaled (single-tap) vertical output of a 16-bit plane from 32-bit
 * intermediates: round, shift down to output_bits and store. */
static av_always_inline void
yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 19 - output_bits; /* intermediates carry 19 extra bits */

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1)); /* round to nearest */
        output_pixel(&dest[i], val, 0, uint);
    }
}
184 
/* Multi-tap vertical scaling to a 16-bit plane: accumulate filterSize
 * 32-bit source lines weighted by 16-bit coefficients, then store with
 * output_pixel(). */
static av_always_inline void
yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
                         const int32_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 15 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = 1 << (30-output_bits);
        int j;

        /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
         * filters (or anything with negative coeffs), the range can be slightly
         * wider in both directions. To account for this overflow, we subtract
         * a constant so it always fits in the signed range (assuming a
         * reasonable filterSize), and re-add that at the end. */
        val -= 0x40000000;
        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        /* the 0x8000 bias undoes the -0x40000000 offset after >> shift */
        output_pixel(&dest[i], val, 0x8000, int);
    }
}

#undef output_pixel
211 
/* Store one sample as an output_bits-wide unsigned value in 16-bit storage,
 * honoring endianness; av_clip_uintp2 clips to [0, 2^output_bits - 1]. */
#define output_pixel(pos, val) \
    if (big_endian) { \
        AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    } else { \
        AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
    }

/* Unscaled (single-tap) vertical output of a 9/10-bit plane from 16-bit
 * intermediates. */
static av_always_inline void
yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 15 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = src[i] + (1 << (shift - 1)); /* round to nearest */
        output_pixel(&dest[i], val);
    }
}
231 
/* Multi-tap vertical scaling to a 9/10-bit plane. */
static av_always_inline void
yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian, int output_bits)
{
    int i;
    int shift = 11 + 16 - output_bits;

    for (i = 0; i < dstW; i++) {
        int val = 1 << (26-output_bits); /* rounding constant (1/2 ulp) */
        int j;

        for (j = 0; j < filterSize; j++)
            val += src[j][i] * filter[j];

        output_pixel(&dest[i], val);
    }
}

#undef output_pixel
252 
/* Instantiate the yuv2plane1/yuv2planeX entry points for one (bit depth,
 * endianness) pair by forwarding to the matching *_c_template, casting the
 * generic int16_t buffers to the template's actual sample type. */
#define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
                              uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}\
static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
                              const int16_t **src, uint8_t *dest, int dstW, \
                              const uint8_t *dither, int offset)\
{ \
    yuv2planeX_## template_size ## _c_template(filter, \
                         filterSize, (const typeX_t **) src, \
                         (uint16_t *) dest, dstW, is_be, bits); \
}

yuv2NBPS( 9, BE, 1, 10, int16_t)
yuv2NBPS( 9, LE, 0, 10, int16_t)
yuv2NBPS(10, BE, 1, 10, int16_t)
yuv2NBPS(10, LE, 0, 10, int16_t)
yuv2NBPS(16, BE, 1, 16, int32_t)
yuv2NBPS(16, LE, 0, 16, int32_t)
275 
/* Multi-tap vertical scaling to an 8-bit plane. The per-pixel dither value
 * is pre-loaded into the accumulator (at 12 fractional bits) before the
 * filter taps are summed; the result is scaled down and clipped to 8 bits. */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        int tap;
        int acc = dither[(x + offset) & 7] << 12;

        for (tap = 0; tap < filterSize; tap++)
            acc += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(acc >> 19);
    }
}
290 
/* Unscaled (single-tap) vertical output of an 8-bit plane: add the dither
 * value, drop the 7 fractional bits, clip to [0,255]. */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        const int sample = (src[x] + dither[(x + offset) & 7]) >> 7;

        dest[x] = av_clip_uint8(sample);
    }
}
300 
301 static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
302  const int16_t **chrUSrc, const int16_t **chrVSrc,
303  uint8_t *dest, int chrDstW)
304 {
305  enum PixelFormat dstFormat = c->dstFormat;
306  const uint8_t *chrDither = c->chrDither8;
307  int i;
308 
309  if (dstFormat == PIX_FMT_NV12)
310  for (i=0; i<chrDstW; i++) {
311  int u = chrDither[i & 7] << 12;
312  int v = chrDither[(i + 3) & 7] << 12;
313  int j;
314  for (j=0; j<chrFilterSize; j++) {
315  u += chrUSrc[j][i] * chrFilter[j];
316  v += chrVSrc[j][i] * chrFilter[j];
317  }
318 
319  dest[2*i]= av_clip_uint8(u>>19);
320  dest[2*i+1]= av_clip_uint8(v>>19);
321  }
322  else
323  for (i=0; i<chrDstW; i++) {
324  int u = chrDither[i & 7] << 12;
325  int v = chrDither[(i + 3) & 7] << 12;
326  int j;
327  for (j=0; j<chrFilterSize; j++) {
328  u += chrUSrc[j][i] * chrFilter[j];
329  v += chrVSrc[j][i] * chrFilter[j];
330  }
331 
332  dest[2*i]= av_clip_uint8(v>>19);
333  dest[2*i+1]= av_clip_uint8(u>>19);
334  }
335 }
336 
/* Write one 16-bit gray sample in the endianness selected by "target". */
#define output_pixel(pos, val) \
    if (target == PIX_FMT_GRAY16BE) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }

/* Full vertical-filter path to 16-bit gray output; converts two pixels per
 * iteration (an odd trailing pixel, if any, is not written here). */
static av_always_inline void
yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
                        int y, enum PixelFormat target)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        /* accumulator starts biased by -0x40000000 so it stays inside the
         * signed int range; undone by the +0x8000 after the final >> 15 */
        int Y1 = (1 << 14) - 0x40000000;
        int Y2 = (1 << 14) - 0x40000000;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        Y1 >>= 15;
        Y2 >>= 15;
        Y1 = av_clip_int16(Y1);
        Y2 = av_clip_int16(Y2);
        output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
        output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
    }
}
371 
/* Bilinear (two-line blend) path to 16-bit gray output; yalpha is the
 * 12-bit blend factor between buf[0] and buf[1]. No clipping is applied
 * here, unlike the _X variant. */
static av_always_inline void
yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
                        enum PixelFormat target)
{
    int yalpha1 = 4095 - yalpha; /* weight of buf[0] */
    int i;
    const int32_t *buf0 = buf[0], *buf1 = buf[1];

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = (buf0[i * 2    ] * yalpha1 + buf1[i * 2    ] * yalpha) >> 15;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;

        output_pixel(&dest[i * 2 + 0], Y1);
        output_pixel(&dest[i * 2 + 1], Y2);
    }
}
391 
/* Unscaled (single source line) path to 16-bit gray output.
 * NOTE(review): the << 1 presumably rescales the intermediates to the full
 * 16-bit output range — confirm against the intermediate bit depth. */
static av_always_inline void
yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
                        int uvalpha, int y, enum PixelFormat target)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int Y1 = buf0[i * 2    ] << 1;
        int Y2 = buf0[i * 2 + 1] << 1;

        output_pixel(&dest[i * 2 + 0], Y1);
        output_pixel(&dest[i * 2 + 1], Y2);
    }
}

#undef output_pixel
410 
/* Generate the three standard output entry points (_X full filter, _2
 * bilinear blend, _1 unscaled) for a packed 16-bit-per-component format.
 * The generic int16_t buffer pointers from the common interface are
 * reinterpreted as the int32_t intermediates these templates expect. */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    const int32_t **lumSrc  = (const int32_t **) _lumSrc, \
                  **chrUSrc = (const int32_t **) _chrUSrc, \
                  **chrVSrc = (const int32_t **) _chrVSrc, \
                  **alpSrc  = (const int32_t **) _alpSrc; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    const int32_t **buf  = (const int32_t **) _buf, \
                  **ubuf = (const int32_t **) _ubuf, \
                  **vbuf = (const int32_t **) _vbuf, \
                  **abuf = (const int32_t **) _abuf; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    const int32_t *buf0  = (const int32_t *)  _buf0, \
                 **ubuf  = (const int32_t **) _ubuf, \
                 **vbuf  = (const int32_t **) _vbuf, \
                  *abuf0 = (const int32_t *)  _abuf0; \
    uint16_t *dest = (uint16_t *) _dest; \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                          dstW, uvalpha, y, fmt); \
}

YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
459 
/* Emit one packed byte of 1bpp output: MONOBLACK stores the bit accumulator
 * as-is, MONOWHITE stores its bitwise complement. */
#define output_pixel(pos, acc) \
    if (target == PIX_FMT_MONOBLACK) { \
        pos = acc; \
    } else { \
        pos = ~acc; \
    }

/* Full vertical-filter path to 1bpp output, using the 220-amplitude dither
 * matrix and "g", a grayscale LUT formed from the gU/gV tables at neutral
 * (128) chroma. Bits accumulate MSB-first; a byte is flushed every 8
 * pixels. NOTE(review): pixels beyond the last multiple of 8 are
 * accumulated but never written — confirm callers guarantee dstW % 8 == 0. */
static av_always_inline void
yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
                      const int16_t **lumSrc, int lumFilterSize,
                      const int16_t *chrFilter, const int16_t **chrUSrc,
                      const int16_t **chrVSrc, int chrFilterSize,
                      const int16_t **alpSrc, uint8_t *dest, int dstW,
                      int y, enum PixelFormat target)
{
    const uint8_t * const d128 = dither_8x8_220[y & 7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int i;
    unsigned acc = 0;

    for (i = 0; i < dstW - 1; i += 2) {
        int j;
        int Y1 = 1 << 18; /* rounding constant for the >> 19 below */
        int Y2 = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i]     * lumFilter[j];
            Y2 += lumSrc[j][i + 1] * lumFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        if ((Y1 | Y2) & 0x100) { /* either value left [0,255]: clip both */
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
        }
        acc += acc + g[Y1 + d128[(i + 0) & 7]]; /* shift in one bit */
        acc += acc + g[Y2 + d128[(i + 1) & 7]];
        if ((i & 7) == 6) {
            output_pixel(*dest++, acc);
        }
    }
}
502 
/* Bilinear (two-line blend) path to 1bpp output; one output byte (8 pixels)
 * per iteration. Trailing pixels (dstW % 8) are not written. */
static av_always_inline void
yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf[2], uint8_t *dest, int dstW,
                      int yalpha, int uvalpha, int y,
                      enum PixelFormat target)
{
    const int16_t *buf0 = buf[0], *buf1 = buf[1];
    const uint8_t * const d128 = dither_8x8_220[y & 7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int yalpha1 = 4095 - yalpha; /* weight of buf[0] */
    int i;

    for (i = 0; i < dstW - 7; i += 8) {
        int acc   =       g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
        output_pixel(*dest++, acc);
    }
}
528 
/* Unscaled (single source line) path to 1bpp output; 8 pixels per output
 * byte. Trailing pixels (dstW % 8) are not written. */
static av_always_inline void
yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
                      const int16_t *ubuf[2], const int16_t *vbuf[2],
                      const int16_t *abuf0, uint8_t *dest, int dstW,
                      int uvalpha, int y, enum PixelFormat target)
{
    const uint8_t * const d128 = dither_8x8_220[y & 7];
    uint8_t *g = c->table_gU[128] + c->table_gV[128];
    int i;

    for (i = 0; i < dstW - 7; i += 8) {
        int acc   =       g[(buf0[i    ] >> 7) + d128[0]];
        acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
        acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
        acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
        acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
        acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
        acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
        acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
        output_pixel(*dest++, acc);
    }
}

#undef output_pixel
553 
/* Generate the three standard output entry points (_X full filter, _2
 * bilinear blend, _1 unscaled) for a packed 8-bit-per-component format by
 * forwarding to the corresponding *_c_template with "fmt" as the
 * compile-time target. */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **chrUSrc, \
                        const int16_t **chrVSrc, int chrFilterSize, \
                        const int16_t **alpSrc, uint8_t *dest, int dstW, \
                        int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                          chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                          alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf[2], uint8_t *dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                          dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                        const int16_t *ubuf[2], const int16_t *vbuf[2], \
                        const int16_t *abuf0, uint8_t *dest, int dstW, \
                        int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                          abuf0, dest, dstW, uvalpha, \
                          y, fmt); \
}

YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
588 
/* Write one 4:2:2 macropixel (two luma samples sharing one U/V pair) in
 * YUYV or UYVY component order. */
#define output_pixels(pos, Y1, U, Y2, V) \
    if (target == PIX_FMT_YUYV422) { \
        dest[pos + 0] = Y1; \
        dest[pos + 1] = U; \
        dest[pos + 2] = Y2; \
        dest[pos + 3] = V; \
    } else { \
        dest[pos + 0] = U; \
        dest[pos + 1] = Y1; \
        dest[pos + 2] = V; \
        dest[pos + 3] = Y2; \
    }

/* Full vertical-filter path to packed 4:2:2 output. */
static av_always_inline void
yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target)
{
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        int Y1 = 1 << 18; /* rounding constants for the >> 19 below */
        int Y2 = 1 << 18;
        int U  = 1 << 18;
        int V  = 1 << 18;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        if ((Y1 | Y2 | U | V) & 0x100) { /* any out of [0,255]: clip all */
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        }
        output_pixels(4*i, Y1, U, Y2, V);
    }
}
640 
641 static av_always_inline void
642 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
643  const int16_t *ubuf[2], const int16_t *vbuf[2],
644  const int16_t *abuf[2], uint8_t *dest, int dstW,
645  int yalpha, int uvalpha, int y,
646  enum PixelFormat target)
647 {
648  const int16_t *buf0 = buf[0], *buf1 = buf[1],
649  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
650  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
651  int yalpha1 = 4095 - yalpha;
652  int uvalpha1 = 4095 - uvalpha;
653  int i;
654 
655  for (i = 0; i < (dstW >> 1); i++) {
656  int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
657  int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
658  int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
659  int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
660 
661  output_pixels(i * 4, Y1, U, Y2, V);
662  }
663 }
664 
/* Unscaled (single luma line) path to packed 4:2:2. When uvalpha < 2048 a
 * single chroma line is used; otherwise the two chroma lines are averaged. */
static av_always_inline void
yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target)
{
    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int i;

    if (uvalpha < 2048) {
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2]     >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            /* NOTE(review): reads ubuf1/vbuf1 (not ubuf0/vbuf0) in the
             * single-line case — verify this is intended */
            int U  = ubuf1[i] >> 7;
            int V  = vbuf1[i] >> 7;

            output_pixels(i * 4, Y1, U, Y2, V);
        }
    } else {
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2]     >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U  = (ubuf0[i] + ubuf1[i]) >> 8; /* average, then >> 7 */
            int V  = (vbuf0[i] + vbuf1[i]) >> 8;

            output_pixels(i * 4, Y1, U, Y2, V);
        }
    }
}

#undef output_pixels

YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
700 
701 #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
702 #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
703 #define output_pixel(pos, val) \
704  if (isBE(target)) { \
705  AV_WB16(pos, val); \
706  } else { \
707  AV_WL16(pos, val); \
708  }
709 
710 static av_always_inline void
711 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
712  const int32_t **lumSrc, int lumFilterSize,
713  const int16_t *chrFilter, const int32_t **chrUSrc,
714  const int32_t **chrVSrc, int chrFilterSize,
715  const int32_t **alpSrc, uint16_t *dest, int dstW,
716  int y, enum PixelFormat target)
717 {
718  int i;
719 
720  for (i = 0; i < (dstW >> 1); i++) {
721  int j;
722  int Y1 = -0x40000000;
723  int Y2 = -0x40000000;
724  int U = -128 << 23; // 19
725  int V = -128 << 23;
726  int R, G, B;
727 
728  for (j = 0; j < lumFilterSize; j++) {
729  Y1 += lumSrc[j][i * 2] * lumFilter[j];
730  Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
731  }
732  for (j = 0; j < chrFilterSize; j++) {
733  U += chrUSrc[j][i] * chrFilter[j];
734  V += chrVSrc[j][i] * chrFilter[j];
735  }
736 
737  // 8bit: 12+15=27; 16-bit: 12+19=31
738  Y1 >>= 14; // 10
739  Y1 += 0x10000;
740  Y2 >>= 14;
741  Y2 += 0x10000;
742  U >>= 14;
743  V >>= 14;
744 
745  // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
746  Y1 -= c->yuv2rgb_y_offset;
747  Y2 -= c->yuv2rgb_y_offset;
748  Y1 *= c->yuv2rgb_y_coeff;
749  Y2 *= c->yuv2rgb_y_coeff;
750  Y1 += 1 << 13; // 21
751  Y2 += 1 << 13;
752  // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
753 
754  R = V * c->yuv2rgb_v2r_coeff;
755  G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
756  B = U * c->yuv2rgb_u2b_coeff;
757 
758  // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
759  output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
760  output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
761  output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
762  output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
763  output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
764  output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
765  dest += 6;
766  }
767 }
768 
769 static av_always_inline void
770 yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
771  const int32_t *ubuf[2], const int32_t *vbuf[2],
772  const int32_t *abuf[2], uint16_t *dest, int dstW,
773  int yalpha, int uvalpha, int y,
774  enum PixelFormat target)
775 {
776  const int32_t *buf0 = buf[0], *buf1 = buf[1],
777  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
778  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
779  int yalpha1 = 4095 - yalpha;
780  int uvalpha1 = 4095 - uvalpha;
781  int i;
782 
783  for (i = 0; i < (dstW >> 1); i++) {
784  int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
785  int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
786  int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
787  int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
788  int R, G, B;
789 
790  Y1 -= c->yuv2rgb_y_offset;
791  Y2 -= c->yuv2rgb_y_offset;
792  Y1 *= c->yuv2rgb_y_coeff;
793  Y2 *= c->yuv2rgb_y_coeff;
794  Y1 += 1 << 13;
795  Y2 += 1 << 13;
796 
797  R = V * c->yuv2rgb_v2r_coeff;
798  G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
799  B = U * c->yuv2rgb_u2b_coeff;
800 
801  output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
802  output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
803  output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
804  output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
805  output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
806  output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
807  dest += 6;
808  }
809 }
810 
811 static av_always_inline void
812 yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
813  const int32_t *ubuf[2], const int32_t *vbuf[2],
814  const int32_t *abuf0, uint16_t *dest, int dstW,
815  int uvalpha, int y, enum PixelFormat target)
816 {
817  const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
818  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
819  int i;
820 
821  if (uvalpha < 2048) {
822  for (i = 0; i < (dstW >> 1); i++) {
823  int Y1 = (buf0[i * 2] ) >> 2;
824  int Y2 = (buf0[i * 2 + 1]) >> 2;
825  int U = (ubuf0[i] + (-128 << 11)) >> 2;
826  int V = (vbuf0[i] + (-128 << 11)) >> 2;
827  int R, G, B;
828 
829  Y1 -= c->yuv2rgb_y_offset;
830  Y2 -= c->yuv2rgb_y_offset;
831  Y1 *= c->yuv2rgb_y_coeff;
832  Y2 *= c->yuv2rgb_y_coeff;
833  Y1 += 1 << 13;
834  Y2 += 1 << 13;
835 
836  R = V * c->yuv2rgb_v2r_coeff;
837  G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
838  B = U * c->yuv2rgb_u2b_coeff;
839 
840  output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
841  output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
842  output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
843  output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
844  output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
845  output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
846  dest += 6;
847  }
848  } else {
849  for (i = 0; i < (dstW >> 1); i++) {
850  int Y1 = (buf0[i * 2] ) >> 2;
851  int Y2 = (buf0[i * 2 + 1]) >> 2;
852  int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
853  int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
854  int R, G, B;
855 
856  Y1 -= c->yuv2rgb_y_offset;
857  Y2 -= c->yuv2rgb_y_offset;
858  Y1 *= c->yuv2rgb_y_coeff;
859  Y2 *= c->yuv2rgb_y_coeff;
860  Y1 += 1 << 13;
861  Y2 += 1 << 13;
862 
863  R = V * c->yuv2rgb_v2r_coeff;
864  G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
865  B = U * c->yuv2rgb_u2b_coeff;
866 
867  output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
868  output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
869  output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
870  output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
871  output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
872  output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
873  dest += 6;
874  }
875  }
876 }
877 
878 #undef output_pixel
879 #undef r_b
880 #undef b_r
881 
882 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
883 YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
884 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
885 YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
886 
887 /*
888  * Write out 2 RGB pixels in the target pixel format. This function takes a
889  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
890  * things like endianness conversion and shifting. The caller takes care of
891  * setting the correct offset in these tables from the chroma (U/V) values.
892  * This function then uses the luminance (Y1/Y2) values to write out the
893  * correct RGB values into the destination buffer.
894  */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
              unsigned A1, unsigned A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum PixelFormat target, int hasAlpha)
{
    /* Write two horizontally adjacent pixels (output indices i*2 and
     * i*2+1) in the packed layout selected by 'target'.  _r/_g/_b point
     * into the per-component LUTs (already offset by the chroma values
     * by the caller) and are indexed here with the luma values Y1/Y2;
     * 'y' is the output row, used only to select dither offsets.
     * 'target' and 'hasAlpha' are compile-time template parameters, so
     * all the branches on them fold away in each instantiation. */
    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

#if CONFIG_SMALL
        /* alpha lives in byte 0 for the *32_1 layouts, byte 3 otherwise */
        int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
#else
        if (hasAlpha) {
            int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;

            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
        } else {
            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
        }
#endif
    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

/* swap the R and B lookups for the BGR byte order */
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] = g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] = g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
#undef r_b
#undef b_r
    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        /* Ordered dither: the d* values offset the LUT index per output
         * column/row; B uses the row-inverted pattern so its error is
         * anti-correlated with R's. */
        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_4[ y & 1     ][0];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_4[ y & 1     ][1];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_8[ y & 1     ][1];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_8[ y & 1     ][0];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else {
            /* 444: 4 bits per component, 4x4 dither matrix */
            dr1 = dither_4x4_16[ y & 3     ][0];
            dg1 = dither_4x4_16[ y & 3     ][1];
            db1 = dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = dither_4x4_16[ y & 3     ][1];
            dg2 = dither_4x4_16[ y & 3     ][0];
            db2 = dither_4x4_16[(y & 3) ^ 3][1];
        }

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else /* 8/4-bit */ {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
            const uint8_t * const d64 = dither_8x8_73[y & 7];
            const uint8_t * const d32 = dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
        } else {
            const uint8_t * const d64  = dither_8x8_73 [y & 7];
            const uint8_t * const d128 = dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 =        d64[(i * 2 + 1) & 7];
        }

        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
            /* two 4-bit pixels packed into one byte, second in the high nibble */
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                      ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
        } else {
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
        }
    }
}
1006 
static av_always_inline void
yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
                     const int16_t **lumSrc, int lumFilterSize,
                     const int16_t *chrFilter, const int16_t **chrUSrc,
                     const int16_t **chrVSrc, int chrFilterSize,
                     const int16_t **alpSrc, uint8_t *dest, int dstW,
                     int y, enum PixelFormat target, int hasAlpha)
{
    /* Multi-tap vertical scaling to packed RGB: for each pair of output
     * pixels accumulate lumFilterSize luma (and optionally alpha) taps
     * and chrFilterSize chroma taps, reduce to 8 bits, then convert via
     * the context's per-component LUTs and yuv2rgb_write(). */
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        int j;
        /* 1 << 18 is the rounding bias for the >> 19 reductions below */
        int Y1 = 1 << 18;
        int Y2 = 1 << 18;
        int U  = 1 << 18;
        int V  = 1 << 18;
        int av_unused A1, A2;
        const void *r, *g, *b;

        for (j = 0; j < lumFilterSize; j++) {
            Y1 += lumSrc[j][i * 2]     * lumFilter[j];
            Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
        }
        for (j = 0; j < chrFilterSize; j++) {
            U += chrUSrc[j][i] * chrFilter[j];
            V += chrVSrc[j][i] * chrFilter[j];
        }
        Y1 >>= 19;
        Y2 >>= 19;
        U  >>= 19;
        V  >>= 19;
        /* clip only if some value fell outside 0..255 (bit 8 set, or
         * negative, which also sets bit 8 in two's complement) */
        if ((Y1 | Y2 | U | V) & 0x100) {
            Y1 = av_clip_uint8(Y1);
            Y2 = av_clip_uint8(Y2);
            U  = av_clip_uint8(U);
            V  = av_clip_uint8(V);
        }
        if (hasAlpha) {
            A1 = 1 << 18;
            A2 = 1 << 18;
            for (j = 0; j < lumFilterSize; j++) {
                A1 += alpSrc[j][i * 2    ] * lumFilter[j];
                A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
            }
            A1 >>= 19;
            A2 >>= 19;
            if ((A1 | A2) & 0x100) {
                A1 = av_clip_uint8(A1);
                A2 = av_clip_uint8(A2);
            }
        }

        /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
        r = c->table_rV[V];
        g = (c->table_gU[U] + c->table_gV[V]);
        b = c->table_bU[U];

        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
    }
}
1068 
static av_always_inline void
yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf[2], uint8_t *dest, int dstW,
                     int yalpha, int uvalpha, int y,
                     enum PixelFormat target, int hasAlpha)
{
    /* Bilinear (two-line) vertical scaling to packed RGB: blend two
     * input lines with 12-bit weights yalpha/uvalpha (0..4095), reduce
     * to 8 bits and convert via the context's LUTs. */
    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
                  *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
                  *abuf0 = hasAlpha ? abuf[0] : NULL,
                  *abuf1 = hasAlpha ? abuf[1] : NULL;
    int yalpha1  = 4095 - yalpha;   /* complementary weight for line 0 */
    int uvalpha1 = 4095 - uvalpha;
    int i;

    for (i = 0; i < (dstW >> 1); i++) {
        /* 12-bit weight * 15-bit sample: >> 19 yields an 8-bit value */
        int Y1 = (buf0[i * 2]     * yalpha1 + buf1[i * 2]     * yalpha) >> 19;
        int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
        int U  = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
        int V  = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
        int A1, A2;  /* only valid when hasAlpha (compile-time constant) */
        const void *r = c->table_rV[V],
                   *g = (c->table_gU[U] + c->table_gV[V]),
                   *b = c->table_bU[U];

        if (hasAlpha) {
            A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
            A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
        }

        yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                      r, g, b, y, target, hasAlpha);
    }
}
1104 
static av_always_inline void
yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
                     const int16_t *ubuf[2], const int16_t *vbuf[2],
                     const int16_t *abuf0, uint8_t *dest, int dstW,
                     int uvalpha, int y, enum PixelFormat target,
                     int hasAlpha)
{
    /* Unscaled (single-line) output path: luma/alpha come straight from
     * buf0/abuf0 (>> 7 drops the 15-bit headroom); chroma either comes
     * from one line or is averaged over both, depending on uvalpha. */
    const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
    int i;

    if (uvalpha < 2048) {
        /* closer to the second chroma line: take it as-is
         * (NOTE(review): only ubuf1/vbuf1 are read here — presumably
         * the caller arranges for it to hold the relevant line) */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2]     >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U  = ubuf1[i] >> 7;
            int V  = vbuf1[i] >> 7;
            int A1, A2;  /* only valid when hasAlpha */
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];

            if (hasAlpha) {
                A1 = abuf0[i * 2    ] >> 7;
                A2 = abuf0[i * 2 + 1] >> 7;
            }

            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
        }
    } else {
        /* halfway between chroma lines: average them (>> 8 = sum >> 7 / 2) */
        for (i = 0; i < (dstW >> 1); i++) {
            int Y1 = buf0[i * 2]     >> 7;
            int Y2 = buf0[i * 2 + 1] >> 7;
            int U  = (ubuf0[i] + ubuf1[i]) >> 8;
            int V  = (vbuf0[i] + vbuf1[i]) >> 8;
            int A1, A2;
            const void *r = c->table_rV[V],
                       *g = (c->table_gU[U] + c->table_gV[V]),
                       *b = c->table_bU[U];

            if (hasAlpha) {
                A1 = abuf0[i * 2    ] >> 7;
                A2 = abuf0[i * 2 + 1] >> 7;
            }

            yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
                          r, g, b, y, target, hasAlpha);
        }
    }
}
1156 
/* Instantiate the multi-tap (_X_c) output function for one pixel
 * format; the template parameters fmt/hasAlpha are compile-time
 * constants, so each instantiation is fully specialized. */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
}

/* Instantiate all three output variants (_X_c multi-tap, _2_c bilinear,
 * _1_c unscaled) for one pixel format. */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
}
1188 
1189 #if CONFIG_SMALL
1190 YUV2RGBWRAPPER(yuv2rgb,, 32_1, PIX_FMT_RGB32_1, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
1192 #else
1193 #if CONFIG_SWSCALE_ALPHA
1194 YUV2RGBWRAPPER(yuv2rgb,, a32_1, PIX_FMT_RGB32_1, 1)
1195 YUV2RGBWRAPPER(yuv2rgb,, a32, PIX_FMT_RGB32, 1)
1196 #endif
1197 YUV2RGBWRAPPER(yuv2rgb,, x32_1, PIX_FMT_RGB32_1, 0)
1198 YUV2RGBWRAPPER(yuv2rgb,, x32, PIX_FMT_RGB32, 0)
1199 #endif
1200 YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24, 0)
1201 YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24, 0)
1202 YUV2RGBWRAPPER(yuv2rgb,, 16, PIX_FMT_RGB565, 0)
1203 YUV2RGBWRAPPER(yuv2rgb,, 15, PIX_FMT_RGB555, 0)
1204 YUV2RGBWRAPPER(yuv2rgb,, 12, PIX_FMT_RGB444, 0)
1205 YUV2RGBWRAPPER(yuv2rgb,, 8, PIX_FMT_RGB8, 0)
1206 YUV2RGBWRAPPER(yuv2rgb,, 4, PIX_FMT_RGB4, 0)
1207 YUV2RGBWRAPPER(yuv2rgb,, 4b, PIX_FMT_RGB4_BYTE, 0)
1208 
1209 static av_always_inline void
1210 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1211  const int16_t **lumSrc, int lumFilterSize,
1212  const int16_t *chrFilter, const int16_t **chrUSrc,
1213  const int16_t **chrVSrc, int chrFilterSize,
1214  const int16_t **alpSrc, uint8_t *dest,
1215  int dstW, int y, enum PixelFormat target, int hasAlpha)
1216 {
1217  int i;
1218  int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
1219 
1220  for (i = 0; i < dstW; i++) {
1221  int j;
1222  int Y = 0;
1223  int U = -128 << 19;
1224  int V = -128 << 19;
1225  int av_unused A;
1226  int R, G, B;
1227 
1228  for (j = 0; j < lumFilterSize; j++) {
1229  Y += lumSrc[j][i] * lumFilter[j];
1230  }
1231  for (j = 0; j < chrFilterSize; j++) {
1232  U += chrUSrc[j][i] * chrFilter[j];
1233  V += chrVSrc[j][i] * chrFilter[j];
1234  }
1235  Y >>= 10;
1236  U >>= 10;
1237  V >>= 10;
1238  if (hasAlpha) {
1239  A = 1 << 21;
1240  for (j = 0; j < lumFilterSize; j++) {
1241  A += alpSrc[j][i] * lumFilter[j];
1242  }
1243  A >>= 19;
1244  if (A & 0x100)
1245  A = av_clip_uint8(A);
1246  }
1247  Y -= c->yuv2rgb_y_offset;
1248  Y *= c->yuv2rgb_y_coeff;
1249  Y += 1 << 21;
1250  R = Y + V*c->yuv2rgb_v2r_coeff;
1251  G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1252  B = Y + U*c->yuv2rgb_u2b_coeff;
1253  if ((R | G | B) & 0xC0000000) {
1254  R = av_clip_uintp2(R, 30);
1255  G = av_clip_uintp2(G, 30);
1256  B = av_clip_uintp2(B, 30);
1257  }
1258 
1259  switch(target) {
1260  case PIX_FMT_ARGB:
1261  dest[0] = hasAlpha ? A : 255;
1262  dest[1] = R >> 22;
1263  dest[2] = G >> 22;
1264  dest[3] = B >> 22;
1265  break;
1266  case PIX_FMT_RGB24:
1267  dest[0] = R >> 22;
1268  dest[1] = G >> 22;
1269  dest[2] = B >> 22;
1270  break;
1271  case PIX_FMT_RGBA:
1272  dest[0] = R >> 22;
1273  dest[1] = G >> 22;
1274  dest[2] = B >> 22;
1275  dest[3] = hasAlpha ? A : 255;
1276  break;
1277  case PIX_FMT_ABGR:
1278  dest[0] = hasAlpha ? A : 255;
1279  dest[1] = B >> 22;
1280  dest[2] = G >> 22;
1281  dest[3] = R >> 22;
1282  dest += 4;
1283  break;
1284  case PIX_FMT_BGR24:
1285  dest[0] = B >> 22;
1286  dest[1] = G >> 22;
1287  dest[2] = R >> 22;
1288  break;
1289  case PIX_FMT_BGRA:
1290  dest[0] = B >> 22;
1291  dest[1] = G >> 22;
1292  dest[2] = R >> 22;
1293  dest[3] = hasAlpha ? A : 255;
1294  break;
1295  }
1296  dest += step;
1297  }
1298 }
1299 
/* Instantiate the full-chroma-resolution (X-tap only) output functions.
 * CONFIG_SMALL builds decide alpha at run time via c->alpPixBuf. */
#if CONFIG_SMALL
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA, 1)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB, 1)
#endif
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB, 0)
#endif
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0)
1319 
/**
 * Fill 'height' consecutive rows of 'plane', starting at row 'y', with
 * the byte value 'val'; 'stride' is the distance in bytes between rows.
 */
static inline void fillPlane(uint8_t* plane, int stride,
                             int width, int height,
                             int y, uint8_t val)
{
    uint8_t *row = plane + stride * y;
    int n;

    for (n = 0; n < height; n++, row += stride)
        memset(row, val, width);
}
1331 
/* Read one 16-bit component with the byte order implied by 'origin'. */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))

/* For the BGR48 formats the first/third components are swapped relative
 * to RGB48; r/b resolve to whichever of the r_b/b_r locals (declared in
 * the templates below) holds the actual red/blue value. */
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
1336 
static av_always_inline void
rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
                    enum PixelFormat origin)
{
    /* 48-bit packed RGB/BGR -> 16-bit luma.  NOTE: the local names
     * r_b/g/b_r are load-bearing — the r/b macros above expand to them
     * to undo the BGR component order. */
    int i;
    for (i = 0; i < width; i++) {
        unsigned int r_b = input_pixel(&src[i*3+0]);
        unsigned int g   = input_pixel(&src[i*3+1]);
        unsigned int b_r = input_pixel(&src[i*3+2]);

        /* 0x2001 << (SHIFT-1) = 16 << (SHIFT+8) black offset + rounding */
        dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
    }
}
1350 
static av_always_inline void
rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
                     const uint16_t *src1, const uint16_t *src2,
                     int width, enum PixelFormat origin)
{
    /* 48-bit packed RGB/BGR -> 16-bit chroma, one sample per pixel.
     * src1 and src2 are required to alias (single packed plane). */
    int i;
    assert(src1==src2);
    for (i = 0; i < width; i++) {
        int r_b = input_pixel(&src1[i*3+0]);
        int g   = input_pixel(&src1[i*3+1]);
        int b_r = input_pixel(&src1[i*3+2]);

        /* 0x10001 << (SHIFT-1): 128 << (SHIFT+9) chroma offset + rounding */
        dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
    }
}
1367 
static av_always_inline void
rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
                          const uint16_t *src1, const uint16_t *src2,
                          int width, enum PixelFormat origin)
{
    /* Same as rgb48ToUV_c_template, but horizontally downsampled 2:1:
     * each chroma output is computed from the rounded average of two
     * adjacent input pixels. */
    int i;
    assert(src1==src2);
    for (i = 0; i < width; i++) {
        int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
        int g   = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
        int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;

        dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
        dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
    }
}
1384 
1385 #undef r
1386 #undef b
1387 #undef input_pixel
1388 
/* Instantiate the three rgb48 input wrappers (ToY, ToUV, ToUV_half) for
 * one component order (rgb/bgr) and endianness (BE/LE); each wrapper
 * casts the raw byte pointers to uint16_t and forwards to the shared
 * template with the matching PixelFormat. */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
                                            int width, uint32_t *unused) \
{ \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                             const uint8_t *_src1, const uint8_t *_src2, \
                                             int width, uint32_t *unused) \
{ \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                                  const uint8_t *_src1, const uint8_t *_src2, \
                                                  int width, uint32_t *unused) \
{ \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
}
1417 
1419 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
1420 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
1421 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
1422 
/* Load one source pixel: 32-bit formats are read as an aligned native
 * 32-bit word; 16-bit formats as a 16-bit word with the byte order
 * implied by 'origin'. */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
1426 
/**
 * Packed 16/32-bit RGB -> 8-bit luma.
 * shp is applied to the whole pixel first (for the *32_1 layouts);
 * maskr/maskg/maskb isolate the components, shr/shg/shb right-align
 * them; rsh/gsh/bsh scale the RGB2YUV coefficients so that every
 * layout ends up at the common fixed-point scale S.
 */
static av_always_inline void
rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
                       int width, enum PixelFormat origin,
                       int shr, int shg, int shb, int shp,
                       int maskr, int maskg, int maskb,
                       int rsh, int gsh, int bsh, int S)
{
    const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
    /* 33/2 = 16.5: the +16 luma black offset plus 0.5 rounding, at scale S */
    const unsigned rnd = 33u << (S - 1);
    int i;

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
    }
}
1447 
/**
 * Packed 16/32-bit RGB -> 8-bit chroma, one sample per input pixel.
 * Parameters as in rgb16_32ToY_c_template.
 */
static av_always_inline void
rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
                        const uint8_t *src, int width,
                        enum PixelFormat origin,
                        int shr, int shg, int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh, int gsh, int bsh, int S)
{
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
    /* 257/2 = 128.5: chroma bias plus 0.5 rounding, at scale S */
    const unsigned rnd = 257u << (S - 1);
    int i;

    for (i = 0; i < width; i++) {
        int px = input_pixel(i) >> shp;
        int b = (px & maskb) >> shb;
        int g = (px & maskg) >> shg;
        int r = (px & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
    }
}
1471 
/**
 * Packed 16/32-bit RGB -> 8-bit chroma, horizontally downsampled 2:1:
 * each output sample is computed from the sum of two adjacent input
 * pixels (hence the doubled masks and the >> (S + 1)).
 */
static av_always_inline void
rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr, int shg, int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh, int gsh, int bsh, int S)
{
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              maskgx = ~(maskr | maskb);
    const unsigned rnd = 257u << S;
    int i;

    /* widen the masks by one bit so they cover the summed components */
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        /* summed R and B recovered by subtracting the green part */
        int rb = px0 + px1 - g;

        b = (rb & maskb) >> shb;
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            /* green cannot have overflowed into a neighboring field here */
            g >>= shg;
        } else {
            g = (g & maskg) >> shg;
        }
        r = (rb & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
    }
}
1506 
1507 #undef input_pixel
1508 
/* Instantiate the ToY/ToUV/ToUV_half input wrappers for one packed
 * 16/32-bit RGB layout by forwarding its mask/shift description to the
 * shared templates above. */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
                          int width, uint32_t *unused) \
{ \
    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
}
1533 
/* One instantiation per supported packed RGB layout; the arguments are
 * (format, name, shr, shg, shb, shp, maskr, maskg, maskb, rsh, gsh,
 * bsh, S) as documented on the templates above. */
rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
1550 
/* Extract the alpha plane from ABGR/ARGB input (alpha is byte 0). */
static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[4 * n];
}
1558 
/* Extract the alpha plane from RGBA/BGRA input (alpha is byte 3). */
static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[4 * n + 3];
}
1566 
/* Paletted input -> luma: Y is stored in the low byte of each 32-bit
 * palette entry. */
static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = pal[src[n]] & 0xFF;
}
1576 
/* Paletted input -> chroma: U is byte 1, V is byte 2 of each palette
 * entry.  src1 and src2 must alias (single source plane). */
static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
                      const uint8_t *src1, const uint8_t *src2,
                      int width, uint32_t *pal)
{
    int n;

    assert(src1 == src2);
    for (n = 0; n < width; n++) {
        uint32_t entry = pal[src1[n]];

        dstU[n] = entry >> 8;
        dstV[n] = entry >> 16;
    }
}
1590 
/* 1 bpp, white-is-zero -> 8-bit luma: invert each byte, then expand
 * every bit (MSB first) to 0 or 255. */
static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int byte, bit;

    for (byte = 0; byte < width / 8; byte++) {
        int bits = ~src[byte];
        for (bit = 0; bit < 8; bit++)
            dst[8 * byte + bit] = 255 * ((bits >> (7 - bit)) & 1);
    }
}
1601 
/* 1 bpp, black-is-zero -> 8-bit luma: expand every bit (MSB first) to
 * 0 or 255. */
static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
                          int width, uint32_t *unused)
{
    int byte, bit;

    for (byte = 0; byte < width / 8; byte++) {
        int bits = src[byte];
        for (bit = 0; bit < 8; bit++)
            dst[8 * byte + bit] = 255 * ((bits >> (7 - bit)) & 1);
    }
}
1612 
1613 //FIXME yuy2* can read up to 7 samples too much
1614 
/* YUYV (packed 4:2:2): luma occupies the even bytes. */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[2 * n];
}
1622 
/* YUYV (packed 4:2:2): one U/V pair per two luma samples, at byte
 * offsets 1 and 3 of each 4-byte group.  src1 and src2 must alias. */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = src1[4 * n + 1];
        dstV[n] = src1[4 * n + 3];
    }
    assert(src1 == src2);
}
1633 
/* Byte-swap a 16-bit luma plane (endianness conversion). */
static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i=0; i<width; i++) {
        dst[i] = av_bswap16(src[i]);
    }
}
1643 
/* Byte-swap two 16-bit chroma planes (endianness conversion). */
static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
                        const uint8_t *_src2, int width, uint32_t *unused)
{
    int i;
    const uint16_t *src1 = (const uint16_t *) _src1,
                   *src2 = (const uint16_t *) _src2;
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
    for (i=0; i<width; i++) {
        dstU[i] = av_bswap16(src1[i]);
        dstV[i] = av_bswap16(src2[i]);
    }
}
1656 
/* This is almost identical to the previous, and exists only because
 * yuy2ToY/UV(dst, src + 1, ...) would have 100% unaligned accesses. */
/* UYVY (packed 4:2:2): luma occupies the odd bytes. */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++)
        dst[n] = src[2 * n + 1];
}
1666 
/* UYVY (packed 4:2:2): U and V at byte offsets 0 and 2 of each 4-byte
 * group.  src1 and src2 must alias. */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        dstU[n] = src1[4 * n + 0];
        dstV[n] = src1[4 * n + 2];
    }
    assert(src1 == src2);
}
1677 
/* De-interleave a semi-planar chroma plane: even bytes go to dst1,
 * odd bytes to dst2. */
static inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
                              const uint8_t *src, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        dst1[n] = src[2 * n + 0];
        dst2[n] = src[2 * n + 1];
    }
}
1687 
/* NV12: interleaved chroma is ordered U,V — pass planes through as-is. */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstU, dstV, src1, width);
}
1694 
/* NV21: interleaved chroma is ordered V,U — swap the destinations. */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    nvXXtoUV_c(dstV, dstU, src1, width);
}
1701 
/* Read one 16-bit component with the byte order implied by 'origin'
 * (NOTE(review): redefined here for templates later in the file; the
 * 24-bit converters below do not use it). */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1703 
1704 static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
1705  int width, uint32_t *unused)
1706 {
1707  int i;
1708  for (i=0; i<width; i++) {
1709  int b= src[i*3+0];
1710  int g= src[i*3+1];
1711  int r= src[i*3+2];
1712 
1713  dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1714  }
1715 }
1716 
1717 static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1718  const uint8_t *src2, int width, uint32_t *unused)
1719 {
1720  int i;
1721  for (i=0; i<width; i++) {
1722  int b= src1[3*i + 0];
1723  int g= src1[3*i + 1];
1724  int r= src1[3*i + 2];
1725 
1726  dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1727  dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1728  }
1729  assert(src1 == src2);
1730 }
1731 
1732 static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1733  const uint8_t *src2, int width, uint32_t *unused)
1734 {
1735  int i;
1736  for (i=0; i<width; i++) {
1737  int b= src1[6*i + 0] + src1[6*i + 3];
1738  int g= src1[6*i + 1] + src1[6*i + 4];
1739  int r= src1[6*i + 2] + src1[6*i + 5];
1740 
1741  dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1742  dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1743  }
1744  assert(src1 == src2);
1745 }
1746 
1747 static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
1748  uint32_t *unused)
1749 {
1750  int i;
1751  for (i=0; i<width; i++) {
1752  int r= src[i*3+0];
1753  int g= src[i*3+1];
1754  int b= src[i*3+2];
1755 
1756  dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1757  }
1758 }
1759 
1760 static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1761  const uint8_t *src2, int width, uint32_t *unused)
1762 {
1763  int i;
1764  assert(src1==src2);
1765  for (i=0; i<width; i++) {
1766  int r= src1[3*i + 0];
1767  int g= src1[3*i + 1];
1768  int b= src1[3*i + 2];
1769 
1770  dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1771  dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1772  }
1773 }
1774 
1775 static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
1776  const uint8_t *src2, int width, uint32_t *unused)
1777 {
1778  int i;
1779  assert(src1==src2);
1780  for (i=0; i<width; i++) {
1781  int r= src1[6*i + 0] + src1[6*i + 3];
1782  int g= src1[6*i + 1] + src1[6*i + 4];
1783  int b= src1[6*i + 2] + src1[6*i + 5];
1784 
1785  dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1786  dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1787  }
1788 }
1789 
1790 static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
1791 {
1792  int i;
1793  for (i = 0; i < width; i++) {
1794  int g = src[0][i];
1795  int b = src[1][i];
1796  int r = src[2][i];
1797 
1798  dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
1799  }
1800 }
1801 
/* Planar 16-bit little-endian GBR -> 16-bit luma. */
static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i = 0; i < width; i++) {
        int g = AV_RL16(src[0] + i);
        int b = AV_RL16(src[1] + i);
        int r = AV_RL16(src[2] + i);

        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
    }
}
1815 
/* Planar 16-bit big-endian GBR -> 16-bit luma. */
static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dst = (uint16_t *) _dst;
    for (i = 0; i < width; i++) {
        int g = AV_RB16(src[0] + i);
        int b = AV_RB16(src[1] + i);
        int r = AV_RB16(src[2] + i);

        dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
    }
}
1829 
1830 static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
1831 {
1832  int i;
1833  for (i = 0; i < width; i++) {
1834  int g = src[0][i];
1835  int b = src[1][i];
1836  int r = src[2][i];
1837 
1838  dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1839  dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
1840  }
1841 }
1842 
/* Planar 16-bit little-endian GBR -> 16-bit chroma. */
static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dstU = (uint16_t *) _dstU;
    uint16_t *dstV = (uint16_t *) _dstV;
    for (i = 0; i < width; i++) {
        int g = AV_RL16(src[0] + i);
        int b = AV_RL16(src[1] + i);
        int r = AV_RL16(src[2] + i);

        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
    }
}
1858 
/* Planar 16-bit big-endian GBR -> 16-bit chroma. */
static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
{
    int i;
    const uint16_t **src = (const uint16_t **) _src;
    uint16_t *dstU = (uint16_t *) _dstU;
    uint16_t *dstV = (uint16_t *) _dstV;
    for (i = 0; i < width; i++) {
        int g = AV_RB16(src[0] + i);
        int b = AV_RB16(src[1] + i);
        int r = AV_RB16(src[2] + i);

        dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
        dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
    }
}
1874 
/* Horizontally scale a 16-bit input line into 19-bit output samples using
 * 14-bit filter coefficients; output is clipped to (1 << 19) - 1. */
static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    int32_t *dst = (int32_t *) _dst;
    const uint16_t *src = (const uint16_t *) _src;
    /* NOTE(review): `bits` is not declared in this excerpt — a line appears
     * to be missing here (presumably deriving the source bit depth from the
     * pixel format descriptor); verify against the complete file. */
    int sh = bits - 4;

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];   // first source sample this tap covers
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
        dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
    }
}
1897 
/* Horizontally scale a 16-bit input line into 15-bit output samples using
 * 14-bit filter coefficients; output is clipped to (1 << 15) - 1. */
static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
                           const int16_t *filter,
                           const int32_t *filterPos, int filterSize)
{
    int i;
    const uint16_t *src = (const uint16_t *) _src;
    /* NOTE(review): the declaration of `sh` is missing from this excerpt
     * (a line appears to have been dropped here); verify against the
     * complete file. */

    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];   // first source sample this tap covers
        int val = 0;

        for (j = 0; j < filterSize; j++) {
            val += src[srcPos + j] * filter[filterSize * i + j];
        }
        // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
        dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
    }
}
1918 
1919 // bilinear / bicubic scaling
1920 static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
1921  const int16_t *filter, const int32_t *filterPos,
1922  int filterSize)
1923 {
1924  int i;
1925  for (i=0; i<dstW; i++) {
1926  int j;
1927  int srcPos= filterPos[i];
1928  int val=0;
1929  for (j=0; j<filterSize; j++) {
1930  val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1931  }
1932  //filter += hFilterSize;
1933  dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
1934  //dst[i] = val>>7;
1935  }
1936 }
1937 
1938 static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
1939  const int16_t *filter, const int32_t *filterPos,
1940  int filterSize)
1941 {
1942  int i;
1943  int32_t *dst = (int32_t *) _dst;
1944  for (i=0; i<dstW; i++) {
1945  int j;
1946  int srcPos= filterPos[i];
1947  int val=0;
1948  for (j=0; j<filterSize; j++) {
1949  val += ((int)src[srcPos + j])*filter[filterSize*i + j];
1950  }
1951  //filter += hFilterSize;
1952  dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
1953  //dst[i] = val>>7;
1954  }
1955 }
1956 
1957 //FIXME all pal and rgb srcFormats could do this convertion as well
1958 //FIXME all scalers more complex than bilinear could do half of this transform
//FIXME all pal and rgb srcFormats could do this convertion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/* Expand 15-bit-scaled chroma from MPEG (limited) range to JPEG (full)
 * range, in place. Input is clipped so the scaled result fits 16 bits. */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int u = dstU[i];
        int v = dstV[i];

        if (u > 30775)
            u = 30775;
        if (v > 30775)
            v = 30775;
        dstU[i] = (u * 4663 - 9289992) >> 12; // -264
        dstV[i] = (v * 4663 - 9289992) >> 12; // -264
    }
}
/* Compress 15-bit-scaled chroma from JPEG (full) range to MPEG (limited)
 * range, in place. */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int16_t *u = dstU, *v = dstV;
    const int16_t *end = dstU + width;

    while (u < end) {
        *u = (*u * 1799 + 4081085) >> 11; // 1469
        *v = (*v * 1799 + 4081085) >> 11; // 1469
        u++;
        v++;
    }
}
/* Expand 15-bit-scaled luma from MPEG (limited) range to JPEG (full)
 * range, in place. Input is clipped so the scaled result fits 16 bits. */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int i;

    for (i = 0; i < width; i++) {
        int y = dst[i] < 30189 ? dst[i] : 30189;

        dst[i] = (y * 19077 - 39057361) >> 14;
    }
}
/* Compress 15-bit-scaled luma from JPEG (full) range to MPEG (limited)
 * range, in place. */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int16_t *p = dst;
    const int16_t *end = dst + width;

    for (; p < end; p++)
        *p = (*p * 14071 + 33561947) >> 14;
}
1987 
/* 19-bit (int32-stored) variant of chrRangeToJpeg_c: expand chroma from
 * MPEG (limited) range to JPEG (full) range, in place. */
static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *dstU = (int32_t *) _dstU;
    int32_t *dstV = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        int32_t u = dstU[i] < (30775 << 4) ? dstU[i] : (30775 << 4);
        int32_t v = dstV[i] < (30775 << 4) ? dstV[i] : (30775 << 4);

        dstU[i] = (u * 4663 - (9289992 << 4)) >> 12; // -264
        dstV[i] = (v * 4663 - (9289992 << 4)) >> 12; // -264
    }
}
/* 19-bit (int32-stored) variant of chrRangeFromJpeg_c: compress chroma
 * from JPEG (full) range to MPEG (limited) range, in place. */
static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
{
    int32_t *u = (int32_t *) _dstU;
    int32_t *v = (int32_t *) _dstV;
    int i;

    for (i = 0; i < width; i++) {
        u[i] = (u[i] * 1799 + (4081085 << 4)) >> 11; // 1469
        v[i] = (v[i] * 1799 + (4081085 << 4)) >> 11; // 1469
    }
}
/* 19-bit (int32-stored) variant of lumRangeToJpeg_c: expand luma from
 * MPEG (limited) range to JPEG (full) range, in place. */
static void lumRangeToJpeg16_c(int16_t *_dst, int width)
{
    int32_t *dst = (int32_t *) _dst;
    int i;

    for (i = 0; i < width; i++) {
        int32_t y = dst[i];

        if (y > (30189 << 4))
            y = 30189 << 4;
        dst[i] = (y * 4769 - (39057361 << 2)) >> 12;
    }
}
/* 19-bit (int32-stored) variant of lumRangeFromJpeg_c: compress luma from
 * JPEG (full) range to MPEG (limited) range, in place. */
static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
{
    int32_t *p = (int32_t *) _dst;
    const int32_t *end = p + width;

    for (; p < end; p++)
        *p = (*p * 14071 + (33561947 << 4)) >> 14;
}
2022 
2023 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
2024  const uint8_t *src, int srcW, int xInc)
2025 {
2026  int i;
2027  unsigned int xpos=0;
2028  for (i=0;i<dstWidth;i++) {
2029  register unsigned int xx=xpos>>16;
2030  register unsigned int xalpha=(xpos&0xFFFF)>>9;
2031  dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2032  xpos+=xInc;
2033  }
2034 }
2035 
// *** horizontal scale Y line to temp buffer
/* Scale one luma (or alpha, if isAlpha is set) input line horizontally
 * into dst. Packed or planar-RGB input is first converted into
 * formatConvBuffer via the context's conversion callback, then either the
 * generic hScale or the fast bilinear path is applied, followed by the
 * optional luma range conversion. */
static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                     const uint8_t *src_in[4], int srcW, int xInc,
                                     const int16_t *hLumFilter,
                                     const int32_t *hLumFilterPos, int hLumFilterSize,
                                     uint8_t *formatConvBuffer,
                                     uint32_t *pal, int isAlpha)
{
    /* alpha uses its own packed->planar converter and never gets range
     * conversion; it reads plane 3, luma reads plane 0 */
    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
    void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
    const uint8_t *src = src_in[isAlpha ? 3 : 0];

    if (toYV12) {
        toYV12(formatConvBuffer, src, srcW, pal);
        src= formatConvBuffer;
    } else if (c->readLumPlanar && !isAlpha) {
        c->readLumPlanar(formatConvBuffer, src_in, srcW);
        src = formatConvBuffer;
    }

    if (!c->hyscale_fast) {
        c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
    }

    if (convertRange)
        convertRange(dst, dstWidth);
}
2065 
2066 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
2067  int dstWidth, const uint8_t *src1,
2068  const uint8_t *src2, int srcW, int xInc)
2069 {
2070  int i;
2071  unsigned int xpos=0;
2072  for (i=0;i<dstWidth;i++) {
2073  register unsigned int xx=xpos>>16;
2074  register unsigned int xalpha=(xpos&0xFFFF)>>9;
2075  dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2076  dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2077  xpos+=xInc;
2078  }
2079 }
2080 
/* Scale one pair of chroma input lines (U from plane 1, V from plane 2)
 * horizontally into dst1/dst2. Packed or planar-RGB input is first
 * converted into formatConvBuffer via the context's conversion callback,
 * then either the generic hScale or the fast bilinear path is applied,
 * followed by the optional chroma range conversion. */
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
                                     const uint8_t *src_in[4],
                                     int srcW, int xInc, const int16_t *hChrFilter,
                                     const int32_t *hChrFilterPos, int hChrFilterSize,
                                     uint8_t *formatConvBuffer, uint32_t *pal)
{
    const uint8_t *src1 = src_in[1], *src2 = src_in[2];
    if (c->chrToYV12) {
        /* the second plane is placed after the first one, 16-byte aligned */
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
        src1= formatConvBuffer;
        src2= buf2;
    } else if (c->readChrPlanar) {
        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
        c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
        src1= formatConvBuffer;
        src2= buf2;
    }

    if (!c->hcscale_fast) {
        c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
        c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
    } else { // fast bilinear upscale / crap downscale
        c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
    }

    if (c->chrConvertRange)
        c->chrConvertRange(dst1, dst2, dstWidth);
}
2110 
2111 static av_always_inline void
2117 {
2118  enum PixelFormat dstFormat = c->dstFormat;
2119 
2120  if (is16BPS(dstFormat)) {
2121  *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
2122  *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
2123  } else if (is9_OR_10BPS(dstFormat)) {
2124  if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
2125  *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
2126  *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
2127  } else {
2128  *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
2129  *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
2130  }
2131  } else {
2132  *yuv2plane1 = yuv2plane1_8_c;
2133  *yuv2planeX = yuv2planeX_8_c;
2134  if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
2135  *yuv2nv12cX = yuv2nv12cX_c;
2136  }
2137 
2138  if(c->flags & SWS_FULL_CHR_H_INT) {
2139  switch (dstFormat) {
2140  case PIX_FMT_RGBA:
2141 #if CONFIG_SMALL
2142  *yuv2packedX = yuv2rgba32_full_X_c;
2143 #else
2144 #if CONFIG_SWSCALE_ALPHA
2145  if (c->alpPixBuf) {
2146  *yuv2packedX = yuv2rgba32_full_X_c;
2147  } else
2148 #endif /* CONFIG_SWSCALE_ALPHA */
2149  {
2150  *yuv2packedX = yuv2rgbx32_full_X_c;
2151  }
2152 #endif /* !CONFIG_SMALL */
2153  break;
2154  case PIX_FMT_ARGB:
2155 #if CONFIG_SMALL
2156  *yuv2packedX = yuv2argb32_full_X_c;
2157 #else
2158 #if CONFIG_SWSCALE_ALPHA
2159  if (c->alpPixBuf) {
2160  *yuv2packedX = yuv2argb32_full_X_c;
2161  } else
2162 #endif /* CONFIG_SWSCALE_ALPHA */
2163  {
2164  *yuv2packedX = yuv2xrgb32_full_X_c;
2165  }
2166 #endif /* !CONFIG_SMALL */
2167  break;
2168  case PIX_FMT_BGRA:
2169 #if CONFIG_SMALL
2170  *yuv2packedX = yuv2bgra32_full_X_c;
2171 #else
2172 #if CONFIG_SWSCALE_ALPHA
2173  if (c->alpPixBuf) {
2174  *yuv2packedX = yuv2bgra32_full_X_c;
2175  } else
2176 #endif /* CONFIG_SWSCALE_ALPHA */
2177  {
2178  *yuv2packedX = yuv2bgrx32_full_X_c;
2179  }
2180 #endif /* !CONFIG_SMALL */
2181  break;
2182  case PIX_FMT_ABGR:
2183 #if CONFIG_SMALL
2184  *yuv2packedX = yuv2abgr32_full_X_c;
2185 #else
2186 #if CONFIG_SWSCALE_ALPHA
2187  if (c->alpPixBuf) {
2188  *yuv2packedX = yuv2abgr32_full_X_c;
2189  } else
2190 #endif /* CONFIG_SWSCALE_ALPHA */
2191  {
2192  *yuv2packedX = yuv2xbgr32_full_X_c;
2193  }
2194 #endif /* !CONFIG_SMALL */
2195  break;
2196  case PIX_FMT_RGB24:
2197  *yuv2packedX = yuv2rgb24_full_X_c;
2198  break;
2199  case PIX_FMT_BGR24:
2200  *yuv2packedX = yuv2bgr24_full_X_c;
2201  break;
2202  }
2203  } else {
2204  switch (dstFormat) {
2205  case PIX_FMT_RGB48LE:
2206  *yuv2packed1 = yuv2rgb48le_1_c;
2207  *yuv2packed2 = yuv2rgb48le_2_c;
2208  *yuv2packedX = yuv2rgb48le_X_c;
2209  break;
2210  case PIX_FMT_RGB48BE:
2211  *yuv2packed1 = yuv2rgb48be_1_c;
2212  *yuv2packed2 = yuv2rgb48be_2_c;
2213  *yuv2packedX = yuv2rgb48be_X_c;
2214  break;
2215  case PIX_FMT_BGR48LE:
2216  *yuv2packed1 = yuv2bgr48le_1_c;
2217  *yuv2packed2 = yuv2bgr48le_2_c;
2218  *yuv2packedX = yuv2bgr48le_X_c;
2219  break;
2220  case PIX_FMT_BGR48BE:
2221  *yuv2packed1 = yuv2bgr48be_1_c;
2222  *yuv2packed2 = yuv2bgr48be_2_c;
2223  *yuv2packedX = yuv2bgr48be_X_c;
2224  break;
2225  case PIX_FMT_RGB32:
2226  case PIX_FMT_BGR32:
2227 #if CONFIG_SMALL
2228  *yuv2packed1 = yuv2rgb32_1_c;
2229  *yuv2packed2 = yuv2rgb32_2_c;
2230  *yuv2packedX = yuv2rgb32_X_c;
2231 #else
2232 #if CONFIG_SWSCALE_ALPHA
2233  if (c->alpPixBuf) {
2234  *yuv2packed1 = yuv2rgba32_1_c;
2235  *yuv2packed2 = yuv2rgba32_2_c;
2236  *yuv2packedX = yuv2rgba32_X_c;
2237  } else
2238 #endif /* CONFIG_SWSCALE_ALPHA */
2239  {
2240  *yuv2packed1 = yuv2rgbx32_1_c;
2241  *yuv2packed2 = yuv2rgbx32_2_c;
2242  *yuv2packedX = yuv2rgbx32_X_c;
2243  }
2244 #endif /* !CONFIG_SMALL */
2245  break;
2246  case PIX_FMT_RGB32_1:
2247  case PIX_FMT_BGR32_1:
2248 #if CONFIG_SMALL
2249  *yuv2packed1 = yuv2rgb32_1_1_c;
2250  *yuv2packed2 = yuv2rgb32_1_2_c;
2251  *yuv2packedX = yuv2rgb32_1_X_c;
2252 #else
2253 #if CONFIG_SWSCALE_ALPHA
2254  if (c->alpPixBuf) {
2255  *yuv2packed1 = yuv2rgba32_1_1_c;
2256  *yuv2packed2 = yuv2rgba32_1_2_c;
2257  *yuv2packedX = yuv2rgba32_1_X_c;
2258  } else
2259 #endif /* CONFIG_SWSCALE_ALPHA */
2260  {
2261  *yuv2packed1 = yuv2rgbx32_1_1_c;
2262  *yuv2packed2 = yuv2rgbx32_1_2_c;
2263  *yuv2packedX = yuv2rgbx32_1_X_c;
2264  }
2265 #endif /* !CONFIG_SMALL */
2266  break;
2267  case PIX_FMT_RGB24:
2268  *yuv2packed1 = yuv2rgb24_1_c;
2269  *yuv2packed2 = yuv2rgb24_2_c;
2270  *yuv2packedX = yuv2rgb24_X_c;
2271  break;
2272  case PIX_FMT_BGR24:
2273  *yuv2packed1 = yuv2bgr24_1_c;
2274  *yuv2packed2 = yuv2bgr24_2_c;
2275  *yuv2packedX = yuv2bgr24_X_c;
2276  break;
2277  case PIX_FMT_RGB565LE:
2278  case PIX_FMT_RGB565BE:
2279  case PIX_FMT_BGR565LE:
2280  case PIX_FMT_BGR565BE:
2281  *yuv2packed1 = yuv2rgb16_1_c;
2282  *yuv2packed2 = yuv2rgb16_2_c;
2283  *yuv2packedX = yuv2rgb16_X_c;
2284  break;
2285  case PIX_FMT_RGB555LE:
2286  case PIX_FMT_RGB555BE:
2287  case PIX_FMT_BGR555LE:
2288  case PIX_FMT_BGR555BE:
2289  *yuv2packed1 = yuv2rgb15_1_c;
2290  *yuv2packed2 = yuv2rgb15_2_c;
2291  *yuv2packedX = yuv2rgb15_X_c;
2292  break;
2293  case PIX_FMT_RGB444LE:
2294  case PIX_FMT_RGB444BE:
2295  case PIX_FMT_BGR444LE:
2296  case PIX_FMT_BGR444BE:
2297  *yuv2packed1 = yuv2rgb12_1_c;
2298  *yuv2packed2 = yuv2rgb12_2_c;
2299  *yuv2packedX = yuv2rgb12_X_c;
2300  break;
2301  case PIX_FMT_RGB8:
2302  case PIX_FMT_BGR8:
2303  *yuv2packed1 = yuv2rgb8_1_c;
2304  *yuv2packed2 = yuv2rgb8_2_c;
2305  *yuv2packedX = yuv2rgb8_X_c;
2306  break;
2307  case PIX_FMT_RGB4:
2308  case PIX_FMT_BGR4:
2309  *yuv2packed1 = yuv2rgb4_1_c;
2310  *yuv2packed2 = yuv2rgb4_2_c;
2311  *yuv2packedX = yuv2rgb4_X_c;
2312  break;
2313  case PIX_FMT_RGB4_BYTE:
2314  case PIX_FMT_BGR4_BYTE:
2315  *yuv2packed1 = yuv2rgb4b_1_c;
2316  *yuv2packed2 = yuv2rgb4b_2_c;
2317  *yuv2packedX = yuv2rgb4b_X_c;
2318  break;
2319  }
2320  }
2321  switch (dstFormat) {
2322  case PIX_FMT_GRAY16BE:
2323  *yuv2packed1 = yuv2gray16BE_1_c;
2324  *yuv2packed2 = yuv2gray16BE_2_c;
2325  *yuv2packedX = yuv2gray16BE_X_c;
2326  break;
2327  case PIX_FMT_GRAY16LE:
2328  *yuv2packed1 = yuv2gray16LE_1_c;
2329  *yuv2packed2 = yuv2gray16LE_2_c;
2330  *yuv2packedX = yuv2gray16LE_X_c;
2331  break;
2332  case PIX_FMT_MONOWHITE:
2333  *yuv2packed1 = yuv2monowhite_1_c;
2334  *yuv2packed2 = yuv2monowhite_2_c;
2335  *yuv2packedX = yuv2monowhite_X_c;
2336  break;
2337  case PIX_FMT_MONOBLACK:
2338  *yuv2packed1 = yuv2monoblack_1_c;
2339  *yuv2packed2 = yuv2monoblack_2_c;
2340  *yuv2packedX = yuv2monoblack_X_c;
2341  break;
2342  case PIX_FMT_YUYV422:
2343  *yuv2packed1 = yuv2yuyv422_1_c;
2344  *yuv2packed2 = yuv2yuyv422_2_c;
2345  *yuv2packedX = yuv2yuyv422_X_c;
2346  break;
2347  case PIX_FMT_UYVY422:
2348  *yuv2packed1 = yuv2uyvy422_1_c;
2349  *yuv2packed2 = yuv2uyvy422_2_c;
2350  *yuv2packedX = yuv2uyvy422_X_c;
2351  break;
2352  }
2353 }
2354 
2355 #define DEBUG_SWSCALE_BUFFERS 0
2356 #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
2357 
2358 static int swScale(SwsContext *c, const uint8_t* src[],
2359  int srcStride[], int srcSliceY,
2360  int srcSliceH, uint8_t* dst[], int dstStride[])
2361 {
2362  /* load a few things into local vars to make the code more readable? and faster */
2363  const int srcW= c->srcW;
2364  const int dstW= c->dstW;
2365  const int dstH= c->dstH;
2366  const int chrDstW= c->chrDstW;
2367  const int chrSrcW= c->chrSrcW;
2368  const int lumXInc= c->lumXInc;
2369  const int chrXInc= c->chrXInc;
2370  const enum PixelFormat dstFormat= c->dstFormat;
2371  const int flags= c->flags;
2372  int32_t *vLumFilterPos= c->vLumFilterPos;
2373  int32_t *vChrFilterPos= c->vChrFilterPos;
2374  int32_t *hLumFilterPos= c->hLumFilterPos;
2375  int32_t *hChrFilterPos= c->hChrFilterPos;
2376  int16_t *vLumFilter= c->vLumFilter;
2377  int16_t *vChrFilter= c->vChrFilter;
2378  int16_t *hLumFilter= c->hLumFilter;
2379  int16_t *hChrFilter= c->hChrFilter;
2380  int32_t *lumMmxFilter= c->lumMmxFilter;
2381  int32_t *chrMmxFilter= c->chrMmxFilter;
2382  int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
2383  const int vLumFilterSize= c->vLumFilterSize;
2384  const int vChrFilterSize= c->vChrFilterSize;
2385  const int hLumFilterSize= c->hLumFilterSize;
2386  const int hChrFilterSize= c->hChrFilterSize;
2387  int16_t **lumPixBuf= c->lumPixBuf;
2388  int16_t **chrUPixBuf= c->chrUPixBuf;
2389  int16_t **chrVPixBuf= c->chrVPixBuf;
2390  int16_t **alpPixBuf= c->alpPixBuf;
2391  const int vLumBufSize= c->vLumBufSize;
2392  const int vChrBufSize= c->vChrBufSize;
2393  uint8_t *formatConvBuffer= c->formatConvBuffer;
2394  const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
2395  const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
2396  int lastDstY;
2397  uint32_t *pal=c->pal_yuv;
2404  int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
2405 
2406  /* vars which will change and which we need to store back in the context */
2407  int dstY= c->dstY;
2408  int lumBufIndex= c->lumBufIndex;
2409  int chrBufIndex= c->chrBufIndex;
2410  int lastInLumBuf= c->lastInLumBuf;
2411  int lastInChrBuf= c->lastInChrBuf;
2412 
2413  if (isPacked(c->srcFormat)) {
2414  src[0]=
2415  src[1]=
2416  src[2]=
2417  src[3]= src[0];
2418  srcStride[0]=
2419  srcStride[1]=
2420  srcStride[2]=
2421  srcStride[3]= srcStride[0];
2422  }
2423  srcStride[1]<<= c->vChrDrop;
2424  srcStride[2]<<= c->vChrDrop;
2425 
2426  DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
2427  src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
2428  dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
2429  DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
2430  srcSliceY, srcSliceH, dstY, dstH);
2431  DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
2432  vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
2433 
2434  if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
2435  static int warnedAlready=0; //FIXME move this into the context perhaps
2436  if (flags & SWS_PRINT_INFO && !warnedAlready) {
2437  av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
2438  " ->cannot do aligned memory accesses anymore\n");
2439  warnedAlready=1;
2440  }
2441  }
2442 
2443  /* Note the user might start scaling the picture in the middle so this
2444  will not get executed. This is not really intended but works
2445  currently, so people might do it. */
2446  if (srcSliceY ==0) {
2447  lumBufIndex=-1;
2448  chrBufIndex=-1;
2449  dstY=0;
2450  lastInLumBuf= -1;
2451  lastInChrBuf= -1;
2452  }
2453 
2454  if (!should_dither) {
2456  }
2457  lastDstY= dstY;
2458 
2459  for (;dstY < dstH; dstY++) {
2460  const int chrDstY= dstY>>c->chrDstVSubSample;
2461  uint8_t *dest[4] = {
2462  dst[0] + dstStride[0] * dstY,
2463  dst[1] + dstStride[1] * chrDstY,
2464  dst[2] + dstStride[2] * chrDstY,
2465  (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
2466  };
2467 
2468  const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2469  const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
2470  const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
2471 
2472  // Last line needed as input
2473  int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1;
2474  int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1;
2475  int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
2476  int enough_lines;
2477 
2478  //handle holes (FAST_BILINEAR & weird filters)
2479  if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
2480  if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
2481  assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
2482  assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
2483 
2484  DEBUG_BUFFERS("dstY: %d\n", dstY);
2485  DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
2486  firstLumSrcY, lastLumSrcY, lastInLumBuf);
2487  DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
2488  firstChrSrcY, lastChrSrcY, lastInChrBuf);
2489 
2490  // Do we have enough lines in this slice to output the dstY line
2491  enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
2492 
2493  if (!enough_lines) {
2494  lastLumSrcY = srcSliceY + srcSliceH - 1;
2495  lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
2496  DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
2497  lastLumSrcY, lastChrSrcY);
2498  }
2499 
2500  //Do horizontal scaling
2501  while(lastInLumBuf < lastLumSrcY) {
2502  const uint8_t *src1[4] = {
2503  src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
2504  src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
2505  src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
2506  src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
2507  };
2508  lumBufIndex++;
2509  assert(lumBufIndex < 2*vLumBufSize);
2510  assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2511  assert(lastInLumBuf + 1 - srcSliceY >= 0);
2512  hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2513  hLumFilter, hLumFilterPos, hLumFilterSize,
2514  formatConvBuffer,
2515  pal, 0);
2516  if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2517  hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
2518  lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
2519  formatConvBuffer,
2520  pal, 1);
2521  lastInLumBuf++;
2522  DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
2523  lumBufIndex, lastInLumBuf);
2524  }
2525  while(lastInChrBuf < lastChrSrcY) {
2526  const uint8_t *src1[4] = {
2527  src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
2528  src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
2529  src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
2530  src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
2531  };
2532  chrBufIndex++;
2533  assert(chrBufIndex < 2*vChrBufSize);
2534  assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
2535  assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
2536  //FIXME replace parameters through context struct (some at least)
2537 
2538  if (c->needs_hcscale)
2539  hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
2540  chrDstW, src1, chrSrcW, chrXInc,
2541  hChrFilter, hChrFilterPos, hChrFilterSize,
2542  formatConvBuffer, pal);
2543  lastInChrBuf++;
2544  DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
2545  chrBufIndex, lastInChrBuf);
2546  }
2547  //wrap buf index around to stay inside the ring buffer
2548  if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
2549  if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
2550  if (!enough_lines)
2551  break; //we can't output a dstY line so let's try with the next slice
2552 
2553 #if HAVE_MMX
2554  updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
2555 #endif
2556  if (should_dither) {
2557  c->chrDither8 = dither_8x8_128[chrDstY & 7];
2558  c->lumDither8 = dither_8x8_128[dstY & 7];
2559  }
2560  if (dstY >= dstH-2) {
2561  // hmm looks like we can't use MMX here without overwriting this array's tail
2562  find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
2563  &yuv2packed1, &yuv2packed2, &yuv2packedX);
2564  }
2565 
2566  {
2567  const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2568  const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2569  const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2570  const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2571 
2572  if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
2573  const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
2574  int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
2575  for (i = 0; i < neg; i++)
2576  tmpY[i] = lumSrcPtr[neg];
2577  for ( ; i < end; i++)
2578  tmpY[i] = lumSrcPtr[i];
2579  for ( ; i < vLumFilterSize; i++)
2580  tmpY[i] = tmpY[i-1];
2581  lumSrcPtr = tmpY;
2582 
2583  if (alpSrcPtr) {
2584  const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
2585  for (i = 0; i < neg; i++)
2586  tmpA[i] = alpSrcPtr[neg];
2587  for ( ; i < end; i++)
2588  tmpA[i] = alpSrcPtr[i];
2589  for ( ; i < vLumFilterSize; i++)
2590  tmpA[i] = tmpA[i - 1];
2591  alpSrcPtr = tmpA;
2592  }
2593  }
2594  if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
2595  const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize,
2596  **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize;
2597  int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
2598  for (i = 0; i < neg; i++) {
2599  tmpU[i] = chrUSrcPtr[neg];
2600  tmpV[i] = chrVSrcPtr[neg];
2601  }
2602  for ( ; i < end; i++) {
2603  tmpU[i] = chrUSrcPtr[i];
2604  tmpV[i] = chrVSrcPtr[i];
2605  }
2606  for ( ; i < vChrFilterSize; i++) {
2607  tmpU[i] = tmpU[i - 1];
2608  tmpV[i] = tmpV[i - 1];
2609  }
2610  chrUSrcPtr = tmpU;
2611  chrVSrcPtr = tmpV;
2612  }
2613 
2614  if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
2615  const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2616 
2617  if (vLumFilterSize == 1) {
2618  yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
2619  } else {
2620  yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2621  lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
2622  }
2623 
2624  if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
2625  if (yuv2nv12cX) {
2626  yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
2627  } else if (vChrFilterSize == 1) {
2628  yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
2629  yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
2630  } else {
2631  yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2632  chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
2633  yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize,
2634  chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3);
2635  }
2636  }
2637 
2638  if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
2639  if (vLumFilterSize == 1) {
2640  yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
2641  } else {
2642  yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize,
2643  alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
2644  }
2645  }
2646  } else {
2647  if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
2648  int chrAlpha = vChrFilter[2 * dstY + 1];
2649  yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2650  alpPixBuf ? *alpSrcPtr : NULL,
2651  dest[0], dstW, chrAlpha, dstY);
2652  } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
2653  int lumAlpha = vLumFilter[2 * dstY + 1];
2654  int chrAlpha = vChrFilter[2 * dstY + 1];
2655  lumMmxFilter[2] =
2656  lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
2657  chrMmxFilter[2] =
2658  chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
2659  yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
2660  alpPixBuf ? alpSrcPtr : NULL,
2661  dest[0], dstW, lumAlpha, chrAlpha, dstY);
2662  } else { //general RGB
2663  yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
2664  lumSrcPtr, vLumFilterSize,
2665  vChrFilter + dstY * vChrFilterSize,
2666  chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
2667  alpSrcPtr, dest[0], dstW, dstY);
2668  }
2669  }
2670  }
2671  }
2672 
2673  if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
2674  fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
2675 
2676 #if HAVE_MMX2
2678  __asm__ volatile("sfence":::"memory");
2679 #endif
2680  emms_c();
2681 
2682  /* store changed local vars back in the context */
2683  c->dstY= dstY;
2688 
2689  return dstY - lastDstY;
2690 }
2691 
2693 {
2694  enum PixelFormat srcFormat = c->srcFormat;
2695 
2697  &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
2698  &c->yuv2packedX);
2699 
2700  c->chrToYV12 = NULL;
2701  switch(srcFormat) {
2702  case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
2703  case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
2704  case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
2705  case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
2706  case PIX_FMT_RGB8 :
2707  case PIX_FMT_BGR8 :
2708  case PIX_FMT_PAL8 :
2709  case PIX_FMT_BGR4_BYTE:
2710  case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
2711  case PIX_FMT_GBRP9LE:
2712  case PIX_FMT_GBRP10LE:
2714  case PIX_FMT_GBRP9BE:
2715  case PIX_FMT_GBRP10BE:
2717  case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
2718 #if HAVE_BIGENDIAN
2719  case PIX_FMT_YUV444P9LE:
2720  case PIX_FMT_YUV422P9LE:
2721  case PIX_FMT_YUV420P9LE:
2722  case PIX_FMT_YUV422P10LE:
2723  case PIX_FMT_YUV444P10LE:
2724  case PIX_FMT_YUV420P10LE:
2725  case PIX_FMT_YUV420P16LE:
2726  case PIX_FMT_YUV422P16LE:
2727  case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
2728 #else
2729  case PIX_FMT_YUV444P9BE:
2730  case PIX_FMT_YUV422P9BE:
2731  case PIX_FMT_YUV420P9BE:
2732  case PIX_FMT_YUV444P10BE:
2733  case PIX_FMT_YUV422P10BE:
2734  case PIX_FMT_YUV420P10BE:
2735  case PIX_FMT_YUV420P16BE:
2736  case PIX_FMT_YUV422P16BE:
2737  case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
2738 #endif
2739  }
2740  if (c->chrSrcHSubSample) {
2741  switch(srcFormat) {
2742  case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
2743  case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
2744  case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
2745  case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
2746  case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
2747  case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
2748  case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
2749  case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
2750  case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
2751  case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
2752  case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
2753  case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
2754  case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
2755  case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
2756  case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
2757  case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
2758  case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
2759  case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
2760  case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
2761  case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
2762  case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
2763  case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
2764  }
2765  } else {
2766  switch(srcFormat) {
2767  case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
2768  case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
2769  case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
2770  case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
2771  case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
2772  case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
2773  case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
2774  case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
2775  case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
2776  case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
2777  case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
2778  case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
2779  case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
2780  case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
2781  case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
2782  case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
2783  case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
2784  case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
2785  case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
2786  case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
2787  case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
2788  case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
2789  }
2790  }
2791 
2792  c->lumToYV12 = NULL;
2793  c->alpToYV12 = NULL;
2794  switch (srcFormat) {
2795  case PIX_FMT_GBRP9LE:
2796  case PIX_FMT_GBRP10LE:
2798  case PIX_FMT_GBRP9BE:
2799  case PIX_FMT_GBRP10BE:
2801  case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
2802 #if HAVE_BIGENDIAN
2803  case PIX_FMT_YUV444P9LE:
2804  case PIX_FMT_YUV422P9LE:
2805  case PIX_FMT_YUV420P9LE:
2806  case PIX_FMT_YUV444P10LE:
2807  case PIX_FMT_YUV422P10LE:
2808  case PIX_FMT_YUV420P10LE:
2809  case PIX_FMT_YUV420P16LE:
2810  case PIX_FMT_YUV422P16LE:
2811  case PIX_FMT_YUV444P16LE:
2812  case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
2813 #else
2814  case PIX_FMT_YUV444P9BE:
2815  case PIX_FMT_YUV422P9BE:
2816  case PIX_FMT_YUV420P9BE:
2817  case PIX_FMT_YUV444P10BE:
2818  case PIX_FMT_YUV422P10BE:
2819  case PIX_FMT_YUV420P10BE:
2820  case PIX_FMT_YUV420P16BE:
2821  case PIX_FMT_YUV422P16BE:
2822  case PIX_FMT_YUV444P16BE:
2823  case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
2824 #endif
2825  case PIX_FMT_YUYV422 :
2826  case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
2827  case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
2828  case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
2829  case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
2830  case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
2831  case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
2832  case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
2833  case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
2834  case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
2835  case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
2836  case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
2837  case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
2838  case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
2839  case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
2840  case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
2841  case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
2842  case PIX_FMT_RGB8 :
2843  case PIX_FMT_BGR8 :
2844  case PIX_FMT_PAL8 :
2845  case PIX_FMT_BGR4_BYTE:
2846  case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
2847  case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
2848  case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
2849  case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
2850  case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
2851  case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
2852  case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
2853  case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
2854  case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
2855  case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
2856  case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
2857  }
2858  if (c->alpPixBuf) {
2859  switch (srcFormat) {
2860  case PIX_FMT_BGRA:
2861  case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
2862  case PIX_FMT_ABGR:
2863  case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
2864  case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
2865  }
2866  }
2867 
2868  if (c->srcBpc == 8) {
2869  if (c->dstBpc <= 10) {
2870  c->hyScale = c->hcScale = hScale8To15_c;
2871  if (c->flags & SWS_FAST_BILINEAR) {
2874  }
2875  } else {
2876  c->hyScale = c->hcScale = hScale8To19_c;
2877  }
2878  } else {
2879  c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
2880  }
2881 
2882  if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
2883  if (c->dstBpc <= 10) {
2884  if (c->srcRange) {
2887  } else {
2890  }
2891  } else {
2892  if (c->srcRange) {
2895  } else {
2898  }
2899  }
2900  }
2901 
2902  if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
2903  srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
2904  c->needs_hcscale = 1;
2905 }
2906 
2908 {
2909  sws_init_swScale_c(c);
2910 
2911  if (HAVE_MMX)
2913  if (HAVE_ALTIVEC)
2915 
2916  return swScale;
2917 }