/* End bound of the input: s + src_size. The declaration of s is not
 * visible in this excerpt (presumably a byte pointer -- TODO confirm). */
34 const uint8_t *end = s + src_size;
/* Same end-bound computation, belonging to a second (not visible) routine. */
58 const uint8_t *end = s + src_size;
/* Fragment of a 16-bit pixel conversion that moves bits 5..14 of every
 * 16-bit value up by one position (to bits 6..15) while leaving bits 0..4
 * untouched. Presumably a 5-5-5 to 5-6-5 conversion -- TODO confirm.
 * The enclosing function header and loop statements are not visible. */
86 register const uint8_t *s = src;
87 register const uint8_t *end = s + src_size;
/* Three bytes before end; presumably the limit for the 4-bytes-per-step
 * loop so that a 32-bit load never reads past end -- TODO confirm. */
88 const uint8_t *mm_end = end - 3;
/* Two 16-bit values per 32-bit load. (x & 0x7FFF7FFF) drops bit 15 of each
 * half; adding (x & 0x7FE07FE0) doubles the field in bits 5..14, which is
 * the same as shifting it left by one. The doubled field fits in 16 bits,
 * so nothing carries from the low half into the high half. */
91 register unsigned x = *((
const uint32_t *)s);
92 *((uint32_t *)d) = (x & 0x7FFF7FFF) + (x & 0x7FE07FE0);
/* Same transform applied to a single 16-bit value. */
97 register unsigned short x = *((
const uint16_t *)s);
98 *((uint16_t *)d) = (x & 0x7FFF) + (x & 0x7FE0);
/* Fragment of a 16-bit pixel conversion that moves bits 6..15 of every
 * 16-bit value down by one position (to bits 5..14), discarding bit 5 and
 * keeping bits 0..4. Presumably a 5-6-5 to 5-5-5 conversion -- TODO confirm.
 * The enclosing function header and loop statements are not visible. */
105 register const uint8_t *s = src;
106 register const uint8_t *end = s + src_size;
/* Three bytes before end; presumably the limit for the 4-bytes-per-step
 * loop -- TODO confirm. */
107 const uint8_t *mm_end = end - 3;
/* Two 16-bit values per 32-bit load: the mask 0x7FE07FE0 applied after the
 * shift removes the bit that crossed from the high half into the low half. */
110 register uint32_t x = *((
const uint32_t *)s);
111 *((uint32_t *)d) = ((x >> 1) & 0x7FE07FE0) | (x & 0x001F001F);
/* Same transform applied to a single 16-bit value. */
116 register uint16_t x = *((
const uint16_t *)s);
117 *((uint16_t *)d) = ((x >> 1) & 0x7FE0) | (x & 0x001F);
/* Fragment: packs one 32-bit input word into one 16-bit 5-6-5 output value.
 * The loop around the load/store is not visible in this excerpt. */
123 uint16_t *d = (uint16_t *)dst;
125 const uint8_t *end = s + src_size;
128 register int rgb = *(
const uint32_t *)s;
/* bits 3..7   of the word -> output bits 0..4
 * bits 10..15 of the word -> output bits 5..10
 * bits 19..23 of the word -> output bits 11..15
 * Bits 24..31 of the word are ignored. */
130 *d++ = ((rgb & 0xFF) >> 3) +
131 ((rgb & 0xFC00) >> 5) +
132 ((rgb & 0xF80000) >> 8);
/* Fragment: packs one 32-bit input word into one 16-bit 5-6-5 output value
 * with the two outer fields exchanged relative to a straight pack.
 * The loop around the load/store is not visible in this excerpt. */
139 uint16_t *d = (uint16_t *)dst;
141 const uint8_t *end = s + src_size;
144 register int rgb = *(
const uint32_t *)s;
/* bits 3..7   of the word -> output bits 11..15
 * bits 10..15 of the word -> output bits 5..10
 * bits 19..23 of the word -> output bits 0..4
 * Bits 24..31 of the word are ignored. */
146 *d++ = ((rgb & 0xF8) << 8) +
147 ((rgb & 0xFC00) >> 5) +
148 ((rgb & 0xF80000) >> 19);
/* Fragment: packs one 32-bit input word into one 16-bit 5-5-5 output value
 * (output bit 15 stays zero). The surrounding loop is not visible. */
154 uint16_t *d = (uint16_t *)dst;
156 const uint8_t *end = s + src_size;
159 register int rgb = *(
const uint32_t *)s;
/* bits 3..7   of the word -> output bits 0..4
 * bits 11..15 of the word -> output bits 5..9
 * bits 19..23 of the word -> output bits 10..14
 * Bits 24..31 of the word are ignored. */
161 *d++ = ((rgb & 0xFF) >> 3) +
162 ((rgb & 0xF800) >> 6) +
163 ((rgb & 0xF80000) >> 9);
/* Fragment: packs one 32-bit input word into one 16-bit 5-5-5 output value
 * with the two outer fields exchanged (output bit 15 stays zero).
 * The surrounding loop is not visible. */
170 uint16_t *d = (uint16_t *)dst;
172 const uint8_t *end = s + src_size;
175 register int rgb = *(
const uint32_t *)s;
/* bits 3..7   of the word -> output bits 10..14
 * bits 11..15 of the word -> output bits 5..9
 * bits 19..23 of the word -> output bits 0..4
 * Bits 24..31 of the word are ignored. */
177 *d++ = ((rgb & 0xF8) << 7) +
178 ((rgb & 0xF800) >> 6) +
179 ((rgb & 0xF80000) >> 19);
/* Two fragments that pack separate b, g, r values into a 16-bit 5-6-5
 * value: b's upper 5 bits -> bits 0..4, g's upper 6 bits -> bits 5..10,
 * r's upper 5 bits -> bits 11..15 (assuming b, g, r hold 8-bit values).
 * How b, g and r are loaded is not visible; the two fragments presumably
 * differ only in the order the bytes are read -- TODO confirm. */
186 uint16_t *d = (uint16_t *)dst;
188 const uint8_t *end = s + src_size;
194 *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
/* Second fragment: identical packing expression. */
200 uint16_t *d = (uint16_t *)dst;
202 const uint8_t *end = s + src_size;
208 *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
/* Two fragments that pack separate b, g, r values into a 16-bit 5-5-5
 * value: b's upper 5 bits -> bits 0..4, g's upper 5 bits -> bits 5..9,
 * r's upper 5 bits -> bits 10..14 (assuming b, g, r hold 8-bit values).
 * How b, g and r are loaded is not visible; the two fragments presumably
 * differ only in the order the bytes are read -- TODO confirm. */
215 uint16_t *d = (uint16_t *)dst;
217 const uint8_t *end = s + src_size;
223 *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
/* Second fragment: identical packing expression. */
229 uint16_t *d = (uint16_t *)dst;
231 const uint8_t *end = s + src_size;
237 *d++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
/* Fragment: expands one 16-bit value holding three 5-bit fields into three
 * consecutive output stores. The declaration of d and the loop statement
 * are not visible in this excerpt. */
266 const uint16_t *s = (
const uint16_t *)src;
/* src_size is halved because s counts 16-bit elements. */
267 const uint16_t *end = s + src_size / 2;
270 register uint16_t bgr = *s++;
/* Each 5-bit field is placed in bits 3..7 of its output value; the low
 * three bits are left at zero (no replication of the high bits). */
271 *d++ = (bgr & 0x1F) << 3;
272 *d++ = (bgr & 0x3E0) >> 2;
273 *d++ = (bgr & 0x7C00) >> 7;
/* Fragment: expands one 16-bit 5-6-5 value into three consecutive output
 * stores. The declaration of d and the loop statement are not visible. */
281 const uint16_t *s = (
const uint16_t *)src;
/* src_size is halved because s counts 16-bit elements. */
282 const uint16_t *end = s + src_size / 2;
285 register uint16_t bgr = *s++;
/* bits 0..4   -> output bits 3..7 (low 3 bits zero)
 * bits 5..10  -> output bits 2..7 (low 2 bits zero)
 * bits 11..15 -> output bits 3..7 (low 3 bits zero) */
286 *d++ = (bgr & 0x1F) << 3;
287 *d++ = (bgr & 0x7E0) >> 3;
288 *d++ = (bgr & 0xF800) >> 8;
/* Fragment: expands one 16-bit value with three 5-bit fields into output
 * stores, with two alternative store orders. The lines that choose between
 * the two orders (and any additional store per pixel) are not visible;
 * presumably a compile-time byte-order switch -- TODO confirm. */
295 const uint16_t *s = (
const uint16_t *)src;
/* src_size is halved because s counts 16-bit elements. */
296 const uint16_t *end = s + src_size / 2;
299 register uint16_t bgr = *s++;
/* Order A: field in bits 10..14 first, then bits 5..9, then bits 0..4. */
302 *d++ = (bgr & 0x7C00) >> 7;
303 *d++ = (bgr & 0x3E0) >> 2;
304 *d++ = (bgr & 0x1F) << 3;
/* Order B: the reverse -- bits 0..4 first, then bits 5..9, then bits 10..14. */
306 *d++ = (bgr & 0x1F) << 3;
307 *d++ = (bgr & 0x3E0) >> 2;
308 *d++ = (bgr & 0x7C00) >> 7;
/* Fragment: expands one 16-bit 5-6-5 value into output stores, with two
 * alternative store orders. The lines that choose between the two orders
 * (and any additional store per pixel) are not visible; presumably a
 * compile-time byte-order switch -- TODO confirm. */
317 const uint16_t *s = (
const uint16_t *)src;
/* src_size is halved because s counts 16-bit elements. */
318 const uint16_t *end = s + src_size / 2;
321 register uint16_t bgr = *s++;
/* Order A: field in bits 11..15 first, then bits 5..10, then bits 0..4. */
324 *d++ = (bgr & 0xF800) >> 8;
325 *d++ = (bgr & 0x7E0) >> 3;
326 *d++ = (bgr & 0x1F) << 3;
/* Order B: the reverse -- bits 0..4 first, then bits 5..10, then bits 11..15. */
328 *d++ = (bgr & 0x1F) << 3;
329 *d++ = (bgr & 0x7E0) >> 3;
330 *d++ = (bgr & 0xF800) >> 8;
/* Fragment: rearranges 32-bit words four bytes at a time. idx starts at
 * 15 - src_size and the loop stops once idx reaches 15, so idx advances by
 * src_size (rounded up to a multiple of 4) in total. The definitions of s
 * and d are not visible; presumably they are offset by idx so that
 * s[idx]/d[idx] start at the beginning of the buffers -- TODO confirm. */
339 int idx = 15 - src_size;
343 for (; idx < 15; idx += 4) {
/* g keeps bits 8..15 and 24..31 of the loaded word unchanged. */
344 register int v = *(
const uint32_t *)&s[idx],
g = v & 0xff00ff00;
/* Combines v moved down 16 bits, g, and v moved up 16 bits.
 * NOTE(review): original line 345 is missing from this excerpt; presumably
 * it masks v to the bits not covered by g before this sum -- confirm. */
346 *(uint32_t *)&d[idx] = (v >> 16) +
g + (v << 16);
/* Fragment: walks the input in 3-byte groups and writes the group with its
 * first and third bytes exchanged; the middle byte is copied as is. */
354 for (i = 0; i < src_size; i += 3) {
/* src[i + 2] is read into x before dst[i + 2] is written, which keeps the
 * value intact if dst and src refer to the same memory. The store of x
 * (presumably into dst[i + 0]) is not visible in this excerpt. */
355 register uint8_t x = src[i + 2];
356 dst[i + 1] = src[i + 1];
357 dst[i + 2] = src[i + 0];
/* Fragment: interleaves three byte planes (ysrc, usrc, vsrc) into packed
 * output in the byte order Y0 U Y1 V. Only the parameter tail and parts of
 * the body are visible; pointer advances between iterations are not. */
365 int lumStride,
int chromStride,
366 int dstStride,
int vertLumPerChroma)
/* One U and one V sample per two Y samples horizontally. */
369 const int chromWidth = width >> 1;
371 for (y = 0; y <
height; y++) {
/* Path writing 64 bits (two Y-U-Y-V groups) per iteration. */
373 uint64_t *ldst = (uint64_t *)dst;
374 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
375 for (i = 0; i < chromWidth; i += 2) {
/* k and l each hold one group with Y0 in the lowest byte.
 * NOTE(review): vc[n] is a uint8_t promoted to int, so (vc[n] << 24)
 * overflows a 32-bit int for values >= 0x80; on common compilers the
 * negative result sign-extends when converted to uint64_t and sets the
 * upper 32 bits of k, which would then disturb the sum k + (l << 32).
 * Consider casting to unsigned before shifting -- confirm. */
376 uint64_t k = yc[0] + (uc[0] << 8) +
377 (yc[1] << 16) + (vc[0] << 24);
378 uint64_t l = yc[2] + (uc[1] << 8) +
379 (yc[3] << 16) + (vc[1] << 24);
380 *ldst++ = k + (l << 32);
/* Path writing one group per iteration through idst (declaration not
 * visible). Two alternative word layouts follow; the lines selecting
 * between them are not visible (presumably a byte-order switch). */
388 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
390 for (i = 0; i < chromWidth; i++) {
/* Layout with Y0 in the most significant byte. */
392 *idst++ = (yc[0] << 24) + (uc[0] << 16) +
393 (yc[1] << 8) + (vc[0] << 0);
/* Layout with Y0 in the least significant byte. */
395 *idst++ = yc[0] + (uc[0] << 8) +
396 (yc[1] << 16) + (vc[0] << 24);
/* True on the last row of every group of vertLumPerChroma rows (this mask
 * test assumes vertLumPerChroma is a power of two). The guarded statements
 * are not visible; presumably they advance the chroma pointers. */
403 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
/* Fragment of a wrapper: forwards chromStride and dstStride and passes the
 * constant 2 as the final argument. The callee name is not visible;
 * presumably the final argument is vertLumPerChroma -- TODO confirm. */
419 int chromStride,
int dstStride)
423 chromStride, dstStride, 2);
/* Fragment: interleaves three byte planes (ysrc, usrc, vsrc) into packed
 * output in the byte order U Y0 V Y1. Only the parameter tail and parts of
 * the body are visible; pointer advances between iterations are not. */
429 int lumStride,
int chromStride,
430 int dstStride,
int vertLumPerChroma)
/* One U and one V sample per two Y samples horizontally. */
433 const int chromWidth = width >> 1;
435 for (y = 0; y <
height; y++) {
/* Path writing 64 bits (two U-Y-V-Y groups) per iteration. */
437 uint64_t *ldst = (uint64_t *)dst;
438 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
439 for (i = 0; i < chromWidth; i += 2) {
/* k and l each hold one group with U in the lowest byte.
 * NOTE(review): yc[n] is a uint8_t promoted to int, so (yc[n] << 24)
 * overflows a 32-bit int for values >= 0x80; on common compilers the
 * negative result sign-extends when converted to uint64_t and sets the
 * upper 32 bits of k, which would then disturb the sum k + (l << 32).
 * Consider casting to unsigned before shifting -- confirm. */
440 uint64_t k = uc[0] + (yc[0] << 8) +
441 (vc[0] << 16) + (yc[1] << 24);
442 uint64_t l = uc[1] + (yc[2] << 8) +
443 (vc[1] << 16) + (yc[3] << 24);
444 *ldst++ = k + (l << 32);
/* Path writing one group per iteration through idst (declaration not
 * visible). Two alternative word layouts follow; the lines selecting
 * between them are not visible (presumably a byte-order switch). */
452 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
454 for (i = 0; i < chromWidth; i++) {
/* Layout with U in the most significant byte. */
456 *idst++ = (uc[0] << 24) + (yc[0] << 16) +
457 (vc[0] << 8) + (yc[1] << 0);
/* Layout with U in the least significant byte. */
459 *idst++ = uc[0] + (yc[0] << 8) +
460 (vc[0] << 16) + (yc[1] << 24);
/* True on the last row of every group of vertLumPerChroma rows (this mask
 * test assumes vertLumPerChroma is a power of two). The guarded statements
 * are not visible; presumably they advance the chroma pointers. */
467 if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
/* Fragments of three wrappers; each forwards chromStride and dstStride to
 * a callee whose name is not visible. Presumably the final argument is
 * vertLumPerChroma -- TODO confirm. */
/* Wrapper passing 2 as the final argument. */
483 int chromStride,
int dstStride)
487 chromStride, dstStride, 2);
/* Wrapper passing 1 as the final argument. */
496 int chromStride,
int dstStride)
499 chromStride, dstStride, 1);
/* Wrapper passing 1 as the final argument. */
508 int chromStride,
int dstStride)
511 chromStride, dstStride, 1);
/* Fragment: splits packed 4-byte groups (Y0 U Y1 V order) into separate
 * ydst/udst/vdst planes, handling two input rows per outer iteration.
 * Pointer advances between the rows are not visible in this excerpt. */
521 int chromStride,
int srcStride)
/* Number of 4-byte groups per row: one per two luma samples. */
524 const int chromWidth = width >> 1;
526 for (y = 0; y <
height; y += 2) {
/* First row of the pair: bytes 0 and 2 go to luma, byte 1 to udst and
 * byte 3 to vdst. */
528 for (i = 0; i < chromWidth; i++) {
529 ydst[2 * i + 0] = src[4 * i + 0];
530 udst[i] = src[4 * i + 1];
531 ydst[2 * i + 1] = src[4 * i + 2];
532 vdst[i] = src[4 * i + 3];
/* Second row of the pair: only the luma bytes are copied; bytes 1 and 3
 * of this row are not read in the visible lines. */
537 for (i = 0; i < chromWidth; i++) {
538 ydst[2 * i + 0] = src[4 * i + 0];
539 ydst[2 * i + 1] = src[4 * i + 2];
/* Fragment: doubles a byte plane in both directions using 3:1 weighted
 * blends of neighbouring samples (sum divided by 4 via >> 2, truncating).
 * Stores to dst[0] for the first/last rows and the src advance per row are
 * not visible in this excerpt. */
549 int srcHeight,
int srcStride,
int dstStride)
/* Horizontal-only pass: each pair of neighbours yields two outputs, one
 * weighted 3:1 towards the left sample and one 3:1 towards the right. */
556 for (x = 0; x < srcWidth - 1; x++) {
557 dst[2 * x + 1] = (3 * src[x] + src[x + 1]) >> 2;
558 dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
/* Last output column copies the last input sample unblended. */
560 dst[2 * srcWidth - 1] = src[srcWidth - 1];
/* Interior rows: every source row pair (src, src + srcStride) produces two
 * output rows (dst, dst + dstStride). */
564 for (y = 1; y < srcHeight; y++) {
/* With mmxSize == 1 the inner loop below starts at x == 0. */
565 const int mmxSize = 1;
/* First column: vertical blend only. */
567 dst[0] = (src[0] * 3 + src[srcStride]) >> 2;
568 dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
/* Each iteration fills a 2x2 output cell from the two diagonals of the
 * 2x2 input cell: src[x] with src[x + srcStride + 1], and src[x + 1] with
 * src[x + srcStride]. */
570 for (x = mmxSize - 1; x < srcWidth - 1; x++) {
571 dst[2 * x + 1] = (src[x + 0] * 3 + src[x + srcStride + 1]) >> 2;
572 dst[2 * x + dstStride + 2] = (src[x + 0] + 3 * src[x + srcStride + 1]) >> 2;
573 dst[2 * x + dstStride + 1] = (src[x + 1] + 3 * src[x + srcStride]) >> 2;
574 dst[2 * x + 2] = (src[x + 1] * 3 + src[x + srcStride]) >> 2;
/* Last column: vertical blend only. */
576 dst[srcWidth * 2 - 1] = (src[srcWidth - 1] * 3 + src[srcWidth - 1 + srcStride]) >> 2;
577 dst[srcWidth * 2 - 1 + dstStride] = (src[srcWidth - 1] + 3 * src[srcWidth - 1 + srcStride]) >> 2;
/* Two output rows were produced for this source row. */
579 dst += dstStride * 2;
/* Horizontal-only pass again, same blend as the first pass (presumably for
 * the final output row -- TODO confirm). */
586 for (x = 0; x < srcWidth - 1; x++) {
587 dst[2 * x + 1] = (src[x] * 3 + src[x + 1]) >> 2;
588 dst[2 * x + 2] = (src[x] + 3 * src[x + 1]) >> 2;
590 dst[2 * srcWidth - 1] = src[srcWidth - 1];
/* Fragment: splits packed 4-byte groups (U Y0 V Y1 order) into separate
 * ydst/udst/vdst planes, handling two input rows per outer iteration.
 * Pointer advances between the rows are not visible in this excerpt. */
602 int chromStride,
int srcStride)
/* Number of 4-byte groups per row: one per two luma samples. */
605 const int chromWidth = width >> 1;
607 for (y = 0; y <
height; y += 2) {
/* First row of the pair: byte 0 goes to udst, byte 2 to vdst, bytes 1 and
 * 3 to luma. */
609 for (i = 0; i < chromWidth; i++) {
610 udst[i] = src[4 * i + 0];
611 ydst[2 * i + 0] = src[4 * i + 1];
612 vdst[i] = src[4 * i + 2];
613 ydst[2 * i + 1] = src[4 * i + 3];
/* Second row of the pair: only the luma bytes are copied; bytes 0 and 2
 * of this row are not read in the visible lines. */
618 for (i = 0; i < chromWidth; i++) {
619 ydst[2 * i + 0] = src[4 * i + 1];
620 ydst[2 * i + 1] = src[4 * i + 3];
/* Fragment: walks input rows in pairs and, for each i, loads three
 * consecutive bytes starting at src[6 * i] into b, g and r. Six bytes per
 * i corresponds to two 3-byte pixels; the use of b/g/r and the load of the
 * second pixel are not visible in this excerpt. */
638 int chromStride,
int srcStride)
641 const int chromWidth = width >> 1;
643 for (y = 0; y <
height; y += 2) {
/* First inner loop of the row pair. */
645 for (i = 0; i < chromWidth; i++) {
646 unsigned int b = src[6 * i + 0];
647 unsigned int g = src[6 * i + 1];
648 unsigned int r = src[6 * i + 2];
/* Second inner loop of the row pair; same loads. */
668 for (i = 0; i < chromWidth; i++) {
669 unsigned int b = src[6 * i + 0];
670 unsigned int g = src[6 * i + 1];
671 unsigned int r = src[6 * i + 2];
/* Fragment: interleaves two byte rows into one. For every row h and column
 * w, dest receives src1[w] at even offsets and src2[w] at odd offsets.
 * Per-row pointer advances (using the stride parameters) are not visible. */
693 int src1Stride,
int src2Stride,
int dstStride)
697 for (h = 0; h <
height; h++) {
699 for (w = 0; w <
width; w++) {
700 dest[2 * w + 0] = src1[w];
701 dest[2 * w + 1] = src2[w];
/* Fragment: enlarges two byte planes by sample replication. The
 * definitions of w and h are not visible in this excerpt. */
712 int srcStride1,
int srcStride2,
713 int dstStride1,
int dstStride2)
/* First plane: destination row y reads source row y >> 1, so each source
 * row feeds two consecutive destination rows; every source sample is
 * written to two adjacent destination bytes. */
719 for (y = 0; y < h; y++) {
720 const uint8_t *
s1 = src1 + srcStride1 * (y >> 1);
721 uint8_t *d = dst1 + dstStride1 * y;
722 for (x = 0; x < w; x++)
723 d[2 * x] = d[2 * x + 1] = s1[x];
/* Second plane: same replication from src2 into dst2. */
725 for (y = 0; y < h; y++) {
726 const uint8_t *
s2 = src2 + srcStride2 * (y >> 1);
727 uint8_t *d = dst2 + dstStride2 * y;
728 for (x = 0; x < w; x++)
729 d[2 * x] = d[2 * x + 1] = s2[x];
/* Fragment: merges three byte planes (src1, src2, src3) into packed output
 * laid out as Y U Y V Y U Y V per 8 bytes. The definitions of w and h are
 * not visible in this excerpt. */
736 int srcStride1,
int srcStride2,
737 int srcStride3,
int dstStride)
743 for (y = 0; y < h; y++) {
/* src1 is read at row y; src2 and src3 at row y >> 2, i.e. one row of
 * those planes serves four consecutive output rows. */
744 const uint8_t *yp = src1 + srcStride1 * y;
745 const uint8_t *up = src2 + srcStride2 * (y >> 2);
746 const uint8_t *vp = src3 + srcStride3 * (y >> 2);
747 uint8_t *d = dst + dstStride * y;
/* Each x consumes four samples from yp and one each from up and vp; the
 * up/vp samples are written twice within the 8 output bytes. */
748 for (x = 0; x < w; x++) {
749 const int x2 = x << 2;
750 d[8 * x + 0] = yp[x2];
751 d[8 * x + 1] = up[x];
752 d[8 * x + 2] = yp[x2 + 1];
753 d[8 * x + 3] = vp[x];
754 d[8 * x + 4] = yp[x2 + 2];
755 d[8 * x + 5] = up[x];
756 d[8 * x + 6] = yp[x2 + 3];
757 d[8 * x + 7] = vp[x];
/* Isolated statements from several strided-extraction helpers. The loops,
 * the range of count and any pointer adjustments made before these stores
 * are not visible in this excerpt. */
/* Copies every second byte of src. */
768 dst[count] = src[2 * count];
/* Splits bytes 0 and 2 of each 4-byte group into two outputs. */
781 dst0[count] = src[4 * count + 0];
782 dst1[count] = src[4 * count + 2];
/* Same split, but each output is the truncating average of the
 * corresponding bytes from two inputs, src0 and src1. */
796 dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
797 dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
/* Same statements as at 781-782 in a different helper; presumably src has
 * been offset beforehand so other bytes are selected -- TODO confirm. */
811 dst0[count] = src[4 * count + 0];
812 dst1[count] = src[4 * count + 2];
/* Same statements as at 796-797 in a different helper; presumably src0 and
 * src1 have been offset beforehand -- TODO confirm. */
828 dst0[count] = (src0[4 * count + 0] + src1[4 * count + 0]) >> 1;
829 dst1[count] = (src0[4 * count + 2] + src1[4 * count + 2]) >> 1;
/* Fragments of four routines that share the same parameter tail and row
 * loop; their bodies are not visible in this excerpt.
 * In each, chromWidth = -((-width) >> 1) yields width / 2 rounded up
 * (rather than truncated) when >> on a negative int is an arithmetic
 * shift, which is implementation-defined in C. */
/* First routine. */
836 int lumStride,
int chromStride,
int srcStride)
839 const int chromWidth = -((-
width) >> 1);
841 for (y = 0; y <
height; y++) {
/* Second routine. */
856 int lumStride,
int chromStride,
int srcStride)
859 const int chromWidth = -((-
width) >> 1);
861 for (y = 0; y <
height; y++) {
/* Third routine. */
874 int lumStride,
int chromStride,
int srcStride)
877 const int chromWidth = -((-
width) >> 1);
879 for (y = 0; y <
height; y++) {
/* Fourth routine. */
894 int lumStride,
int chromStride,
int srcStride)
897 const int chromWidth = -((-
width) >> 1);
899 for (y = 0; y <
height; y++) {