21 #define _SVID_SOURCE //needed for MAP_ANONYMOUS
// When only MAP_ANON is defined, alias it to MAP_ANONYMOUS so that the
// "#ifdef MAP_ANONYMOUS" checks further down apply in both cases.
30 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
31 #define MAP_ANONYMOUS MAP_ANON
35 #define WIN32_LEAN_AND_MEAN
// Text placed in front of the license description string.
62 #define LICENSE_PREFIX "libswscale license: "
66 #define RET 0xC3 //near return opcode for x86
170 return "Unknown format";
175 if (dist<=1.0)
return ((d*dist + c)*dist + b)*dist +a;
/*
 * Builds the coefficient table used to scale one dimension from srcW to dstW
 * samples.
 *
 * For every output sample a row of coefficients is computed in 64-bit fixed
 * point, combined with the optional srcFilter, trimmed and padded to a
 * multiple of filterAlign, folded back into the valid source range and finally
 * written as int16 values.
 *
 * outFilter      receives the int16 coefficient table, *outFilterSize entries
 *                per output sample
 * filterPos      receives, per output sample, the index of the first source
 *                sample its row applies to
 * outFilterSize  receives the aligned number of coefficients per row
 * xInc           source step per output sample; it is compared against
 *                1<<16, so presumably 16.16 fixed point -- confirm at caller
 * srcW, dstW     number of source / destination samples
 * filterAlign    the row length is rounded up to a multiple of this value
 * one            scale used when the 64-bit rows are reduced to int16
 * flags          SWS_* flags selecting the kernel
 * cpu_flags      not referenced in the lines shown here
 *
 * NOTE(review): the return statement is not part of the visible code; check
 * the error convention before relying on it.
 */
183 static int initFilter(int16_t **outFilter, int32_t **filterPos,
int *outFilterSize,
int xInc,
184 int srcW,
int dstW,
int filterAlign,
int one,
int flags,
int cpu_flags,
192 int64_t *filter2=
NULL;
// fone is the value 1.0 on the 2^54 fixed-point scale used for the
// intermediate (64-bit) coefficients.
193 const int64_t fone= 1LL<<54;
// xInc within 10 of 0x10000: each output sample gets the single
// coefficient fone.
201 if (
FFABS(xInc - 0x10000) <10) {
206 for (i=0; i<dstW; i++) {
207 filter[i*filterSize]= fone;
217 xDstInSrc= xInc/2 - 0x8000;
218 for (i=0; i<dstW; i++) {
219 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
231 xDstInSrc= xInc/2 - 0x8000;
232 for (i=0; i<dstW; i++) {
233 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
238 for (j=0; j<filterSize; j++) {
// Weight decreases linearly with the distance between tap xx and the
// sample position xDstInSrc; negative results are clipped to 0.
239 int64_t coeff= fone -
FFABS((xx<<16) - xDstInSrc)*(fone>>16);
240 if (coeff<0) coeff=0;
241 filter[i*filterSize + j]= coeff;
251 else if (flags&
SWS_X) sizeFactor= 8;
252 else if (flags&
SWS_AREA) sizeFactor= 1;
255 else if (flags&
SWS_SINC) sizeFactor= 20;
// Tap count: 1 + sizeFactor while xInc <= 1<<16, otherwise sizeFactor is
// scaled by srcW/dstW (rounded up) before adding 1.
263 if (xInc <= 1<<16) filterSize= 1 + sizeFactor;
264 else filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;
// Limit the tap count to the range 1 .. srcW-2.
// NOTE(review): for srcW < 3 the upper bound is smaller than the lower
// bound -- confirm that callers never pass such a size.
266 filterSize = av_clip(filterSize, 1, srcW - 2);
270 xDstInSrc= xInc - 0x10000;
271 for (i=0; i<dstW; i++) {
272 int xx = (xDstInSrc - ((int64_t)(filterSize - 2) << 16)) / (1 << 17);
275 for (j=0; j<filterSize; j++) {
// d: absolute distance between tap xx and the sample position, shifted up
// by another 13 bits.
276 int64_t d= ((int64_t)
FFABS((xx<<17) - xDstInSrc))<<13;
// floatd: the same distance as a double, with 1<<30 mapping to 1.0.
282 floatd= d * (1.0/(1<<30));
284 if (flags & SWS_BICUBIC) {
// dd and ddd are d^2 and d^3, renormalised to the 2^30 scale after each
// multiplication.
291 int64_t dd = (d * d) >> 30;
292 int64_t ddd = (dd * d) >> 30;
295 coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
297 coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
// Rescale from the 2^(30+24) scale of the polynomial to the fone scale.
299 coeff *= fone>>(30+24);
306 else if (flags & SWS_X) {
311 c = cos(floatd*
M_PI);
// Raise |c| to the power A while keeping the sign of c.
314 if (c<0.0) c= -pow(-c, A);
316 coeff= (c*0.5 + 0.5)*fone;
317 }
else if (flags & SWS_AREA) {
318 int64_t d2= d - (1<<29);
319 if (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
320 else if (d2*xInc < (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
// Rescale from the 2^(30+16) scale used above to the fone scale.
322 coeff *= fone>>(30+16);
323 }
else if (flags & SWS_GAUSS) {
325 coeff = (pow(2.0, - p*floatd*floatd))*fone;
326 }
else if (flags & SWS_SINC) {
// sin(pi*x)/(pi*x), with the value at distance 0 set to 1.0 explicitly.
327 coeff = (d ? sin(floatd*
M_PI)/(floatd*
M_PI) : 1.0)*fone;
328 }
else if (flags & SWS_LANCZOS) {
// Product of two sinc terms (the second stretched by p); zero beyond
// distance p.
330 coeff = (d ? sin(floatd*
M_PI)*sin(floatd*M_PI/p)/(floatd*floatd*M_PI*M_PI/p) : 1.0)*fone;
331 if (floatd>p) coeff=0;
332 }
else if (flags & SWS_BILINEAR) {
334 if (coeff<0) coeff=0;
336 }
else if (flags & SWS_SPLINE) {
337 double p=-2.196152422706632;
344 filter[i*filterSize + j]= coeff;
// filter2 rows hold the base filter plus the extra taps contributed by
// srcFilter and dstFilter (length-1 each).
354 assert(filterSize>0);
355 filter2Size= filterSize;
356 if (srcFilter) filter2Size+= srcFilter->
length - 1;
357 if (dstFilter) filter2Size+= dstFilter->
length - 1;
358 assert(filter2Size>0);
361 for (i=0; i<dstW; i++) {
// Convolve the base row with srcFilter: each srcFilter coefficient k adds
// a scaled copy of the row at offset k.
365 for (k=0; k<srcFilter->
length; k++) {
366 for (j=0; j<filterSize; j++)
367 filter2[i*filter2Size + k + j] += srcFilter->
coeff[k]*filter[i*filterSize + j];
// Plain copy of the base row into filter2.
370 for (j=0; j<filterSize; j++)
371 filter2[i*filter2Size + j]= filter[i*filterSize + j];
// Move the start position by the difference between the centres of the
// base row and the wider filter2 row.
375 (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
// Walk the rows from last to first and determine how many taps each one
// needs; |coeff| is accumulated into cutOff from the left end and then
// from the right end of the row.
382 for (i=dstW-1; i>=0; i--) {
383 int min= filter2Size;
388 for (j=0; j<filter2Size; j++) {
// Only element 0 is inspected: the row is shifted left by one slot below,
// so the next candidate always ends up at index 0.
390 cutOff +=
FFABS(filter2[i*filter2Size]);
// Stop once this row's start position would reach that of the next row.
395 if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1])
break;
// Drop the leading element: shift the row left and zero the freed last slot.
398 for (k=1; k<filter2Size; k++)
399 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
400 filter2[i*filter2Size + k - 1]= 0;
406 for (j=filter2Size-1; j>0; j--) {
407 cutOff +=
FFABS(filter2[i*filter2Size + j]);
// minFilterSize ends up as the largest per-row requirement.
413 if (min>minFilterSize) minFilterSize=
min;
419 if (minFilterSize < 5)
427 if (minFilterSize < 3)
433 if (minFilterSize == 1 && filterAlign == 2)
437 assert(minFilterSize > 0);
// Round minFilterSize up to a multiple of filterAlign; the mask form
// assumes filterAlign is a power of two.
438 filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
439 assert(filterSize > 0);
// One row of filterSize entries per output sample.
440 filter=
av_malloc(filterSize*dstW*
sizeof(*filter));
443 *outFilterSize= filterSize;
// Copy the filter2 rows into the aligned rows, writing 0 where the aligned
// row is longer than the filter2 row.
448 for (i=0; i<dstW; i++) {
451 for (j=0; j<filterSize; j++) {
452 if (j>=filter2Size) filter[i*filterSize + j]= 0;
453 else filter[i*filterSize + j]= filter2[i*filter2Size + j];
455 filter[i*filterSize + j]= 0;
463 for (i = 0; i < dstW; i++) {
// Row starts before source sample 0: add each tap to the slot matching its
// source position, clamped to slot 0, and clear the original slot.
465 if ((*filterPos)[i] < 0) {
467 for (j = 1; j < filterSize; j++) {
468 int left =
FFMAX(j + (*filterPos)[i], 0);
469 filter[i * filterSize + left] += filter[i * filterSize + j];
470 filter[i * filterSize + j ] = 0;
// Row reaches past srcW: move the taps right by the overshoot, clamped to
// the last slot, then set the start so that the row ends exactly at srcW.
475 if ((*filterPos)[i] + filterSize > srcW) {
476 int shift = (*filterPos)[i] + filterSize - srcW;
478 for (j = filterSize - 2; j >= 0; j--) {
479 int right =
FFMIN(j + shift, filterSize - 1);
480 filter[i * filterSize + right] += filter[i * filterSize + j];
481 filter[i * filterSize + j ] = 0;
483 (*filterPos)[i] = srcW - filterSize;
// Reduce every 64-bit row to int16 output: the row sum is divided by `one`
// (rounded) and each value is written together with a carried error term.
493 for (i=0; i<dstW; i++) {
498 for (j=0; j<filterSize; j++) {
499 sum+= filter[i*filterSize + j];
501 sum= (sum + one/2)/ one;
502 for (j=0; j<*outFilterSize; j++) {
503 int64_t
v= filter[i*filterSize + j] + error;
505 (*outFilter)[i*(*outFilterSize) + j]= intV;
// Repeat the last position and the last coefficient row three more times
// after entry dstW-1 (presumably padding for readers that run past the
// end -- confirm against the consumers of these tables).
510 (*filterPos)[dstW+0] =
511 (*filterPos)[dstW+1] =
512 (*filterPos)[dstW+2] = (*filterPos)[dstW-1];
513 for (i=0; i<*outFilterSize; i++) {
514 int k= (dstW - 1) * (*outFilterSize) + i;
515 (*outFilter)[k + 1 * (*outFilterSize)] =
516 (*outFilter)[k + 2 * (*outFilterSize)] =
517 (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
/*
 * Assembles the machine code of an MMX2 horizontal scaler in filterCode.
 *
 * Two inline-asm template fragments (A and B) are defined; for each group of
 * four output samples one of them is copied into filterCode and the immediate
 * bytes of its two pshufw instructions are overwritten with the sample
 * selectors for that group. The generated code is terminated with RET.
 *
 * dstW        number of output samples
 * xInc        source step per output sample (integer part above bit 16)
 * filterCode  destination buffer for the generated code
 * filter      receives one 7-bit weight per output sample
 * filterPos   source start positions, indexed by i/2
 * numSplits   the loop covers dstW/numSplits output samples
 *
 * Returns fragmentPos + 1, i.e. the number of bytes written including RET.
 */
528 static int initMMX2HScaler(
int dstW,
int xInc, uint8_t *filterCode, int16_t *
filter, int32_t *filterPos,
int numSplits)
// Template fragment A: loads two dwords from the source, at byte offsets 0
// and 1, and shuffles each of them separately.
556 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
557 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
558 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
559 "punpcklbw %%mm7, %%mm1 \n\t"
560 "punpcklbw %%mm7, %%mm0 \n\t"
561 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
563 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
565 "psubw %%mm1, %%mm0 \n\t"
566 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
567 "pmullw %%mm3, %%mm0 \n\t"
568 "psllw $7, %%mm1 \n\t"
569 "paddw %%mm1, %%mm0 \n\t"
571 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
573 "add $8, %%"REG_a
" \n\t"
// Outputs for fragment A: its address, the offsets of the two pshufw
// immediates inside it, and its length (used by the memcpy and the
// patching code below).
588 :
"=r" (fragmentA),
"=r" (imm8OfPShufW1A),
"=r" (imm8OfPShufW2A),
589 "=r" (fragmentLengthA)
// Template fragment B: loads a single dword and derives both shuffled
// operands from it.
596 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
597 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
598 "punpcklbw %%mm7, %%mm0 \n\t"
599 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
601 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
603 "psubw %%mm1, %%mm0 \n\t"
604 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
605 "pmullw %%mm3, %%mm0 \n\t"
606 "psllw $7, %%mm1 \n\t"
607 "paddw %%mm1, %%mm0 \n\t"
609 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
611 "add $8, %%"REG_a
" \n\t"
// Outputs for fragment B, same layout as for fragment A.
626 :
"=r" (fragmentB),
"=r" (imm8OfPShufW1B),
"=r" (imm8OfPShufW2B),
627 "=r" (fragmentLengthB)
633 for (i=0; i<dstW/numSplits; i++) {
// b, c, d: source offsets of the 2nd, 3rd and 4th output sample of the
// group, relative to xx.
638 int b=((xpos+xInc)>>16) - xx;
639 int c=((xpos+xInc*2)>>16) - xx;
640 int d=((xpos+xInc*3)>>16) - xx;
// Fragment B is used when d+1 < 4, otherwise fragment A (presumably: B
// when all required source bytes fit into one 4-byte load -- confirm).
642 uint8_t *fragment = (d+1<4) ? fragmentB : fragmentA;
643 x86_reg imm8OfPShufW1 = (d+1<4) ? imm8OfPShufW1B : imm8OfPShufW1A;
644 x86_reg imm8OfPShufW2 = (d+1<4) ? imm8OfPShufW2B : imm8OfPShufW2A;
645 x86_reg fragmentLength = (d+1<4) ? fragmentLengthB : fragmentLengthA;
// Largest amount that can still be added to every selector without
// exceeding 3, the maximum of a 2-bit field.
646 int maxShift= 3-(d+inc);
// Weights: the 16-bit fractional part of each position, inverted and
// reduced to 7 bits.
650 filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
651 filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
652 filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
653 filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
// Append the chosen template to the generated code.
656 memcpy(filterCode + fragmentPos, fragment, fragmentLength);
// Patch the pshufw immediates: four 2-bit selectors, one per output
// sample; the first instruction uses the offsets plus inc.
658 filterCode[fragmentPos + imm8OfPShufW1]=
659 (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
660 filterCode[fragmentPos + imm8OfPShufW2]=
661 a | (b<<2) | (c<<4) | (d<<6);
663 if (i+4-inc>=dstW) shift=maxShift;
664 else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3;
// 0x55 is 01010101b, so adding 0x55*shift raises each of the four 2-bit
// selectors by shift; the start position is lowered by the same amount.
666 if (shift && i>=shift) {
667 filterCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
668 filterCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
669 filterPos[i/2]-=shift;
673 fragmentPos+= fragmentLength;
// Terminate the generated code with a near return.
676 filterCode[fragmentPos]=
RET;
681 filterPos[((i/2)+1)&(~1)]= xpos>>16;
// Size of the generated code, including the RET byte.
683 return fragmentPos + 1;
694 int srcRange,
const int table[4],
int dstRange,
695 int brightness,
int contrast,
int saturation)
719 int *srcRange,
int **table,
int *dstRange,
720 int *brightness,
int *contrast,
int *saturation)
759 int usesVFilter, usesHFilter;
766 int dst_stride =
FFALIGN(dstW *
sizeof(int16_t) + 16, 16), dst_stride_px = dst_stride >> 1;
776 unscaled = (srcW == dstW && srcH == dstH);
798 if(!i || (i & (i-1))) {
803 if (srcW<4 || srcH<1 || dstW<8 || dstH<1) {
805 srcW, srcH, dstW, dstH);
809 if (!dstFilter) dstFilter= &dummyFilter;
810 if (!srcFilter) srcFilter= &dummyFilter;
812 c->
lumXInc= (((int64_t)srcW<<16) + (dstW>>1))/dstW;
813 c->
lumYInc= (((int64_t)srcH<<16) + (dstH>>1))/dstH;
816 c->
vRounder= 4* 0x0001000100010001ULL;
840 "full chroma interpolation for destination format '%s' not yet implemented\n",
889 c->
canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
892 av_log(c,
AV_LOG_INFO,
"output width is not a multiple of 32 -> no MMX2 scaler\n");
908 if (flags&SWS_FAST_BILINEAR) {
915 c->
lumXInc = ((int64_t)(srcW-2)<<16)/(dstW-2) - 20;
931 #elif HAVE_VIRTUALALLOC
956 const int filterAlign=
962 srcW , dstW, filterAlign, 1<<14,
968 (flags&SWS_BICUBLIN) ? (flags|
SWS_BILINEAR) : flags, cpu_flags,
976 const int filterAlign=
982 srcH , dstH, filterAlign, (1<<12),
998 short *p = (
short *)&c->vYCoeffsBank[i];
1005 short *p = (
short *)&c->vCCoeffsBank[i];
1015 for (i=0; i<dstH; i++) {
1016 int chrI = (int64_t) i * c->
chrDstH / dstH;
1057 memset(c->
chrUPixBuf[i], 64, dst_stride*2+1);
1105 #if FF_API_SWS_GETCONTEXT
1126 c->
param[0] = param[0];
1127 c->
param[1] = param[1];
1141 float lumaSharpen,
float chromaSharpen,
1142 float chromaHShift,
float chromaVShift,
1149 if (lumaGBlur!=0.0) {
1157 if (chromaGBlur!=0.0) {
1165 if (chromaSharpen!=0.0) {
1174 if (lumaSharpen!=0.0) {
1183 if (chromaHShift != 0.0)
1186 if (chromaVShift != 0.0)
1214 const int length= (int)(variance*quality + 0.5) | 1;
1216 double middle= (length-1)*0.5;
1222 for (i=0; i<length; i++) {
1223 double dist= i-middle;
1224 vec->
coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*
M_PI);
1240 for (i=0; i<length; i++)
1256 for (i=0; i<a->
length; i++)
1266 for (i=0; i<a->
length; i++)
1267 a->
coeff[i]*= scalar;
1284 for (i=0; i<a->
length; i++) {
1285 for (j=0; j<b->
length; j++) {
1333 for (i=0; i<a->
length; i++) {
1396 for (i=0; i<a->
length; i++)
1399 for (i=0; i<a->
length; i++)
1404 for (i=0; i<a->
length; i++) {
1405 int x= (int)((a->
coeff[i]-min)*60.0/range +0.5);
1406 av_log(log_ctx, log_level,
"%1.3f ", a->
coeff[i]);
1407 for (;x>0; x--)
av_log(log_ctx, log_level,
" ");
1408 av_log(log_ctx, log_level,
"|\n");
1422 if (!filter)
return;
1470 #ifdef MAP_ANONYMOUS
1473 #elif HAVE_VIRTUALALLOC
1498 param = default_param;
1501 (context->
srcW != srcW ||
1502 context->
srcH != srcH ||
1504 context->
dstW != dstW ||
1505 context->
dstH != dstH ||
1507 context->
flags != flags ||
1508 context->
param[0] != param[0] ||
1509 context->
param[1] != param[1])) {
1526 context->
param[0] = param[0];
1527 context->
param[1] = param[1];