// Text prefix for the library license string; its use site is not visible in this excerpt.
103 #define LICENSE_PREFIX "libpostproc license: "
// NOTE(review): presumably the byte size of the scratch buffer (temp) in which the
// mode string is parsed and aliases are expanded -- confirm at the use site.
111 #define GET_MODE_BUFFER_SIZE 500
// NOTE(review): presumably the capacity of the options[] array that collects
// option tokens during mode parsing -- confirm at the declaration.
112 #define OPTIONS_ARRAY_SIZE 10
// NOTE(review): use site not visible here; the name suggests a row stride for
// temporary sample buffers -- confirm.
114 #define TEMP_STRIDE 8
// Filter table entries. Each entry lists the short name, the long name, three
// integers and a constant (DERING, LEVEL_FIX) for one filter.
// NOTE(review): the three integers presumably fill chromDefault, minLumQuality
// and minChromQuality (the fields the mode parser reads from filters[i]); the
// struct declaration and its field order are not visible here -- confirm.
141 {
"dr",
"dering", 1, 5, 6,
DERING},
142 {
"al",
"autolevels", 0, 1, 2,
LEVEL_FIX},
// Alias table. Each alias name is immediately followed by the filter string it
// stands for. The long and short spellings (default/de, fast/fa) carry the same
// expansion, and the help text lists these same three expansions.
156 "default",
"hb:a,vb:a,dr:a",
157 "de",
"hb:a,vb:a,dr:a",
158 "fast",
"h1:a,v1:a,dr:a",
159 "fa",
"h1:a,v1:a,dr:a",
160 "ac",
"ha:a:128:7,va:a,dr:a",
// Four thin inline wrappers, one per x86 prefetch hint. Each one issues the
// instruction named in its asm template on operand 0.
// NOTE(review): operand 0 is presumably the pointer p; the operand lists and the
// closing lines of the bodies are not visible in this excerpt -- confirm.

// Issues prefetchnta.
166 static inline void prefetchnta(
void *p)
168 __asm__
volatile(
"prefetchnta (%0)\n\t"
// Issues prefetcht0.
173 static inline void prefetcht0(
void *p)
175 __asm__
volatile(
"prefetcht0 (%0)\n\t"
// Issues prefetcht1.
180 static inline void prefetcht1(
void *p)
182 __asm__
volatile(
"prefetcht1 (%0)\n\t"
// Issues prefetcht2.
187 static inline void prefetcht2(
void *p)
189 __asm__
volatile(
"prefetcht2 (%0)\n\t"
// Counts how many of the 7 adjacent pairs in src[0..7] are nearly equal.
// Range-check trick: with diff = src[i] - src[i+1], the test
//   (unsigned)(diff + dcOffset) < dcThreshold,  dcThreshold = 2*dcOffset + 1
// is true exactly when -dcOffset <= diff <= dcOffset, because a negative sum
// wraps to a very large unsigned value. One compare replaces two.
// NOTE(review): this relies on dcOffset being non-negative; its source is not
// visible here -- confirm.
206 const int dcThreshold= dcOffset*2 + 1;
209 if(((
unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210 if(((
unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211 if(((
unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212 if(((
unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213 if(((
unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214 if(((
unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215 if(((
unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
// Counts, for each of the 8 columns src[0..7], whether the sample and the one
// exactly one stride further on are nearly equal.
// The test (unsigned)(diff + dcOffset) < dcThreshold with
// dcThreshold = 2*dcOffset + 1 is a single-compare form of
// -dcOffset <= diff <= dcOffset (a negative sum wraps to a huge unsigned value).
// NOTE(review): this relies on dcOffset being non-negative; its source is not
// visible here -- confirm.
229 const int dcThreshold= dcOffset*2 + 1;
233 if(((
unsigned)(src[0] - src[0+
stride] + dcOffset)) < dcThreshold) numEq++;
234 if(((
unsigned)(src[1] - src[1+
stride] + dcOffset)) < dcThreshold) numEq++;
235 if(((
unsigned)(src[2] - src[2+
stride] + dcOffset)) < dcThreshold) numEq++;
236 if(((
unsigned)(src[3] - src[3+
stride] + dcOffset)) < dcThreshold) numEq++;
237 if(((
unsigned)(src[4] - src[4+
stride] + dcOffset)) < dcThreshold) numEq++;
238 if(((
unsigned)(src[5] - src[5+
stride] + dcOffset)) < dcThreshold) numEq++;
239 if(((
unsigned)(src[6] - src[6+
stride] + dcOffset)) < dcThreshold) numEq++;
240 if(((
unsigned)(src[7] - src[7+
stride] + dcOffset)) < dcThreshold) numEq++;
// Rejects (returns 0) as soon as one sampled pair differs by more than 2*QP.
// (unsigned)(a - b + 2*QP) > 4*QP is a single-compare form of
// a - b < -2*QP || a - b > 2*QP, since a negative sum wraps to a huge unsigned
// value. The pairs tested are (0,5), (2,7), (4,1) and (6,3) of src.
// NOTE(review): this relies on QP being non-negative; the enclosing function and
// its name are not visible in this excerpt -- confirm.
250 if((
unsigned)(src[0] - src[5] + 2*QP) > 4*QP)
return 0;
252 if((
unsigned)(src[2] - src[7] + 2*QP) > 4*QP)
return 0;
254 if((
unsigned)(src[4] - src[1] + 2*QP) > 4*QP)
return 0;
256 if((
unsigned)(src[6] - src[3] + 2*QP) > 4*QP)
return 0;
// Same rejection test along the stride direction: returns 0 as soon as one
// sampled pair differs by more than 2*QP.
// Columns x, x+1, x+2 and x+3 are each tested on one row pair:
// rows (0,5), (2,7), (4,1) and (6,3) respectively.
// (unsigned)(a - b + 2*QP) > 4*QP is a single-compare form of |a - b| > 2*QP.
// NOTE(review): this relies on QP being non-negative; the loop that supplies x
// and the enclosing function name are not visible -- confirm.
267 if((
unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP)
return 0;
268 if((
unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP)
return 0;
269 if((
unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP)
return 0;
270 if((
unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP)
return 0;
// Weighted difference centred on the dst[3] / dst[4] pair:
// 5*(dst[4]-dst[3]) + 2*(dst[2]-dst[5]).
303 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
// The correction below is only computed when |middleEnergy| is under 8*QP.
305 if(
FFABS(middleEnergy) < 8*c->
QP){
// q is half of the dst[3] - dst[4] step.
306 const int q=(dst[3] - dst[4])/2;
// The same 5/2 weighting applied one pair to the left (dst[1], dst[2]) and one
// pair to the right (dst[5], dst[6]).
307 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
308 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
// d is given the sign of -middleEnergy. How d is computed and then applied is
// not visible in this excerpt.
314 d*=
FFSIGN(-middleEnergy);
// Edge padding values: use the neighbour just outside the 8-sample run
// (dst[-1] / dst[8]) when it is within QP of the edge sample, otherwise repeat
// the edge sample itself.
342 const int first=
FFABS(dst[-1] - dst[0]) < c->
QP ? dst[-1] : dst[0];
343 const int last=
FFABS(dst[8] - dst[7]) < c->
QP ? dst[8] : dst[7];
// Sliding window sums. sums[0] covers 7 samples (first four times, then
// dst[0..2]) plus a constant 4. Each later entry drops the oldest sample of the
// previous window and adds the next one, padding with first / last at the ends.
346 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
347 sums[1] = sums[0] - first + dst[3];
348 sums[2] = sums[1] - first + dst[4];
349 sums[3] = sums[2] - first + dst[5];
350 sums[4] = sums[3] - first + dst[6];
351 sums[5] = sums[4] - dst[0] + dst[7];
352 sums[6] = sums[5] - dst[1] + last;
353 sums[7] = sums[6] - dst[2] + last;
354 sums[8] = sums[7] - dst[3] + last;
355 sums[9] = sums[8] - dst[4] + last;
// Output k combines two windows (7 + 7 samples) and twice the centre sample,
// 16 weights in total, so >>4 normalises. The two +4 constants add up to 8,
// which is half of 16, giving round-to-nearest.
// All sums are computed before any write, and each line reads only its own
// not-yet-overwritten dst[k], so the in-place update is safe.
357 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
358 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
359 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
360 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
361 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
362 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
363 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
364 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
// Lookup table pointer with static storage; starts out NULL. Allocation and the
// loop over i are not visible in this excerpt.
381 static uint64_t *lut=
NULL;
// v is 2*i for i below 128 and 2*(i-256) otherwise.
// NOTE(review): presumably i runs over 0..255 and is being read as a signed
// byte -- confirm at the loop header.
388 int v= i < 128 ? 2*i : 2*(i-256);
// a..d: v scaled by 1/16, 3/16, 5/16 and 7/16, each reduced to its low byte.
397 uint64_t a= (v/16) & 0xFF;
398 uint64_t
b= (v*3/16) & 0xFF;
399 uint64_t c= (v*5/16) & 0xFF;
400 uint64_t d= (7*v/16) & 0xFF;
// A..C: byte-wise negation (0x100 - x)&0xFF of a..c.
401 uint64_t
A= (0x100 - a)&0xFF;
402 uint64_t
B= (0x100 -
b)&0xFF;
403 uint64_t C= (0x100 - c)&0xFF;
// NOTE(review): D is derived from c, not d, so D always equals C. Given the
// A/a, B/b, C/c pattern this looks like a typo for (0x100 - d)&0xFF. Confirm
// the intent, and whether anything reads lut, before changing it.
404 uint64_t
D= (0x100 - c)&0xFF;
// Pack eight bytes into one 64-bit entry: a, b, c, d in the upper four bytes
// (a highest), then D, C, B, A in the lower four (A lowest).
406 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
407 (D<<24) | (C<<16) | (B<<8) | (A);
// Differences of the sample pairs (1,2), (3,4) and (5,6). These locals shadow
// nothing visible here, but note that they reuse the names a, b, c from above.
413 int a= src[1] - src[2];
414 int b= src[3] - src[4];
415 int c= src[5] - src[6];
// Counts how many of the 9 consecutive pairs from src[-1*step] to src[8*step]
// are nearly equal; step is the distance between neighbouring samples.
// (unsigned)(diff + dcOffset) < dcThreshold with dcThreshold = 2*dcOffset + 1
// is a single-compare form of -dcOffset <= diff <= dcOffset, because a negative
// sum wraps to a very large unsigned value.
// NOTE(review): this relies on dcOffset being non-negative; its source is not
// visible here -- confirm.
440 const int dcThreshold= dcOffset*2 + 1;
446 if(((
unsigned)(src[-1*
step] - src[0*
step] + dcOffset)) < dcThreshold) numEq++;
447 if(((
unsigned)(src[ 0*
step] - src[1*
step] + dcOffset)) < dcThreshold) numEq++;
448 if(((
unsigned)(src[ 1*
step] - src[2*
step] + dcOffset)) < dcThreshold) numEq++;
449 if(((
unsigned)(src[ 2*
step] - src[3*
step] + dcOffset)) < dcThreshold) numEq++;
450 if(((
unsigned)(src[ 3*
step] - src[4*
step] + dcOffset)) < dcThreshold) numEq++;
451 if(((
unsigned)(src[ 4*
step] - src[5*
step] + dcOffset)) < dcThreshold) numEq++;
452 if(((
unsigned)(src[ 5*
step] - src[6*
step] + dcOffset)) < dcThreshold) numEq++;
453 if(((
unsigned)(src[ 6*
step] - src[7*
step] + dcOffset)) < dcThreshold) numEq++;
454 if(((
unsigned)(src[ 7*
step] - src[8*
step] + dcOffset)) < dcThreshold) numEq++;
// Running min / max over the samples. The first pair is ordered here; the
// matching assignments are not visible in this excerpt.
458 if(src[0] > src[step]){
// Later samples are taken two at a time: the pair is ordered first, then the
// larger one is compared against max and the smaller one against min. Only some
// of these branches are visible in this excerpt.
466 if(src[x*step] > src[(x+1)*
step]){
467 if(src[x *step] > max) max= src[ x *
step];
470 if(src[(x+1)*
step] > max) max= src[(x+1)*
step];
471 if(src[ x *step] < min) min= src[ x *
step];
// Edge padding values: use the neighbour just outside the 8-sample run
// (src[-1*step] / src[8*step]) when it is within QP of the edge sample,
// otherwise repeat the edge sample itself.
475 const int first=
FFABS(src[-1*step] - src[0]) < QP ? src[-1*
step] : src[0];
476 const int last=
FFABS(src[8*step] - src[7*step]) < QP ? src[8*
step] : src[7*
step];
// Sliding window sums. sums[0] covers 7 samples (first four times, then samples
// 0..2) plus a constant 4. Each later entry drops the oldest sample of the
// previous window and adds the next one, padding with first / last at the ends.
479 sums[0] = 4*first + src[0*
step] + src[1*
step] + src[2*
step] + 4;
480 sums[1] = sums[0] - first + src[3*
step];
481 sums[2] = sums[1] - first + src[4*
step];
482 sums[3] = sums[2] - first + src[5*
step];
483 sums[4] = sums[3] - first + src[6*
step];
484 sums[5] = sums[4] - src[0*
step] + src[7*
step];
485 sums[6] = sums[5] - src[1*
step] + last;
486 sums[7] = sums[6] - src[2*
step] + last;
487 sums[8] = sums[7] - src[3*
step] + last;
488 sums[9] = sums[8] - src[4*
step] + last;
// Output k combines two windows (7 + 7 samples) and twice the centre sample,
// 16 weights in total, so >>4 normalises. The two +4 constants add up to 8,
// giving round-to-nearest. All sums are computed before any write, and each
// line reads only its own not-yet-overwritten sample, so updating in place is safe.
490 src[0*
step]= (sums[0] + sums[2] + 2*src[0*
step])>>4;
491 src[1*
step]= (sums[1] + sums[3] + 2*src[1*
step])>>4;
492 src[2*
step]= (sums[2] + sums[4] + 2*src[2*
step])>>4;
493 src[3*
step]= (sums[3] + sums[5] + 2*src[3*
step])>>4;
494 src[4*
step]= (sums[4] + sums[6] + 2*src[4*
step])>>4;
495 src[5*
step]= (sums[5] + sums[7] + 2*src[5*
step])>>4;
496 src[6*
step]= (sums[6] + sums[8] + 2*src[6*
step])>>4;
497 src[7*
step]= (sums[7] + sums[9] + 2*src[7*
step])>>4;
// Weighted difference centred on samples 3 and 4 (spaced by step):
// 5*(s4 - s3) + 2*(s2 - s5).
500 const int middleEnergy= 5*(src[4*
step] - src[3*
step]) + 2*(src[2*step] - src[5*step]);
// The correction below is only computed when |middleEnergy| is under 8*QP.
502 if(
FFABS(middleEnergy) < 8*
QP){
// q is half of the s3 - s4 step.
503 const int q=(src[3*
step] - src[4*
step])/2;
// The same 5/2 weighting applied one pair earlier (samples 1, 2) and one pair
// later (samples 5, 6).
504 const int leftEnergy= 5*(src[2*
step] - src[1*
step]) + 2*(src[0*step] - src[3*step]);
505 const int rightEnergy= 5*(src[6*
step] - src[5*
step]) + 2*(src[4*step] - src[7*step]);
// d is given the sign of -middleEnergy. How d is computed and then applied is
// not visible in this excerpt.
511 d*=
FFSIGN(-middleEnergy);
// Decide which CPU-specific variants get compiled. Every condition is also true
// when CONFIG_RUNTIME_CPUDETECT is set, so a runtime-detecting build compiles
// all variants. Only some of the COMPILE_* defines are visible in this excerpt.
537 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
542 #define COMPILE_ALTIVEC
543 #endif //HAVE_ALTIVEC
547 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
551 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
555 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
556 #define COMPILE_3DNOW
// Per-variant setup: the HAVE_* feature macros are forced to 0 or 1 and
// RENAME(a) pastes a variant suffix onto a, giving the names used by the
// dispatcher calls (postProcess_C, postProcess_altivec, postProcess_MMX,
// postProcess_MMX2, postProcess_3DNow).
// NOTE(review): presumably each redefinition is preceded by an #undef and
// followed by an #include of a shared template; those lines are not visible
// here -- confirm.
565 #define HAVE_AMD3DNOW 0
567 #define HAVE_ALTIVEC 0
570 #define RENAME(a) a ## _C
574 #ifdef COMPILE_ALTIVEC
577 #define HAVE_ALTIVEC 1
578 #define RENAME(a) a ## _altivec
588 #define RENAME(a) a ## _MMX
599 #define RENAME(a) a ## _MMX2
611 #define HAVE_AMD3DNOW 1
612 #define RENAME(a) a ## _3DNow
// Dispatcher: every call forwards the same ten arguments unchanged to one
// CPU-specific variant of postProcess. The conditions that choose between the
// calls are not visible in this excerpt.
// NOTE(review): the first group (under CONFIG_RUNTIME_CPUDETECT) presumably
// selects by detected CPU flags and the later group by compile-time HAVE_*
// macros -- confirm against the missing #if / else lines.
628 #if CONFIG_RUNTIME_CPUDETECT
632 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
634 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
636 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
638 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
642 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
645 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
649 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
651 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668 "Available postprocessing filters:\n"
670 "short long name short long option Description\n"
671 "* * a autoq CPU power dependent enabler\n"
672 " c chrom chrominance filtering enabled\n"
673 " y nochrom chrominance filtering disabled\n"
674 " n noluma luma filtering disabled\n"
675 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
676 " 1. difference factor: default=32, higher -> more deblocking\n"
677 " 2. flatness threshold: default=39, lower -> more deblocking\n"
678 " the h & v deblocking filters share these\n"
679 " so you can't set different thresholds for h / v\n"
680 "vb vdeblock (2 threshold) vertical deblocking filter\n"
681 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
682 "va vadeblock (2 threshold) vertical deblocking filter\n"
683 "h1 x1hdeblock experimental h deblock filter 1\n"
684 "v1 x1vdeblock experimental v deblock filter 1\n"
685 "dr dering deringing filter\n"
686 "al autolevels automatic brightness / contrast\n"
687 " f fullyrange stretch luminance to (0..255)\n"
688 "lb linblenddeint linear blend deinterlacer\n"
689 "li linipoldeint linear interpolating deinterlace\n"
690 "ci cubicipoldeint cubic interpolating deinterlacer\n"
691 "md mediandeint median deinterlacer\n"
692 "fd ffmpegdeint ffmpeg deinterlacer\n"
693 "l5 lowpass5 FIR lowpass deinterlacer\n"
694 "de default hb:a,vb:a,dr:a\n"
695 "fa fast h1:a,v1:a,dr:a\n"
696 "ac ha:a:128:7,va:a,dr:a\n"
697 "tn tmpnoise (3 threshold) temporal noise reducer\n"
698 " 1. <= 2. <= 3. larger -> stronger filtering\n"
699 "fq forceQuant <quantizer> force quantizer\n"
701 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
702 "long form example:\n"
703 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
704 "short form example:\n"
705 "vb:a/hb:a/lb de,-vb\n"
// Characters that separate one filter from the next in a mode string (comma or
// slash); passed to strtok when splitting the string into filter tokens.
715 static const char filterDelimiters[] =
",/";
// Character that separates a filter name from its options and the options from
// each other (colon); passed to strtok when splitting a single filter token.
716 static const char optionDelimiters[] =
":";
// Mode-string parser fragment: splits the string into filter tokens, splits
// each token into a name plus options, and counts everything not recognised.
// Number of option tokens of the current filter that no handler has claimed yet.
748 int numOfUnknownOptions=0;
// Outer split: next filter token starting at p.
751 filterToken= strtok(p, filterDelimiters);
752 if(filterToken ==
NULL)
break;
// strtok keeps a single hidden cursor, and the call on filterToken below
// re-seeds it, so the outer scan position is carried in p instead: skip the
// token and the delimiter that strtok replaced with a terminator.
// NOTE(review): this assumes filterToken == p. If strtok skipped leading or
// repeated delimiters, p lands inside the token just returned. After the last
// token p points one past the string terminator, which is only safe if the
// buffer is zero-filled beyond the string -- confirm both at the buffer setup,
// which is not visible here.
753 p+= strlen(filterToken) + 1;
// Inner split: the part before the first colon is the filter name.
754 filterName= strtok(filterToken, optionDelimiters);
// A leading minus on the name is handled specially; the branch body is not
// visible in this excerpt.
757 if(*filterName ==
'-'){
// Remaining colon-separated pieces of the same token are the options.
763 option= strtok(
NULL, optionDelimiters);
764 if(option ==
NULL)
break;
// Generic options understood for every filter, in long and short spelling.
767 if(!strcmp(
"autoq", option) || !strcmp(
"a", option)) q= quality;
768 else if(!strcmp(
"nochrom", option) || !strcmp(
"y", option)) chrom=0;
769 else if(!strcmp(
"chrom", option) || !strcmp(
"c", option)) chrom=1;
770 else if(!strcmp(
"noluma", option) || !strcmp(
"n", option)) luma=0;
// Anything else is stored for the filter-specific handlers further down.
// NOTE(review): no bound check on numOfUnknownOptions is visible in this
// excerpt -- confirm one exists before this store.
772 options[numOfUnknownOptions] =
option;
773 numOfUnknownOptions++;
// Terminate the list so the handlers can iterate until NULL.
777 options[numOfUnknownOptions] =
NULL;
// Fallback: point p at the start of temp and make temp an empty string.
786 if(p==
NULL) p= temp, *p=0;
790 spaceLeft= p - temp + plen;
// Shift the remaining text, including its terminator, right by newlen bytes;
// memmove is required because source and destination overlap.
795 memmove(p + newlen, p, plen+1);
// Look the name up in the filter table by long or short name.
802 if( !strcmp(filters[i].longName, filterName)
803 || !strcmp(filters[i].shortName, filterName)){
// Luma needs the quality q to reach the filter minimum and luma to be enabled.
810 if(q >= filters[i].minLumQuality && luma)
// Chroma: explicitly enabled (1), or left at -1 with the filter defaulting to
// chroma on; the quality minimum applies as well.
812 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
813 if(q >= filters[i].minChromQuality)
// Filter-specific option handlers follow. Each option a handler accepts is
// subtracted from numOfUnknownOptions. Which filter each loop belongs to is not
// visible in this excerpt.
820 for(o=0; options[o]!=
NULL; o++){
821 if( !strcmp(options[o],
"fullyrange")
822 ||!strcmp(options[o],
"f")){
825 numOfUnknownOptions--;
// Numeric options: strtol with base 0 accepts decimal, octal and hex, and
// tail != options[o] means at least one character was converted. This loop
// stops once three values have been taken.
834 for(o=0; options[o]!=
NULL; o++){
837 strtol(options[o], &tail, 0);
838 if(tail!=options[o]){
840 numOfUnknownOptions--;
841 if(numOfNoises >= 3)
break;
// At most two leading numeric options; stops at the first non-numeric one.
// NOTE(review): the long result of strtol is narrowed to int with no range
// check visible here.
849 for(o=0; options[o]!=
NULL && o<2; o++){
851 int val= strtol(options[o], &tail, 0);
852 if(tail==options[o])
break;
854 numOfUnknownOptions--;
// At most one leading numeric option, with the same narrowing caveat.
863 for(o=0; options[o]!=
NULL && o<1; o++){
865 int val= strtol(options[o], &tail, 0);
866 if(tail==options[o])
break;
868 numOfUnknownOptions--;
// Error accounting: one error for an unknown filter name plus one per option
// that no handler claimed.
874 if(!filterNameOk) ppMode->
error++;
875 ppMode->
error += numOfUnknownOptions;
// Size in 16-pixel units, rounded up: (n + 15) >> 4.
897 int mbWidth = (width+15)>>4;
898 int mbHeight= (height+15)>>4;
// Row length for a QP table: the rounded-up count of 16-pixel units plus 2.
// NOTE(review): the reason for the 2 extra entries is not visible in this
// excerpt -- confirm at the allocation.
932 int qpStride= (width+15)/16 + 2;
974 uint8_t * dst[3],
const int dstStride[3],
977 pp_mode *vm,
void *vc,
int pict_type)
// Picture size in 16-pixel units, rounded up.
979 int mbWidth = (width+15)>>4;
980 int mbHeight= (height+15)>>4;
// Magnitude of the QP row stride; QPStride itself may be negative, as the
// signed indexing further down shows.
984 int absQPStride =
FFABS(QPStride);
// Both strides are forced to 0 here; the condition is not visible in this excerpt.
995 absQPStride = QPStride = 0;
// Copy the QP table into stdQPTable with every entry halved. The bulk of the
// table is processed four bytes at a time: shift the whole word right by one,
// then mask with 0x7F7F7F7F to clear the bit that leaked in from the
// neighbouring byte. The second loop restarts at i*4 for the count % 4 tail.
// NOTE(review): the uint32_t casts assume both tables are 4-byte aligned and
// bypass strict aliasing -- confirm alignment guarantees and compiler flags.
1004 const int count= mbHeight * absQPStride;
1005 for(i=0; i<(count>>2); i++){
1006 ((uint32_t*)c->
stdQPTable)[i] = (((
const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1008 for(i<<=2; i<count; i++){
1012 QPStride= absQPStride;
// Per-macroblock loop; its body is not visible in this excerpt.
1017 for(y=0; y<mbHeight; y++){
1018 for(x=0; x<mbWidth; x++){
// nonBQPTable is refreshed only when the low three bits of pict_type are not 3.
// NOTE(review): presumably 3 denotes a B picture, going by the table name --
// confirm against the picture type constants.
1026 if((pict_type&7)!=3){
// Packed copy as above, keeping the low 6 bits of every byte (mask 0x3F3F3F3F).
1029 const int count= mbHeight * QPStride;
1030 for(i=0; i<(count>>2); i++){
1031 ((uint32_t*)c->
nonBQPTable)[i] = ((
const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1033 for(i<<=2; i<count; i++){
// Row-by-row variant: the destination is laid out with absQPStride while the
// source is addressed with the signed QPStride.
// NOTE(review): presumably this is the branch for a negative QPStride; the
// selecting condition is not visible -- confirm.
1038 for(i=0; i<mbHeight; i++) {
1039 for(j=0; j<absQPStride; j++) {
1040 c->
nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
// Plane 0 is filtered with 0 as the ninth argument.
1049 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1050 width, height, QP_store, QPStride, 0, mode, c);
// Planes 1 and 2 are filtered with 1 and 2 as the ninth argument.
// NOTE(review): the ninth argument presumably is the isColor parameter seen in
// the CPU-specific postProcess calls, and width / height are presumably already
// scaled to chroma size here -- neither is visible in this excerpt, confirm.
1056 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1057 width, height, QP_store, QPStride, 1, mode, c);
1058 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1059 width, height, QP_store, QPStride, 2, mode, c);
// Otherwise planes 1 and 2 are copied unfiltered: via linecpy when the source
// and destination strides match, else one memcpy of width bytes per row.
1061 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1062 linecpy(dst[1], src[1], height, srcStride[1]);
1063 linecpy(dst[2], src[2], height, srcStride[2]);
1067 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1068 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);