1
mirror of https://github.com/mpv-player/mpv synced 2025-01-24 19:37:30 +01:00
mpv/libmpcodecs/vf_divtc.c
aurel 798d2d1337 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@13721 b3059339-0415-0410-9bf9-f77b7e298cf2
2004-10-21 11:55:20 +00:00

699 lines
15 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include "../config.h"
#include "../mp_msg.h"
#include "../cpudetect.h"
#include "../bswap.h"
#include "img_format.h"
#include "mp_image.h"
#include "vf.h"
#include "../libvo/fastmemcpy.h"
/* Declared ahead of its initialised definition at the end of the file so that
 * analyze() can use vf_info_divtc.name in its messages. */
vf_info_t vf_info_divtc;
/*
 * Per-instance state of the divtc filter.
 */
struct vf_priv_s
{
    int deghost;          /* deghosting threshold handed to deghost_plane();
                             0 disables it, a negative value makes analyze()
                             choose between off and the absolute value */
    int pass;             /* 1 = write log, 2 = use analysed log,
                             anything else = one pass mode */
    int phase;            /* telecine phase currently in effect */
    int window;           /* number of entries in history[] */
    int fcount;           /* number of entries in csdata[] (pass 2) */
    int bcount;           /* number of entries in bdata[] (pass 2) */
    int frameno;          /* input frame counter / index into the log */
    int misscount;        /* checksum lookups that found no match (pass 2) */
    int ocount;           /* incremented once per put_image() call */
    int sum[5];           /* running difference sums, indexed by frameno%5 */
    double threshold;     /* minimum match strength for accepting a phase */
    FILE *file;           /* pass-1/pass-2 log file, or NULL */
    char *bdata;          /* pass 2: phase for each slice of 5 frames */
    unsigned int *csdata; /* pass 2: frame checksums; points 15 entries
                             into its allocation */
    int *history;         /* one pass mode: ring buffer of frame differences */
};
/*
* diff_MMX and diff_C stolen from vf_decimate.c
*/
#ifdef HAVE_MMX
/*
 * MMX version of the block difference: sum of absolute byte differences
 * between two blocks of 8 rows by 8 bytes.
 *
 * old, new - first byte of each block
 * os, ns   - distance in bytes between consecutive rows of old / new
 *
 * Each iteration loads one 8 byte row from each block.  The two saturating
 * subtractions (old-new and new-old) leave the absolute difference in one
 * register or the other, the results are widened to 16 bit words against the
 * zeroed mm7 and accumulated in mm4.  After 8 rows the four 16 bit partial
 * sums are stored to out[] and added together in C.
 *
 * NOTE(review): the asm overwrites ecx and advances the registers bound to
 * the "S" and "D" input operands, but declares no outputs and no clobbers
 * other than "memory" - confirm this is safe with the compilers in use.
 */
static int diff_MMX(unsigned char *old, unsigned char *new, int os, int ns)
{
/* receives the four 16 bit partial sums held in mm4 */
volatile short out[4];
asm (
"movl $8, %%ecx \n\t"
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm7, %%mm7 \n\t"
".balign 16 \n\t"
"1: \n\t"
"movq (%%"REG_S"), %%mm0 \n\t"
"movq (%%"REG_S"), %%mm2 \n\t"
"add %%"REG_a", %%"REG_S" \n\t"
"movq (%%"REG_D"), %%mm1 \n\t"
"add %%"REG_b", %%"REG_D" \n\t"
"psubusb %%mm1, %%mm2 \n\t"
"psubusb %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm0 \n\t"
"movq %%mm1, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t"
"paddw %%mm0, %%mm4 \n\t"
"paddw %%mm1, %%mm4 \n\t"
"paddw %%mm2, %%mm4 \n\t"
"paddw %%mm3, %%mm4 \n\t"
"decl %%ecx \n\t"
"jnz 1b \n\t"
"movq %%mm4, (%%"REG_d") \n\t"
"emms \n\t"
:
: "S" (old), "D" (new), "a" ((long)os), "b" ((long)ns), "d" (out)
: "memory"
);
/* horizontal add of the four partial sums */
return out[0]+out[1]+out[2]+out[3];
}
#endif
/*
 * Portable version of the block difference: sum of absolute byte differences
 * between two blocks of 8 rows by 8 bytes.
 *
 * old, new - first byte of each block
 * os, ns   - distance in bytes between consecutive rows of old / new
 *
 * Returns the sum over columns 0..7 of rows 0..7, the same area that
 * diff_MMX() covers.
 */
static int diff_C(unsigned char *old, unsigned char *new, int os, int ns)
{
    int x, y, d=0;

    for(y=0; y<8; y++, new+=ns, old+=os)
        /* columns 0..7: counting x down from 8 to 1 would skip column 0 and
           read one byte past the right edge of the block */
        for(x=0; x<8; x++)
            d+=abs(new[x]-old[x]);

    return d;
}
/* Block difference routine; open() points it at diff_MMX or diff_C. */
static int (*diff)(unsigned char *, unsigned char *, int, int);

/*
 * Difference measure between two planes.
 *
 * The plane is walked in whole 8x8 blocks (partial blocks at the right and
 * bottom edges are ignored) and diff() is evaluated for every block.
 *
 * old, new - top left byte of each plane
 * w, h     - plane size
 * os, ns   - row strides of old / new
 * arg      - unused, present to fit the imgop() callback signature
 *
 * Returns (sum of all block differences + block count * largest block
 * difference) / 2.
 */
static int diff_plane(unsigned char *old, unsigned char *new,
                      int w, int h, int os, int ns, int arg)
{
    int row, col;
    int worst=0, total=0, blocks=0;

    for(row=0; row<h-7; row+=8)
    {
        for(col=0; col<w-7; col+=8)
        {
            int cur=diff(old+col+row*os, new+col+row*ns, os, ns);

            total+=cur;
            blocks++;
            if(cur>worst)
                worst=cur;
        }
    }

    return (total+blocks*worst)/2;
}
/*
static unsigned int checksum_plane(unsigned char *p, unsigned char *z,
int w, int h, int s, int zs, int arg)
{
unsigned int shift, sum;
unsigned char *e;
for(sum=0; h; h--, p+=s-w)
for(e=p+w, shift=32; p<e;)
sum^=(*p++)<<(shift=(shift-8)&31);
return sum;
}
*/
/*
 * 32 bit XOR checksum of one plane.
 *
 * Within each row, consecutive bytes are XORed into the sum at bit positions
 * 24, 16, 8, 0, 24, ...  The aligned middle part of a row is processed one
 * machine word at a time: the words are XORed together, folded to 32 bits,
 * converted with be2me_32() so that the byte first in memory ends up in the
 * top bits, and rotated to line up with the byte position reached so far.
 *
 * p     - top left byte of the plane
 * w, h  - row length in bytes and number of rows
 * s     - row stride in bytes
 * z, zs, arg - unused, present to fit the imgop() callback signature
 *
 * Returns the checksum.
 */
static unsigned int checksum_plane(unsigned char *p, unsigned char *z,
                                   int w, int h, int s, int zs, int arg)
{
    unsigned int shift;
    uint32_t sum, t;
    unsigned char *e;
#if MP_WORDSIZE==64
    typedef uint64_t wsum_t;
#else
    typedef uint32_t wsum_t;
#endif
    wsum_t wsum;

    for(sum=0; h; h--, p+=s-w)
    {
        /* leading bytes up to the first word aligned address; the pointer is
           converted through uintptr_t instead of being truncated to int, and
           the byte is widened to unsigned before it is shifted so that
           nothing is shifted into the sign bit of an int */
        for(shift=0, e=p+w; ((uintptr_t)p&(sizeof(wsum_t)-1)) && p<e;)
            sum^=(uint32_t)*p++<<(shift=(shift-8)&31);

        /* whole words; the remaining length is compared so that no pointer
           in front of the row has to be formed for short rows */
        for(wsum=0; e-p>=(int)sizeof(wsum_t); p+=sizeof(wsum_t))
            wsum^=*(wsum_t *)p;

#if MP_WORDSIZE==64
        t=be2me_32((uint32_t)(wsum>>32^wsum));
#else
        t=be2me_32(wsum);
#endif

        /* rotate left by shift; the right shift count is masked because
           shift may be 0 and a 32 bit shift of a 32 bit value is undefined */
        sum^=(t<<shift|t>>((32-shift)&31));

        /* trailing bytes */
        while(p<e)
            sum^=(uint32_t)*p++<<(shift=(shift-8)&31);
    }

    return sum;
}
/*
 * Remove the ghost of plane s from plane d, in place.
 *
 * Every byte of d that differs from the corresponding byte of s by at least
 * threshold is replaced by 2*d - s, clamped to 0..255.  Other bytes are left
 * untouched.
 *
 * d, s   - top left byte of the destination / reference plane
 * w, h   - row length in bytes and number of rows
 * ds, ss - row strides of d / s
 *
 * Always returns 0.
 */
static int deghost_plane(unsigned char *d, unsigned char *s,
                         int w, int h, int ds, int ss, int threshold)
{
    int rows, col;

    for(rows=h; rows!=0; rows--)
    {
        for(col=0; col<w; col++)
        {
            int cur=d[col];
            int ref=s[col];

            if(abs(cur-ref)>=threshold)
            {
                int v=2*cur-ref;

                if(v<0)
                    v=0;
                else if(v>255)
                    v=255;
                d[col]=v;
            }
        }
        d+=ds;
        s+=ss;
    }

    return 0;
}
/*
 * Apply a plane operation to every plane of an image.
 *
 * planeop - callback taking (dst plane, src plane, width, height,
 *           dst stride, src stride, arg)
 * dst     - image whose geometry drives the operation
 * src     - second image, or NULL; with NULL the callback receives a null
 *           plane pointer and a zero stride
 * arg     - passed through to the callback unchanged
 *
 * For planar images the callback runs on plane 0 with the full size and on
 * planes 1 and 2 with the chroma size, and the three results are added.
 * Otherwise it runs once on plane 0 with a width of w*(bpp/8) bytes.
 */
static int imgop(int(*planeop)(unsigned char *, unsigned char *,
                               int, int, int, int, int),
                 mp_image_t *dst, mp_image_t *src, int arg)
{
    int total, plane;

    if(!(dst->flags&MP_IMGFLAG_PLANAR))
    {
        /* packed format: a single plane, width given in bytes */
        return planeop(dst->planes[0], src?src->planes[0]:0,
                       dst->w*(dst->bpp/8), dst->h,
                       dst->stride[0], src?src->stride[0]:0, arg);
    }

    total=planeop(dst->planes[0], src?src->planes[0]:0,
                  dst->w, dst->h,
                  dst->stride[0], src?src->stride[0]:0, arg);

    for(plane=1; plane<3; plane++)
        total+=planeop(dst->planes[plane], src?src->planes[plane]:0,
                       dst->chroma_width, dst->chroma_height,
                       dst->stride[plane], src?src->stride[plane]:0, arg);

    return total;
}
/*
* Find the phase in which the telecine pattern fits best to the
* given 5 frame slice of frame difference measurements.
*
* If phase1 and phase2 are not negative, only the two specified
* phases are tested.
*/
/* Descending order comparison of two ints: positive when *b is larger than
 * *a, negative when it is smaller, zero when they are equal.
 * Not referenced anywhere in this file. */
static int cmp(int *a, int *b)
{
    int first=*a;
    int second=*b;

    return second-first;
}
/*
 * Score the five possible telecine phases against a slice of five frame
 * difference values and return the best one.
 *
 * p              - filter state; only p->deghost is read, to pick the
 *                  weight pattern
 * diffs          - five difference values
 * phase1, phase2 - if both are non-negative only these two phases are
 *                  scored, all others are given the score INT_MIN
 * strength       - if not NULL, receives (best - second best) / best when
 *                  the best score is positive, otherwise 0.0
 *
 * The score of phase f is the sum of diffs[n] * pattern[(n-f+5)%5].
 * Ties go to the lowest phase number.
 */
static int match(struct vf_priv_s *p, int *diffs,
                 int phase1, int phase2, double *strength)
{
    static const int weights_plain[5]={ -4, 1, 1, 1, 1 };
    static const int weights_ghost[5]={ -2, -3, 4, 4, -3 };
    const int *weights=p->deghost>0?weights_ghost:weights_plain;
    const int all_phases=(phase1<0 || phase2<0);
    int score[5];
    int ph, i, best, second;

    for(ph=0; ph<5; ph++)
    {
        if(!all_phases && ph!=phase1 && ph!=phase2)
        {
            score[ph]=INT_MIN;
            continue;
        }
        score[ph]=0;
        for(i=0; i<5; i++)
            score[ph]+=diffs[i]*weights[(i-ph+5)%5];
    }

    /* find the best match */
    best=0;
    for(ph=1; ph<5; ph++)
        if(score[ph]>score[best])
            best=ph;

    if(strength)
    {
        /* the second best match */
        second=best?0:1;
        for(ph=second+1; ph<5; ph++)
            if(ph!=best && score[ph]>score[second])
                second=ph;

        if(score[best]>0)
            *strength=(double)(score[best]-score[second])/score[best];
        else
            *strength=0.0;
    }

    return best;
}
/*
 * Per-frame entry point of the filter.
 *
 * Depending on p->pass the frame is logged (pass 1), looked up in the
 * analysed pass-1 log (pass 2) or fed into the running statistics (one pass
 * mode) to determine the telecine phase.  Based on the phase the frame is
 * then dropped, deghosted or passed on unchanged.
 *
 * Returns 0 for a dropped frame, otherwise the result of
 * vf_next_put_image().
 */
static int put_image(struct vf_instance_s* vf, mp_image_t *mpi)
{
mp_image_t *dmpi, *tmpi=0;
int n, m, f, newphase;
struct vf_priv_s *p=vf->priv;
unsigned int checksum;
double d;
/* Every exit path below copies mpi into dmpi, and diff_plane/deghost_plane
   use dmpi as the previous frame.
   NOTE(review): this presumably relies on the STATIC|PRESERVE request
   returning the same buffer on every call - confirm against vf_get_image().
   NOTE(review): the result is not checked for NULL. */
dmpi=vf_get_image(vf->next, mpi->imgfmt,
MP_IMGTYPE_STATIC, MP_IMGFLAG_ACCEPT_STRIDE |
MP_IMGFLAG_PRESERVE | MP_IMGFLAG_READABLE,
mpi->width, mpi->height);
vf_clone_mpi_attributes(dmpi, mpi);
newphase=p->phase;
switch(p->pass)
{
case 1:
/* pass 1: log the checksum of this frame and its difference from the
   previous frame (0 for the very first frame) */
fprintf(p->file, "%08x %d\n",
(unsigned int)imgop((void *)checksum_plane, mpi, 0, 0),
p->frameno?imgop(diff_plane, dmpi, mpi, 0):0);
break;
case 2:
/* pass 2: take the phase from the analysed log */
if(p->frameno/5>p->bcount)
{
mp_msg(MSGT_VFILTER, MSGL_ERR,
"\n%s: Log file ends prematurely! "
"Switching to one pass mode.\n", vf->info->name);
p->pass=0;
break;
}
checksum=(unsigned int)imgop((void *)checksum_plane, mpi, 0, 0);
if(checksum!=p->csdata[p->frameno])
{
/* the frame is not the one the log expects here: look for the same
   checksum up to 99 frames forward or backward in the log */
for(f=0; f<100; f++)
if(p->frameno+f<p->fcount && p->csdata[p->frameno+f]==checksum)
break;
else if(p->frameno-f>=0 && p->csdata[p->frameno-f]==checksum)
{
f=-f;
break;
}
if(f<100)
{
/* found: resynchronise the frame counter with the log */
mp_msg(MSGT_VFILTER, MSGL_INFO,
"\n%s: Mismatch with pass-1: %+d frame(s).\n",
vf->info->name, f);
p->frameno+=f;
p->misscount=0;
}
else if(p->misscount++>=30)
{
/* too many frames that could not be located in the log */
mp_msg(MSGT_VFILTER, MSGL_ERR,
"\n%s: Sync with pass-1 lost! "
"Switching to one pass mode.\n", vf->info->name);
p->pass=0;
break;
}
}
/* one phase entry per slice of 5 frames; clamp to the last slice */
n=(p->frameno)/5;
if(n>=p->bcount) n=p->bcount-1;
newphase=p->bdata[n];
break;
default:
/* one pass mode: keep running sums of the differences of the last
   p->window frames, one sum per position in the 5 frame cycle */
if(p->frameno)
{
int *sump=p->sum+p->frameno%5,
*histp=p->history+p->frameno%p->window;
/* replace the oldest history entry with the new difference and keep
   the sum in step */
*sump-=*histp;
*sump+=(*histp=imgop(diff_plane, dmpi, mpi, 0));
}
m=match(p, p->sum, -1, -1, &d);
if(d>=p->threshold)
newphase=m;
}
/* A phase change is only taken over when (phase+4)%5 of the old and of the
   new phase compare the same way against the current position n in the
   5 call cycle; otherwise it is retried on a later frame. */
n=p->ocount++%5;
if(newphase!=p->phase && ((p->phase+4)%5<n)==((newphase+4)%5<n))
{
p->phase=newphase;
mp_msg(MSGT_VFILTER, MSGL_STATUS,
"\n%s: Telecine phase %d.\n", vf->info->name, p->phase);
}
/* position of this frame relative to the phase */
switch((p->frameno++-p->phase+10)%5)
{
case 0:
/* drop this frame: remember it as the previous frame and output nothing */
imgop((void *)memcpy_pic, dmpi, mpi, 0);
return 0;
case 4:
if(p->deghost>0)
{
/* output a deghosted copy of this frame, computed against the previous
   frame still held in dmpi, then remember the unmodified frame */
tmpi=vf_get_image(vf->next, mpi->imgfmt,
MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
mpi->width, mpi->height);
vf_clone_mpi_attributes(tmpi, mpi);
imgop((void *)memcpy_pic, tmpi, mpi, 0);
imgop(deghost_plane, tmpi, dmpi, p->deghost);
imgop((void *)memcpy_pic, dmpi, mpi, 0);
return vf_next_put_image(vf, tmpi);
}
}
/* all other frames pass through unchanged */
imgop((void *)memcpy_pic, dmpi, mpi, 0);
return vf_next_put_image(vf, dmpi);
}
/*
 * Read the pass-1 log from p->file and work out the telecine phase for every
 * slice of 5 frames.
 *
 * On success p->csdata holds the frame checksums (pointing 15 entries into
 * its allocation), p->fcount the number of frames padded up to a multiple of
 * 5, p->bdata one phase per slice and p->bcount the number of slices.  A
 * negative p->deghost (automatic mode) is resolved to 0 or to its absolute
 * value.
 *
 * Returns 1 on success and 0 on failure (out of memory, empty log or no
 * telecine pattern found).  After a failure p->csdata and p->bdata may be
 * set; they are released by uninit().
 */
static int analyze(struct vf_priv_s *p)
{
    int *buf=0, *bp, bufsize=0, n, b, f, i, j, m, s;
    unsigned int *cbuf=0, *cp;
    char *pbuf;
    char lbuf[256];
    int sum[5];
    double d;
    /* "phase not decided yet" marker as stored by memset(pbuf, 255, b);
       converted to char so that the comparisons below also work where
       plain char is unsigned */
    const char unknown=(char)-1;

    /* read the file; the first 15 slots are reserved for dummy data */
    n=15;
    while(fgets(lbuf, sizeof lbuf, p->file))
    {
        /* keep room for up to 4 padding frames and 15 trailing dummies */
        if(n>=bufsize-19)
        {
            bufsize=bufsize?bufsize*2:30000;
            if((bp=realloc(buf, bufsize*sizeof *buf))) buf=bp;
            if((cp=realloc(cbuf, bufsize*sizeof *cbuf))) cbuf=cp;
            if(!bp || !cp)
            {
                mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: Not enough memory.\n",
                       vf_info_divtc.name);
                free(buf);
                free(cbuf);
                return 0;
            }
        }
        /* a line that fails to parse yields zeros, not indeterminate data */
        cbuf[n]=0;
        buf[n]=0;
        sscanf(lbuf, "%x %d", cbuf+n, buf+n);
        n++;
    }

    /* n started at 15, so 15 means that no line was read; the buffers are
       still unallocated in that case and must not be touched */
    if(n==15)
    {
        mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: Empty 2-pass log file.\n",
               vf_info_divtc.name);
        free(buf);
        free(cbuf);
        return 0;
    }

    /* generate some dummy data past the beginning and end of the array */
    buf+=15, cbuf+=15;
    n-=15;

    memcpy(buf-15, buf, 15*sizeof *buf);
    memset(cbuf-15, 0, 15*sizeof *cbuf);

    while(n%5)
        buf[n]=buf[n-5], cbuf[n]=0, n++;

    memcpy(buf+n, buf+n-15, 15*sizeof *buf);
    memset(cbuf+n, 0, 15*sizeof *cbuf);

    p->csdata=cbuf;
    p->fcount=n;

    /* array with one slot for each slice of 5 frames */
    p->bdata=pbuf=malloc(p->bcount=b=(n/5));
    if(!pbuf)
    {
        mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: Not enough memory.\n",
               vf_info_divtc.name);
        free(buf-15);
        return 0;
    }
    memset(pbuf, 255, b);

    /* resolve the automatic mode */
    if(p->deghost<0)
    {
        int deghost=-p->deghost;
        double s0=0.0, s1=0.0;

        for(f=0; f<n; f+=5)
        {
            p->deghost=0; match(p, buf+f, -1, -1, &d); s0+=d;
            p->deghost=1; match(p, buf+f, -1, -1, &d); s1+=d;
        }

        p->deghost=s1>s0?deghost:0;

        mp_msg(MSGT_VFILTER, MSGL_INFO,
               "%s: Deghosting %-3s (relative pattern strength %+.2fdB).\n",
               vf_info_divtc.name,
               p->deghost?"ON":"OFF",
               10.0*log10(s1/s0));
    }

    /* analyze the data: sliding window of 7 slices, starting with
       slices -3..3 */
    for(f=0; f<5; f++)
        for(sum[f]=0, n=-15; n<20; n+=5)
            sum[f]+=buf[n+f];

    for(f=0; f<b; f++)
    {
        m=match(p, sum, -1, -1, &d);

        if(d>=p->threshold)
            pbuf[f]=m;

        if(f<b-1)
            for(n=0; n<5; n++)
                sum[n]=sum[n]-buf[5*(f-3)+n]+buf[5*(f+4)+n];
    }

    /* fill in the gaps */

    /* the beginning */
    for(f=0; f<b && pbuf[f]==unknown; f++);

    if(f==b)
    {
        free(buf-15);
        mp_msg(MSGT_VFILTER, MSGL_FATAL, "%s: No telecine pattern found!\n",
               vf_info_divtc.name);
        return 0;
    }

    for(n=0; n<f; pbuf[n++]=pbuf[f]);

    /* the end */
    for(f=b-1; pbuf[f]==unknown; f--);
    for(n=f+1; n<b; pbuf[n++]=pbuf[f]);

    /* the rest */
    for(f=0;;)
    {
        while(f<b && pbuf[f]!=unknown) f++;
        if(f==b) break;
        for(n=f; pbuf[n]==unknown; n++);

        if(pbuf[f-1]==pbuf[n])
        {
            /* just a gap */
            while(f<n) pbuf[f++]=pbuf[n];
        }
        else
        {
            /* phase change, reanalyze the original data in the gap with zero
               threshold for only the two phases that appear at the ends */

            /* NOTE(review): this seeds the window with the 3 slices before
               the gap only, while the update below removes slice i-3 and
               adds slice i+4 as for the 7 slice window used above -
               confirm that this is intended */
            for(i=0; i<5; i++)
                for(sum[i]=0, j=5*f-15; j<5*f; j+=5)
                    sum[i]+=buf[i+j];

            for(i=f; i<n; i++)
            {
                pbuf[i]=match(p, sum, pbuf[f-1], pbuf[n], 0);

                for(j=0; j<5; j++)
                    sum[j]=sum[j]-buf[5*(i-3)+j]+buf[5*(i+4)+j];
            }

            /* estimate the transition point by dividing the gap
               in the same proportion as the number of matches of each kind */
            for(i=f, m=f; i<n; i++)
                if(pbuf[i]==pbuf[f-1]) m++;

            /* find the transition of the right direction nearest to the
               estimated point */
            if(m>f && m<n)
            {
                for(j=m; j>f; j--)
                    if(pbuf[j-1]==pbuf[f-1] && pbuf[j]==pbuf[n]) break;

                for(s=m; s<n; s++)
                    if(pbuf[s-1]==pbuf[f-1] && pbuf[s]==pbuf[n]) break;

                m=(s-m<m-j)?s:j;
            }

            /* and rewrite the data to allow only this one transition */
            for(i=f; i<m; i++)
                pbuf[i]=pbuf[f-1];

            for(; i<n; i++)
                pbuf[i]=pbuf[n];

            f=n;
        }
    }

    free(buf-15);

    return 1;
}
/*
 * Report whether an image format can be handled: the formats listed below
 * are forwarded to the next filter for its verdict, every other format is
 * rejected with 0.
 */
static int query_format(struct vf_instance_s* vf, unsigned int fmt)
{
    int supported;

    switch(fmt)
    {
    case IMGFMT_444P: case IMGFMT_IYUV: case IMGFMT_RGB24:
    case IMGFMT_422P: case IMGFMT_UYVY: case IMGFMT_BGR24:
    case IMGFMT_411P: case IMGFMT_YUY2: case IMGFMT_IF09:
    case IMGFMT_YV12: case IMGFMT_I420: case IMGFMT_YVU9:
    case IMGFMT_IUYV: case IMGFMT_Y800: case IMGFMT_Y8:
        supported=1;
        break;
    default:
        supported=0;
        break;
    }

    if(!supported)
        return 0;

    return vf_next_query_format(vf,fmt);
}
static void uninit(struct vf_instance_s* vf)
{
if(vf->priv)
{
if(vf->priv->file) fclose(vf->priv->file);
if(vf->priv->csdata) free(vf->priv->csdata-15);
free(vf->priv->bdata);
free(vf->priv->history);
free(vf->priv);
}
}
/*
 * Filter constructor: installs the callbacks, allocates the private state,
 * parses the option string and prepares the selected pass.
 *
 * args is a ':' separated list of name[=value] options.  Only the first
 * letter of a name is examined, except that 'p' followed by 'h' selects
 * "phase" instead of "pass".
 *
 * Returns 1 on success.  On failure everything is released through uninit()
 * and 0 is returned.
 */
static int open(vf_instance_t *vf, char* args)
{
    struct vf_priv_s *p;
    char *filename="framediff.log", *ap, *q, *a;

    /* work on a private copy, the parser writes terminators into it */
    if(args && !(args=strdup(args)))
        goto nomem;

    vf->put_image=put_image;
    vf->uninit=uninit;
    vf->query_format=query_format;
    vf->default_reqs=VFCAP_ACCEPT_STRIDE;
    if(!(vf->priv=p=calloc(1, sizeof(struct vf_priv_s))))
        goto nomem;

    p->phase=5;
    p->threshold=0.5;
    p->window=30;

    if((ap=args))
        while(*ap)
        {
            /* cut off the next option at ':' and split it at '=' */
            q=ap;
            if((ap=strchr(q, ':'))) *ap++=0; else ap=q+strlen(q);
            if((a=strchr(q, '='))) *a++=0; else a=q+strlen(q);

            switch(*q)
            {
            case 0: break;
            case 'f': filename=a; break;
            case 't': p->threshold=atof(a); break;
            /* round up to a multiple of 5: put_image() pairs
               history[frameno%window] with sum[frameno%5], which only
               stays consistent when window is divisible by 5 */
            case 'w': p->window=5*((atoi(a)+4)/5); break;
            case 'd': p->deghost=atoi(a); break;
            case 'p':
                if(q[1]=='h') p->phase=atoi(a);
                else p->pass=atoi(a);
                break;

            case 'h':
                mp_msg(MSGT_VFILTER, MSGL_INFO,
                       "\n%s options:\n\n"
                       "pass=1|2 - Use 2-pass mode.\n"
                       "file=filename - Set the 2-pass log file name "
                       "(default %s).\n"
                       "threshold=value - Set the pattern recognition "
                       "sensitivity (default %g).\n"
                       "deghost=value - Select deghosting threshold "
                       "(default %d).\n"
                       "window=numframes - Set the statistics window "
                       "for 1-pass mode (default %d).\n"
                       "phase=0|1|2|3|4 - Set the initial phase "
                       "for 1-pass mode (default %d).\n\n"
                       "The option names can be abbreviated to the shortest "
                       "unique prefix.\n\n",
                       vf->info->name, filename, p->threshold, p->deghost,
                       p->window, p->phase%5);
                break;

            default:
                mp_msg(MSGT_VFILTER, MSGL_FATAL,
                       "%s: Unknown argument %s.\n", vf->info->name, q);
                goto fail;
            }
        }

    switch(p->pass)
    {
    case 1:
        if(!(p->file=fopen(filename, "w")))
        {
            mp_msg(MSGT_VFILTER, MSGL_FATAL,
                   "%s: Can't create file %s.\n", vf->info->name, filename);
            goto fail;
        }
        break;

    case 2:
        if(!(p->file=fopen(filename, "r")))
        {
            mp_msg(MSGT_VFILTER, MSGL_FATAL,
                   "%s: Can't open file %s.\n", vf->info->name, filename);
            goto fail;
        }

        if(!analyze(p))
            goto fail;

        /* the log has been read completely and is not needed any more */
        fclose(p->file);
        p->file=0;
        break;
    }

    if(p->window<5) p->window=5;
    if(!(p->history=calloc(sizeof *p->history, p->window)))
        goto nomem;

    diff=
#ifdef HAVE_MMX
        gCpuCaps.hasMMX?diff_MMX:
#endif
        diff_C;

    /* filename may point into args, it is not used past this point */
    free(args);
    return 1;

nomem:
    mp_msg(MSGT_VFILTER, MSGL_FATAL,
           "%s: Not enough memory.\n", vf->info->name);
fail:
    uninit(vf);
    free(args);
    return 0;
}
/* Filter registration record.  vf_info_t is declared elsewhere; the field
 * roles noted below are inferred from the values and should be confirmed
 * against its declaration. */
vf_info_t vf_info_divtc =
{
"inverse telecine for deinterlaced video", /* presumably the description */
"divtc", /* presumably the name printed through vf->info->name */
"Ville Saari", /* presumably the author */
"", /* presumably a free-form comment */
open, /* the constructor defined above */
NULL /* NOTE(review): meaning not visible in this file */
};