avfilter/vf_maskedmerge: fix rounding when masking

This commit is contained in:
Paul B Mahol 2022-03-02 22:30:40 +01:00
parent 59520f068d
commit dae95b3ffd
3 changed files with 23 additions and 16 deletions

View File

@ -30,7 +30,7 @@ typedef struct MaskedMergeContext {
int linesize[4];
int nb_planes;
int planes;
int half, depth;
int half, depth, max;
FFFrameSync fs;
void (*maskedmerge)(const uint8_t *bsrc, const uint8_t *osrc,

View File

@ -96,7 +96,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
base->linesize[p], overlay->linesize[p],
mask->linesize[p], out->linesize[p],
s->width[p], slice_end - slice_start,
s->half, s->depth);
s->half, s->max);
}
return 0;
@ -138,13 +138,13 @@ static int process_frame(FFFrameSync *fs)
return ff_filter_frame(outlink, out);
}
#define MASKEDMERGE(n, type, half, shift) \
#define MASKEDMERGE(n, type, ctype, half, max, div) \
static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
const uint8_t *mmsrc, uint8_t *ddst, \
ptrdiff_t blinesize, ptrdiff_t olinesize, \
ptrdiff_t mlinesize, ptrdiff_t dlinesize, \
int w, int h, \
int hhalf, int sshift) \
int hhalf, int mmax) \
{ \
const type *bsrc = (const type *)bbsrc; \
const type *osrc = (const type *)oosrc; \
@ -158,7 +158,10 @@ static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
\
for (int y = 0; y < h; y++) { \
for (int x = 0; x < w; x++) { \
dst[x] = bsrc[x] + ((msrc[x] * (osrc[x] - bsrc[x]) + half) shift); \
const type invm = max - msrc[x]; \
const ctype r = ((ctype)(bsrc[x] * invm) + \
(ctype)(msrc[x] * osrc[x] + half)) div; \
dst[x] = r; \
} \
\
dst += dlinesize; \
@ -168,9 +171,9 @@ static void maskedmerge##n(const uint8_t *bbsrc, const uint8_t *oosrc, \
} \
}
MASKEDMERGE(8, uint8_t, 128, >> 8)
MASKEDMERGE(16, uint16_t, hhalf, >> sshift)
MASKEDMERGE(32, float, 0.f, + 0.f)
MASKEDMERGE(8, uint8_t, uint16_t, 127, 255, / 255)
MASKEDMERGE(16, uint16_t, uint32_t, hhalf, mmax, / mmax)
MASKEDMERGE(32, float, float, 0.f, 1.f, + 0.f)
static int config_input(AVFilterLink *inlink)
{
@ -189,7 +192,8 @@ static int config_input(AVFilterLink *inlink)
s->width[0] = s->width[3] = inlink->w;
s->depth = desc->comp[0].depth;
s->half = (1 << s->depth) / 2;
s->max = (1 << s->depth) - 1;
s->half = s->max / 2;
if (s->depth == 8)
s->maskedmerge = maskedmerge8;

View File

@ -24,26 +24,28 @@
SECTION_RODATA
pw_128: times 8 dw 128
pw_256: times 8 dw 256
pw_127: times 8 dw 127
pw_255: times 8 dw 255
pw_32897: times 8 dw 32897
SECTION .text
INIT_XMM sse2
%if ARCH_X86_64
cglobal maskedmerge8, 8, 11, 7, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
cglobal maskedmerge8, 8, 11, 8, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
mov wd, dword wm
mov hd, dword hm
%else
cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
cglobal maskedmerge8, 5, 7, 8, bsrc, osrc, msrc, dst, blinesize, w, x
mov wd, r8m
%define olinesizeq r5mp
%define mlinesizeq r6mp
%define dlinesizeq r7mp
%define hd r9mp
%endif
mova m4, [pw_256]
mova m5, [pw_128]
mova m4, [pw_255]
mova m5, [pw_127]
mova m7, [pw_32897]
pxor m6, m6
add bsrcq, wq
add osrcq, wq
@ -66,7 +68,8 @@ cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
pmullw m1, m3
paddw m1, m2
paddw m1, m5
psrlw m1, 8
pmulhuw m1, m7
psrlw m1, 7
packuswb m1, m1
movh [dstq + xq], m1
add xq, mmsize / 2