avfilter/af_adynamicequalizer: add adaptive detection mode

This commit is contained in:
Paul B Mahol 2023-11-08 14:31:50 +01:00
parent 82be1e5c0d
commit 08e97dae20
3 changed files with 149 additions and 0 deletions

View File

@ -1100,6 +1100,8 @@ Disable using automatically gathered threshold value.
Stop picking threshold value.
@item on
Start picking threshold value.
@item adaptive
Adaptively pick threshold value by calculating sliding window entropy.
@end table
@item precision

View File

@ -27,7 +27,11 @@
#undef CLIP
#undef SAMPLE_FORMAT
#undef FABS
#undef FLOG
#undef FEXP
#undef FLOG2
#undef FLOG10
#undef FEXP2
#undef FEXP10
#undef EPSILON
#if DEPTH == 32
@ -41,7 +45,11 @@
#define FMAX fmaxf
#define CLIP av_clipf
#define FABS fabsf
#define FLOG logf
#define FEXP expf
#define FLOG2 log2f
#define FLOG10 log10f
#define FEXP2 exp2f
#define FEXP10 ff_exp10f
#define EPSILON (1.f / (1 << 23))
#define ftype float
@ -56,7 +64,11 @@
#define FMAX fmax
#define CLIP av_clipd
#define FABS fabs
#define FLOG log
#define FEXP exp
#define FLOG2 log2
#define FLOG10 log10
#define FEXP2 exp2
#define FEXP10 ff_exp10
#define EPSILON (1.0 / (1LL << 53))
#define ftype double
@ -150,6 +162,92 @@ static int fn(filter_prepare)(AVFilterContext *ctx)
return 0;
}
/*
 * Update the circular deque that tracks the running extremum of a sliding
 * window, making room for a new sample.
 *
 * The macro operates on variables that must already exist in the caller's
 * scope: the deque storage `ss`, the indices `front` and `back`, the ring
 * size `n` and the flag `empty`.  Entries are stored from `back` (newest)
 * up to `front` (current extremum), wrapping at `n`; unused slots hold
 * empty_value.
 *
 *   empty_value  value written into slots that are released
 *   op           comparison operator deciding when the new sample dominates
 *                a stored entry (e.g. > for a running maximum)
 *   sample       sample entering the window
 *   psample      sample leaving the window
 *
 * On exit `back` designates the slot where the caller must store `sample`.
 * The expansion is a plain statement sequence and is invoked without a
 * trailing semicolon, so it is intentionally not wrapped in do/while.
 */
#define PEAKS(empty_value, op, sample, psample)                         \
    /* The leaving sample is the current extremum: release the front. */ \
    if (!empty && (psample) == ss[front]) {                             \
        ss[front] = empty_value;                                        \
        if (back != front) {                                            \
            /* Other entries remain, the deque is NOT empty. */         \
            front--;                                                    \
            if (front < 0)                                              \
                front = n - 1;                                          \
        } else {                                                        \
            /* The released entry was the only one. */                  \
            empty = 1;                                                  \
        }                                                               \
    }                                                                   \
                                                                        \
    /* The new sample beats the extremum: release every entry. */       \
    if (!empty && (sample) op ss[front]) {                              \
        while (1) {                                                     \
            ss[front] = empty_value;                                    \
            if (back == front) {                                        \
                empty = 1;                                              \
                break;                                                  \
            }                                                           \
            front--;                                                    \
            if (front < 0)                                              \
                front = n - 1;                                          \
        }                                                               \
    }                                                                   \
                                                                        \
    /* Release newest entries that the new sample dominates. */         \
    while (!empty && (sample) op ss[back]) {                            \
        ss[back] = empty_value;                                         \
        if (back == front) {                                            \
            empty = 1;                                                  \
            break;                                                      \
        }                                                               \
        back++;                                                         \
        if (back >= n)                                                  \
            back = 0;                                                   \
    }                                                                   \
                                                                        \
    /* Reserve a fresh slot behind the surviving entries. */            \
    if (!empty) {                                                       \
        back--;                                                         \
        if (back < 0)                                                   \
            back = n - 1;                                               \
    }
/*
 * Push one detector sample into the per-channel sliding window.
 *
 * cc          channel state; its queue and dqueue buffers are indexed
 *             modulo nb_samples here
 * x           new sample; its base-2 logarithm is accumulated, so the
 *             caller is expected to pass a strictly positive value
 * nb_samples  window length in samples (ring capacity)
 *
 * Maintains the running sum and running sum of log2 over the window, the
 * ring buffer of raw samples, and the deque used to read the window
 * maximum at cc->front.
 */
static void fn(queue_sample)(ChannelContext *cc,
                             const ftype x,
                             const int nb_samples)
{
    ftype *ss = cc->dqueue;
    ftype *qq = cc->queue;
    int front = cc->front;
    int back = cc->back;
    int empty, n, pos = cc->position;
    /* Sample that is about to be overwritten in the ring buffer. */
    ftype px = qq[pos];

    fn(cc->sum) += x;
    fn(cc->log_sum) += FLOG2(x);
    /* Once the window is full the overwritten sample leaves the sums. */
    if (cc->size >= nb_samples) {
        fn(cc->sum) -= px;
        fn(cc->log_sum) -= FLOG2(px);
    }

    qq[pos] = x;
    pos++;
    if (pos >= nb_samples)
        pos = 0;
    cc->position = pos;

    if (cc->size < nb_samples)
        cc->size++;

    /*
     * The deque indices must always wrap at the fixed ring capacity.
     * Wrapping at the current fill level (cc->size) changes the modulus
     * while the window is still filling, which lets `back` run into
     * `front` and overwrite the tracked maximum.
     */
    n = nb_samples;

    /* front == back is ambiguous; a zero slot tells "empty" from "one". */
    empty = (front == back) && (ss[front] == ZERO);
    PEAKS(ZERO, >, x, px)

    ss[back] = x;

    cc->front = front;
    cc->back = back;
}
/*
 * Report the current window statistics for one channel.
 *
 * Stores in *score the ratio between 2^(mean of log2 samples), i.e. the
 * geometric mean, and the arithmetic mean of the queued samples, after
 * conversion with LIN2LOG.
 *
 * Returns the deque entry at cc->front.
 */
static ftype fn(get_peak)(ChannelContext *cc, ftype *score)
{
    const ftype *peaks = cc->dqueue;
    const ftype geometric_mean = FEXP2(fn(cc->log_sum) / cc->size);
    const ftype arithmetic_mean = fn(cc->sum) / cc->size;

    *score = LIN2LOG(geometric_mean / arithmetic_mean);

    return peaks[cc->front];
}
static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
AudioDynamicEqualizerContext *s = ctx->priv;
@ -157,6 +255,7 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
AVFrame *in = td->in;
AVFrame *out = td->out;
const ftype sample_rate = in->sample_rate;
const int isample_rate = in->sample_rate;
const ftype makeup = s->makeup;
const ftype ratio = s->ratio;
const ftype range = s->range;
@ -197,6 +296,27 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
fn(cc->new_threshold_log) = FMAX(fn(cc->new_threshold_log), LIN2LOG(new_threshold));
}
} else if (detection == DET_ADAPTIVE) {
for (int ch = start; ch < end; ch++) {
const ftype *src = (const ftype *)in->extended_data[ch];
ChannelContext *cc = &s->cc[ch];
ftype *tstate = fn(cc->tstate);
ftype score, peak;
for (int n = 0; n < in->nb_samples; n++) {
ftype detect = FMAX(FABS(fn(get_svf)(src[n], dm, da, tstate)), EPSILON);
fn(queue_sample)(cc, detect, isample_rate);
}
peak = fn(get_peak)(cc, &score);
if (score >= -3.5) {
fn(cc->threshold_log) = LIN2LOG(peak);
} else if (cc->detection == DET_UNSET) {
fn(cc->threshold_log) = s->threshold_log;
}
cc->detection = detection;
}
} else if (detection == DET_DISABLED) {
for (int ch = start; ch < end; ch++) {
ChannelContext *cc = &s->cc[ch];

View File

@ -29,6 +29,7 @@ enum DetectionModes {
DET_DISABLED,
DET_OFF,
DET_ON,
DET_ADAPTIVE,
NB_DMODES,
};
@ -50,6 +51,8 @@ typedef struct ChannelContext {
double detect_double;
double threshold_log_double;
double new_threshold_log_double;
double log_sum_double;
double sum_double;
float fa_float[3], fm_float[3];
float dstate_float[2];
float fstate_float[2];
@ -58,6 +61,14 @@ typedef struct ChannelContext {
float detect_float;
float threshold_log_float;
float new_threshold_log_float;
float log_sum_float;
float sum_float;
void *dqueue;
void *queue;
int position;
int size;
int front;
int back;
int detection;
int init;
} ChannelContext;
@ -86,6 +97,7 @@ typedef struct AudioDynamicEqualizerContext {
int dftype;
int precision;
int format;
int nb_channels;
int (*filter_prepare)(AVFilterContext *ctx);
int (*filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
@ -140,6 +152,7 @@ static int config_input(AVFilterLink *inlink)
s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
if (!s->cc)
return AVERROR(ENOMEM);
s->nb_channels = inlink->ch_layout.nb_channels;
switch (s->format) {
case AV_SAMPLE_FMT_DBLP:
@ -152,6 +165,14 @@ static int config_input(AVFilterLink *inlink)
break;
}
for (int ch = 0; ch < s->nb_channels; ch++) {
ChannelContext *cc = &s->cc[ch];
cc->queue = av_calloc(inlink->sample_rate, sizeof(double));
cc->dqueue = av_calloc(inlink->sample_rate, sizeof(double));
if (!cc->queue || !cc->dqueue)
return AVERROR(ENOMEM);
}
return 0;
}
@ -189,6 +210,11 @@ static av_cold void uninit(AVFilterContext *ctx)
{
AudioDynamicEqualizerContext *s = ctx->priv;
for (int ch = 0; ch < s->nb_channels; ch++) {
ChannelContext *cc = &s->cc[ch];
av_freep(&cc->queue);
av_freep(&cc->dqueue);
}
av_freep(&s->cc);
}
@ -226,6 +252,7 @@ static const AVOption adynamicequalizer_options[] = {
{ "disabled", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_DISABLED}, 0, 0, FLAGS, "auto" },
{ "off", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_OFF}, 0, 0, FLAGS, "auto" },
{ "on", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_ON}, 0, 0, FLAGS, "auto" },
{ "adaptive", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_ADAPTIVE}, 0, 0, FLAGS, "auto" },
{ "precision", "set processing precision", OFFSET(precision), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, AF, "precision" },
{ "auto", "set auto processing precision", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "precision" },
{ "float", "set single-floating point processing precision", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "precision" },