From 08e97dae205d10806a0360bfc62f654d629dda93 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Wed, 8 Nov 2023 14:31:50 +0100 Subject: [PATCH] avfilter/af_adynamicequalizer: add adaptive detection mode ---
/*
 * Reviewer notes. They are placed between the three-dash separator and the
 * diffstat, the area a format-patch mail reserves for free-form notes, so
 * that no hunk content and no hunk line count is altered.
 *
 * Purpose: add an "adaptive" choice (DET_ADAPTIVE) to the detection mode of
 * adynamicequalizer. It is documented in doc/filters.texi, appended to
 * enum DetectionModes before NB_DMODES, and registered as a constant of the
 * "auto" option unit.
 *
 * libavfilter/adynamicequalizer_template.c
 *
 *  - FLOG, FEXP, FLOG2 and FEXP2 join the per-precision math aliases:
 *    logf/expf/log2f/exp2f next to "ftype float", log/exp/log2/exp2 next to
 *    "ftype double". Only FLOG2 and FEXP2 are used by the hunks shown here.
 *
 *  - PEAKS(empty_value, op, sample, psample) expands to bare statements (no
 *    do/while wrapper) and works directly on the locals ss, front, back,
 *    empty and n of the function that expands it. ss[] is treated as a
 *    circular list running from back up to front, indices wrap at n, and a
 *    vacated slot is reset to empty_value. In order it:
 *      1. drops ss[front] when it equals psample (the value leaving the
 *         window);
 *      2. discards every entry when "sample op ss[front]" holds;
 *      3. otherwise removes entries from the back while
 *         "sample op ss[back]" holds;
 *      4. moves back down one slot unless the list is empty, so the caller
 *         can store the new sample at ss[back].
 *    NOTE(review): in step 1, when the front entry is dropped and exactly
 *    one other entry remains, front becomes equal to back and empty is set
 *    to true although ss[back] still holds a value; the caller's later
 *    ss[back] = x store then replaces that value. Confirm this is intended.
 *
 *  - queue_sample(cc, x, nb_samples) adds x to the running sum and log2(x)
 *    to the running log sum; once cc->size has reached nb_samples it also
 *    subtracts the contribution of px, the value about to be overwritten in
 *    cc->queue. cc->position wraps at nb_samples and cc->size stops growing
 *    at nb_samples. The current cc->size is the wrap length n seen by PEAKS,
 *    so that length grows while the window is still filling. PEAKS is
 *    expanded with ZERO as the empty marker and ">" as op; presumably this
 *    keeps the largest value of the window at ss[front] - verify against
 *    the note above. The list counts as empty on entry when front == back
 *    and ss[front] == ZERO.
 *
 *  - get_peak(cc, score) computes 2^(log_sum / size) divided by
 *    (sum / size), i.e. the geometric mean of the accumulated values over
 *    their arithmetic mean, stores LIN2LOG() of that ratio in *score and
 *    returns ss[cc->front]. LIN2LOG is defined outside this patch.
 *    NOTE(review): both means divide by cc->size, which stays zero until a
 *    sample has been queued, and filter_channels calls get_peak after a loop
 *    over in->nb_samples without checking it. Whether a frame without
 *    samples can reach this branch is not visible here.
 *    NOTE(review): the Texinfo text says "sliding window entropy", while the
 *    code shown computes the mean ratio described above - confirm wording.
 *
 *  - filter_channels(), DET_ADAPTIVE branch: for every channel in
 *    [start, end) each input sample is passed through get_svf() with dm, da
 *    and the channel's tstate, its absolute value is floored at EPSILON
 *    (so the log2 argument is never zero), and the result is queued with a
 *    window of in->sample_rate entries. After the frame, a score of at
 *    least -3.5 (in whatever unit LIN2LOG yields) sets the channel's
 *    threshold_log to LIN2LOG(peak); otherwise the configured
 *    s->threshold_log is applied only while cc->detection is still
 *    DET_UNSET (defined outside the hunks shown). cc->detection is then set
 *    to the current mode.
 *
 * libavfilter/af_adynamicequalizer.c
 *
 *  - ChannelContext gains log_sum/sum accumulators in double and float
 *    variants, the untyped buffers dqueue and queue, and the integers
 *    position, size, front and back used by the template code above.
 *
 *  - config_input() records the channel count in s->nb_channels and
 *    allocates queue and dqueue for every channel with inlink->sample_rate
 *    elements of sizeof(double), whichever precision is selected; the
 *    template reads them through ftype pointers. A failed allocation
 *    returns AVERROR(ENOMEM) without releasing anything in place.
 *
 *  - uninit() frees queue and dqueue of each of the s->nb_channels channels
 *    before freeing s->cc itself.
 */
doc/filters.texi | 2 + libavfilter/adynamicequalizer_template.c | 120 +++++++++++++++++++++++ libavfilter/af_adynamicequalizer.c | 27 +++++ 3 files changed, 149 insertions(+) diff --git a/doc/filters.texi b/doc/filters.texi index 13c18a2574..d83a3fb91e 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -1100,6 +1100,8 @@ Disable using automatically gathered threshold value. Stop picking threshold value. @item on Start picking threshold value. +@item adaptive +Adaptively pick threshold value, by calculating sliding window entropy. @end table @item precision diff --git a/libavfilter/adynamicequalizer_template.c b/libavfilter/adynamicequalizer_template.c index 653d51c3a9..4eb2489cd6 100644 --- a/libavfilter/adynamicequalizer_template.c +++ b/libavfilter/adynamicequalizer_template.c @@ -27,7 +27,11 @@ #undef CLIP #undef SAMPLE_FORMAT #undef FABS +#undef FLOG +#undef FEXP +#undef FLOG2 #undef FLOG10 +#undef FEXP2 #undef FEXP10 #undef EPSILON #if DEPTH == 32 @@ -41,7 +45,11 @@ #define FMAX fmaxf #define CLIP av_clipf #define FABS fabsf +#define FLOG logf +#define FEXP expf +#define FLOG2 log2f #define FLOG10 log10f +#define FEXP2 exp2f #define FEXP10 ff_exp10f #define EPSILON (1.f / (1 << 23)) #define ftype float @@ -56,7 +64,11 @@ #define FMAX fmax #define CLIP av_clipd #define FABS fabs +#define FLOG log +#define FEXP exp +#define FLOG2 log2 #define FLOG10 log10 +#define FEXP2 exp2 #define FEXP10 ff_exp10 #define EPSILON (1.0 / (1LL << 53)) #define ftype double @@ -150,6 +162,92 @@ static int fn(filter_prepare)(AVFilterContext *ctx) return 0; } +#define PEAKS(empty_value,op,sample, psample)\ + if (!empty && psample == ss[front]) { \ + ss[front] = empty_value; \ + if (back != front) { \ + front--; \ + if (front < 0) \ + front = n - 1; \ + } \ + empty = front == 
back; \ + } \ + \ + if (!empty && sample op ss[front]) { \ + while (1) { \ + ss[front] = empty_value; \ + if (back == front) { \ + empty = 1; \ + break; \ + } \ + front--; \ + if (front < 0) \ + front = n - 1; \ + } \ + } \ + \ + while (!empty && sample op ss[back]) { \ + ss[back] = empty_value; \ + if (back == front) { \ + empty = 1; \ + break; \ + } \ + back++; \ + if (back >= n) \ + back = 0; \ + } \ + \ + if (!empty) { \ + back--; \ + if (back < 0) \ + back = n - 1; \ + } + +static void fn(queue_sample)(ChannelContext *cc, + const ftype x, + const int nb_samples) +{ + ftype *ss = cc->dqueue; + ftype *qq = cc->queue; + int front = cc->front; + int back = cc->back; + int empty, n, pos = cc->position; + ftype px = qq[pos]; + + fn(cc->sum) += x; + fn(cc->log_sum) += FLOG2(x); + if (cc->size >= nb_samples) { + fn(cc->sum) -= px; + fn(cc->log_sum) -= FLOG2(px); + } + + qq[pos] = x; + pos++; + if (pos >= nb_samples) + pos = 0; + cc->position = pos; + + if (cc->size < nb_samples) + cc->size++; + n = cc->size; + + empty = (front == back) && (ss[front] == ZERO); + PEAKS(ZERO, >, x, px) + + ss[back] = x; + + cc->front = front; + cc->back = back; +} + +static ftype fn(get_peak)(ChannelContext *cc, ftype *score) +{ + ftype s, *ss = cc->dqueue; + s = FEXP2(fn(cc->log_sum) / cc->size) / (fn(cc->sum) / cc->size); + *score = LIN2LOG(s); + return ss[cc->front]; +} + static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { AudioDynamicEqualizerContext *s = ctx->priv; @@ -157,6 +255,7 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n AVFrame *in = td->in; AVFrame *out = td->out; const ftype sample_rate = in->sample_rate; + const int isample_rate = in->sample_rate; const ftype makeup = s->makeup; const ftype ratio = s->ratio; const ftype range = s->range; @@ -197,6 +296,27 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n fn(cc->new_threshold_log) = FMAX(fn(cc->new_threshold_log), 
LIN2LOG(new_threshold)); } + } else if (detection == DET_ADAPTIVE) { + for (int ch = start; ch < end; ch++) { + const ftype *src = (const ftype *)in->extended_data[ch]; + ChannelContext *cc = &s->cc[ch]; + ftype *tstate = fn(cc->tstate); + ftype score, peak; + + for (int n = 0; n < in->nb_samples; n++) { + ftype detect = FMAX(FABS(fn(get_svf)(src[n], dm, da, tstate)), EPSILON); + fn(queue_sample)(cc, detect, isample_rate); + } + + peak = fn(get_peak)(cc, &score); + + if (score >= -3.5) { + fn(cc->threshold_log) = LIN2LOG(peak); + } else if (cc->detection == DET_UNSET) { + fn(cc->threshold_log) = s->threshold_log; + } + cc->detection = detection; + } } else if (detection == DET_DISABLED) { for (int ch = start; ch < end; ch++) { ChannelContext *cc = &s->cc[ch]; diff --git a/libavfilter/af_adynamicequalizer.c b/libavfilter/af_adynamicequalizer.c index 1926ae8ec1..611e542c1b 100644 --- a/libavfilter/af_adynamicequalizer.c +++ b/libavfilter/af_adynamicequalizer.c @@ -29,6 +29,7 @@ enum DetectionModes { DET_DISABLED, DET_OFF, DET_ON, + DET_ADAPTIVE, NB_DMODES, }; @@ -50,6 +51,8 @@ typedef struct ChannelContext { double detect_double; double threshold_log_double; double new_threshold_log_double; + double log_sum_double; + double sum_double; float fa_float[3], fm_float[3]; float dstate_float[2]; float fstate_float[2]; @@ -58,6 +61,14 @@ typedef struct ChannelContext { float detect_float; float threshold_log_float; float new_threshold_log_float; + float log_sum_float; + float sum_float; + void *dqueue; + void *queue; + int position; + int size; + int front; + int back; int detection; int init; } ChannelContext; @@ -86,6 +97,7 @@ typedef struct AudioDynamicEqualizerContext { int dftype; int precision; int format; + int nb_channels; int (*filter_prepare)(AVFilterContext *ctx); int (*filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); @@ -140,6 +152,7 @@ static int config_input(AVFilterLink *inlink) s->cc = av_calloc(inlink->ch_layout.nb_channels, 
sizeof(*s->cc)); if (!s->cc) return AVERROR(ENOMEM); + s->nb_channels = inlink->ch_layout.nb_channels; switch (s->format) { case AV_SAMPLE_FMT_DBLP: @@ -152,6 +165,14 @@ static int config_input(AVFilterLink *inlink) break; } + for (int ch = 0; ch < s->nb_channels; ch++) { + ChannelContext *cc = &s->cc[ch]; + cc->queue = av_calloc(inlink->sample_rate, sizeof(double)); + cc->dqueue = av_calloc(inlink->sample_rate, sizeof(double)); + if (!cc->queue || !cc->dqueue) + return AVERROR(ENOMEM); + } + return 0; } @@ -189,6 +210,11 @@ static av_cold void uninit(AVFilterContext *ctx) { AudioDynamicEqualizerContext *s = ctx->priv; + for (int ch = 0; ch < s->nb_channels; ch++) { + ChannelContext *cc = &s->cc[ch]; + av_freep(&cc->queue); + av_freep(&cc->dqueue); + } av_freep(&s->cc); } @@ -226,6 +252,7 @@ static const AVOption adynamicequalizer_options[] = { { "disabled", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_DISABLED}, 0, 0, FLAGS, "auto" }, { "off", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_OFF}, 0, 0, FLAGS, "auto" }, { "on", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_ON}, 0, 0, FLAGS, "auto" }, + { "adaptive", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_ADAPTIVE}, 0, 0, FLAGS, "auto" }, { "precision", "set processing precision", OFFSET(precision), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, AF, "precision" }, { "auto", "set auto processing precision", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "precision" }, { "float", "set single-floating point processing precision", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "precision" },