gl_video: improve dithering

Use a different algorithm to generate the dithering matrix. This looks much better than the previous ordered dither matrix with its cross-hatch artifacts. The matrix generation algorithm as well as its implementation was contributed by Wessel Dankers aka Fruit. The code in dither.c is his implementation, reformatted and with static global variables removed by me. The new matrix is uploaded as float texture - before this commit, it was a normal integer fixed point matrix. This means dithering will be disabled on systems without float textures. The size of the dithering matrix can be configured, as the matrix is generated at runtime. The generation of the matrix can take rather long, and is already unacceptable with size 8. The default is at 6, which takes about 100 ms on a Core2 Duo system with dither.c compiled at -O2, which I consider just about acceptable. The old ordered dithering is still available and can be selected by putting the dither=ordered sub-option. The ordered dither matrix generation code was moved to dither.c. This function was originally written by Uoti Urpala.
2024-12-24 07:33:46 +01:00 · 2013-05-26 01:48:39 +02:00 · 2013-05-26 01:48:39 +02:00 · 58a7d81dc5
commit 58a7d81dc5
parent 39225ed196
9 changed files with 349 additions and 24 deletions
--- a/DOCS/man/en/vo.rst
+++ b/DOCS/man/en/vo.rst
@ -342,6 +342,24 @@ opengl
        detected. Often, LCD panels will do dithering on their own, which
        conflicts with vo_opengl's dithering, and leads to ugly output.

+    dither-size-fruit=<2-8>
+        Set the size of the dither matrix (default: 6). The actual size of
+        the matrix is ``(2^N) x (2^N)`` for an option value of ``N``, so a
+        value of 6 gives a size of 64x64. The matrix is generated at startup
+        time, and a large matrix can take rather long to compute (seconds).
+
+        Used for ``fruit`` dithering only.
+
+    dither=<fruit|ordered|no>
+        Select dithering algorithm (default: fruit).
+
+    temporal-dither
+        Enable temporal dithering. (Only active if dithering is enabled in
+        general.) This changes between 8 different dithering patterns on each
+        frame by changing the orientation of the tiled dithering matrix.
+        Unfortunately, this can lead to flicker on LCD displays, since these
+        have a high reaction time.
+
    debug
        Check for OpenGL errors, i.e. call glGetError(). Also request a
        debug OpenGL context (which does nothing with current graphics drivers
--- a/2
+++ b/2
@ -90,7 +90,7 @@ SOURCES-$(DIRECT3D)             += video/out/vo_direct3d.c \
 SOURCES-$(DSOUND)               += audio/out/ao_dsound.c
 SOURCES-$(GL)                   += video/out/gl_common.c video/out/gl_osd.c \
                                   video/out/vo_opengl.c video/out/gl_lcms.c \
-                                   video/out/gl_video.c \
+                                   video/out/gl_video.c video/out/dither.c \
                                   video/out/vo_opengl_old.c \
                                   video/out/pnm_loader.c

--- a/video/out/dither.c
+++ b/video/out/dither.c
@ -0,0 +1,239 @@
+/******************************************************************************
+
+    dither.c - generate a dithering matrix for downsampling images
+    Copyright © 2013  Wessel Dankers <wsl@fruit.je>
+    This file is part of mpv.
+
+    mpv is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    mpv is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with mpv.  If not, see <http://www.gnu.org/licenses/>.
+
+    You can alternatively redistribute this file and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+******************************************************************************/
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include <libavutil/lfg.h>
+
+#include "talloc.h"
+#include "dither.h"
+
+#define MAX_SIZEB 8
+#define MAX_SIZE (1 << MAX_SIZEB)
+#define MAX_SIZE2 (MAX_SIZE * MAX_SIZE)
+
+typedef uint_fast32_t index_t;
+
+#define WRAP_SIZE2(k, x) ((index_t)((index_t)(x) & ((k)->size2 - 1)))
+#define XY(k, x, y) ((index_t)(((x) | ((y) << (k)->sizeb))))
+
+// Working state for generating one dither matrix. All arrays are sized for
+// the largest supported matrix (MAX_SIZE2 cells); cells are addressed by the
+// linear index produced by XY().
+struct ctx {
+    unsigned int sizeb, size, size2; // log2 of edge length, edge length, cell count
+    unsigned int gauss_radius; // size / 2 - 1; the kernel is 2 * radius + 1 cells wide
+    unsigned int gauss_middle; // linear index of the kernel center
+    uint64_t gauss[MAX_SIZE2]; // kernel weights, filled by makegauss()
+    index_t randomat[MAX_SIZE2]; // scratch: cells tied for the minimum in getmin()
+    bool calcmat[MAX_SIZE2]; // true once a cell has been assigned by setbit()
+    uint64_t gaussmat[MAX_SIZE2]; // accumulated kernel weight per cell
+    index_t unimat[MAX_SIZE2]; // result: order in which each cell was picked
+    AVLFG avlfg; // random number generator used to break ties in getmin()
+};
+
+// Reset *k and prepare it for a (1 << sizeb) x (1 << sizeb) matrix, then fill
+// k->gauss with the weighting kernel that setbit() accumulates.
+//
+// The kernel occupies the cells with coordinates 0..gauss_size-1 in both
+// directions (gauss_size = 2 * gauss_radius + 1). Each weight is
+// exp(-distance_from_center * sigma), scaled by UINT64_MAX / gauss_size2 so
+// that the sum of all weights still fits in a uint64_t (checked by the
+// assert in the final loop).
+//
+// NOTE(review): with sizeb == 1, gauss_radius is 0 and the sigma computation
+// divides by zero, although the assert below admits that value. The option
+// range declared in this commit (2-8) keeps callers away from it.
+static void makegauss(struct ctx *k, unsigned int sizeb)
+{
+    assert(sizeb >= 1 && sizeb <= MAX_SIZEB);
+
+    // Clears all matrices (gauss, calcmat, gaussmat, unimat) in one go.
+    memset(k, 0, sizeof(*k));
+    // Fixed seed for the tie-breaking random number generator.
+    av_lfg_init(&k->avlfg, 123);
+
+    k->sizeb = sizeb;
+    k->size = 1 << k->sizeb;
+    k->size2 = k->size * k->size;
+
+    k->gauss_radius = k->size / 2 - 1;
+    k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius);
+
+    unsigned int gauss_size = k->gauss_radius * 2 + 1;
+    unsigned int gauss_size2 = gauss_size * gauss_size;
+
+    // Already zero from the memset above; kept as an explicit reset.
+    for (index_t c = 0; c < k->size2; c++)
+        k->gauss[c] = 0;
+
+    // Decay rate chosen so that the stored weight at distance gauss_radius
+    // from the center comes out as about 1.5, i.e. the kernel falls off to
+    // the smallest non-zero integer at that distance.
+    long double sigma = -logl(1.5 / UINT64_MAX * gauss_size2) / k->gauss_radius;
+
+    // Only one octant (gx <= gy <= gauss_radius) is computed; the other seven
+    // are filled in by mirroring the coordinates.
+    for (index_t gy = 0; gy <= k->gauss_radius; gy++) {
+        for (index_t gx = 0; gx <= gy; gx++) {
+            int cx = (int)gx - k->gauss_radius;
+            int cy = (int)gy - k->gauss_radius;
+            int sq = cx * cx + cy * cy;
+            long double e = expl(-sqrtl(sq) * sigma);
+            uint64_t v = e / gauss_size2 * UINT64_MAX;
+            k->gauss[XY(k, gx, gy)] =
+                k->gauss[XY(k, gy, gx)] =
+                k->gauss[XY(k, gx, gauss_size - 1 - gy)] =
+                k->gauss[XY(k, gy, gauss_size - 1 - gx)] =
+                k->gauss[XY(k, gauss_size - 1 - gx, gy)] =
+                k->gauss[XY(k, gauss_size - 1 - gy, gx)] =
+                k->gauss[XY(k, gauss_size - 1 - gx, gauss_size - 1 - gy)] =
+                k->gauss[XY(k, gauss_size - 1 - gy, gauss_size - 1 - gx)] = v;
+        }
+    }
+    // Sanity check: summing all weights must not wrap around in 64 bits.
+    uint64_t total = 0;
+    for (index_t c = 0; c < k->size2; c++) {
+        uint64_t oldtotal = total;
+        total += k->gauss[c];
+        assert(total >= oldtotal);
+    }
+}
+
+// Mark cell c as assigned and add the kernel, shifted so that its center
+// (gauss_middle) lands on c, to the accumulated weights in gaussmat:
+//     gaussmat[i] += gauss[(i + gauss_middle - c) mod size2]
+// Does nothing if c was already assigned.
+static void setbit(struct ctx *k, index_t c)
+{
+    if (k->calcmat[c])
+        return;
+    k->calcmat[c] = true;
+    uint64_t *m = k->gaussmat;
+    uint64_t *me = k->gaussmat + k->size2;
+    // Kernel position that corresponds to cell 0 of gaussmat.
+    uint64_t *g = k->gauss + WRAP_SIZE2(k, k->gauss_middle + k->size2 - c);
+    uint64_t *ge = k->gauss + k->size2;
+    // The wrapped addition is done as two linear runs: first up to the end of
+    // gauss[], then continuing from its start for the remaining cells.
+    while (g < ge)
+        *m++ += *g++;
+    g = k->gauss;
+    while (m < me)
+        *m++ += *g++;
+}
+
+// Return the index of a not yet assigned cell with the smallest accumulated
+// weight in gaussmat. If several cells share the minimum, one of them is
+// picked using the random number generator.
+// Must not be called once every cell is assigned: resnum would be 0 and the
+// modulo at the end would divide by zero. makeuniform() calls it exactly
+// size2 times, assigning one new cell each time.
+static index_t getmin(struct ctx *k)
+{
+    uint64_t min = UINT64_MAX;
+    index_t resnum = 0;
+    unsigned int size2 = k->size2;
+    for (index_t c = 0; c < size2; c++) {
+        if (k->calcmat[c])
+            continue;
+        uint64_t total = k->gaussmat[c];
+        if (total <= min) {
+            // A strictly smaller value starts a new candidate list; an equal
+            // value is appended to the current one.
+            if (total != min) {
+                min = total;
+                resnum = 0;
+            }
+            k->randomat[resnum++] = c;
+        }
+    }
+    if (resnum == 1)
+        return k->randomat[0];
+    // Every cell is a candidate (the state right after makegauss(), when
+    // gaussmat is all zero): use a fixed starting cell instead of a random one.
+    if (resnum == size2)
+        return size2 / 2;
+    return k->randomat[av_lfg_get(&k->avlfg) % resnum];
+}
+
+// Fill k->unimat: repeatedly take the free cell with the lowest accumulated
+// weight (getmin), mark it and add the kernel around it (setbit), and store
+// the pick order c (0..size2-1) as that cell's value.
+// Expects k to have been prepared by makegauss().
+static void makeuniform(struct ctx *k)
+{
+    unsigned int size2 = k->size2;
+    for (index_t c = 0; c < size2; c++) {
+        index_t r = getmin(k);
+        setbit(k, r);
+        k->unimat[r] = c;
+    }
+}
+
+// Generate a "fruit" dither matrix.
+// out_matrix is a rectangular tsize * tsize array, where tsize = (1 << size).
+// size is passed to makegauss() as sizeb, which asserts 1 <= size <= MAX_SIZEB.
+// Each output value is the cell's pick order divided by the number of cells,
+// so values lie in [0, 1).
+// The working state is allocated with talloc and freed before returning.
+void mp_make_fruit_dither_matrix(float *out_matrix, int size)
+{
+    struct ctx *k = talloc(NULL, struct ctx);
+    makegauss(k, size);
+    makeuniform(k);
+    float invscale = k->size2;
+    for(index_t y = 0; y < k->size; y++) {
+        for(index_t x = 0; x < k->size; x++)
+            out_matrix[x + y * k->size] = k->unimat[XY(k, x, y)] / invscale;
+    }
+    talloc_free(k);
+}
+
+// Fill m (size * size bytes, one row after another) with an ordered dither
+// matrix. Starting from a single 0 entry, each pass takes the sz x sz block
+// at the origin and writes four variants of it (value * 4 plus a per-quadrant
+// constant) into the 2sz x 2sz block at the origin.
+// The doubling loop assumes size is a power of two - TODO confirm; the caller
+// visible in this commit passes 8.
+void mp_make_ordered_dither_matrix(unsigned char *m, int size)
+{
+    m[0] = 0;
+    for (int sz = 1; sz < size; sz *= 2) {
+        // Quadrant offsets: below, right, diagonal, and the source block
+        // itself. Offset 0 comes last because it overwrites the source.
+        int offset[] = {sz*size, sz, sz * (size+1), 0};
+        for (int i = 0; i < 4; i++)
+            for (int y = 0; y < sz * size; y += size)
+                for (int x = 0; x < sz; x++)
+                    m[x+y+offset[i]] = m[x+y] * 4 + (3-i) * 256/size/size;
+    }
+}
+
+#if 0
+
+// qsort() comparison callback: orders index_t values ascending.
+static int index_cmp(const void *a, const void *b)
+{
+    index_t x = *(const index_t *)a;
+    index_t y = *(const index_t *)b;
+    return x < y ? -1 : x > y;
+}
+
+// Consistency check: asserts that unimat contains every value 0..size2-1
+// exactly once. Sorts unimat in place, so the matrix is destroyed afterwards.
+static void fsck(struct ctx *k)
+{
+    qsort(k->unimat, k->size2, sizeof k->unimat[0], index_cmp);
+    for (index_t c = 0; c < k->size2; c++)
+        assert(k->unimat[c] == c);
+}
+
+// Destination for the copy made by print() in its active (#else) branch.
+uint16_t r[MAX_SIZE2];
+// Debug helper for the test program. The disabled branch would print the
+// matrix as a C array definition; the active branch only copies unimat into
+// r[], narrowing each value to uint16_t.
+static void print(struct ctx *k)
+{
+#if 0
+    puts("#include <stdint.h>");
+    printf("static const int mp_dither_size = %d;\n", k->size);
+    printf("static const int mp_dither_size2 = %d;\n", k->size2);
+    printf("static const uint16_t mp_dither_matrix[] = {\n");
+    for(index_t y = 0; y < k->size; y++) {
+        printf("\t");
+        for(index_t x = 0; x < k->size; x++)
+            printf("%4"PRIuFAST32", ", k->unimat[XY(k, x, y)]);
+        printf("\n");
+    }
+    puts("};");
+#else
+    for(index_t y = 0; y < k->size; y++) {
+        for(index_t x = 0; x < k->size; x++)
+            r[XY(k, x, y)] = k->unimat[XY(k, x, y)];
+    }
+#endif
+}
+
+#include "osdep/timer.h"
+// Stand-alone test/benchmark (only built when the surrounding #if 0 is
+// enabled): generates a matrix with sizeb 6, runs print() and fsck(), and
+// reports the elapsed time in milliseconds.
+// The malloc() result is neither checked nor freed.
+int main(void)
+{
+    mp_time_init();
+    struct ctx *k = malloc(sizeof(struct ctx));
+    int64_t s = mp_time_us();
+    makegauss(k, 6);
+    makeuniform(k);
+    print(k);
+    // fsck() sorts unimat in place, so it has to run after print().
+    fsck(k);
+    int64_t l = mp_time_us() - s;
+    printf("time: %f ms\n", l / 1000.0);
+    return 0;
+}
+
+#endif
--- a/video/out/dither.h
+++ b/video/out/dither.h
@ -0,0 +1,2 @@
+void mp_make_fruit_dither_matrix(float *out_matrix, int size);
+void mp_make_ordered_dither_matrix(unsigned char *m, int size);
--- a/video/out/gl_common.c
+++ b/video/out/gl_common.c
@ -259,6 +259,7 @@ struct gl_functions gl_functions[] = {
            DEF_FN(Uniform2f),
            DEF_FN(Uniform3f),
            DEF_FN(Uniform1i),
+            DEF_FN(UniformMatrix2fv),
            DEF_FN(UniformMatrix3fv),
            DEF_FN(TexImage3D),
            {0},
--- a/video/out/gl_common.h
+++ b/video/out/gl_common.h
@ -305,6 +305,8 @@ struct GL {
    void (GLAPIENTRY *Uniform3f)(GLint, GLfloat, GLfloat, GLfloat);
    void (GLAPIENTRY *Uniform4f)(GLint, GLfloat, GLfloat, GLfloat, GLfloat);
    void (GLAPIENTRY *Uniform1i)(GLint, GLint);
+    void (GLAPIENTRY *UniformMatrix2fv)(GLint, GLsizei, GLboolean,
+                                        const GLfloat *);
    void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean,
                                        const GLfloat *);
    void (GLAPIENTRY *UniformMatrix4x3fv)(GLint, GLsizei, GLboolean,
--- a/video/out/gl_video.c
+++ b/video/out/gl_video.c
@ -37,6 +37,7 @@
 #include "aspect.h"
 #include "video/memcpy_pic.h"
 #include "bitmap_packer.h"
+#include "dither.h"

 static const char vo_opengl_shaders[] =
 // Generated from gl_video_shaders.glsl
@ -192,6 +193,10 @@ struct gl_video {

    int frames_rendered;

+    // Cached because computing it can take relatively long
+    int last_dither_matrix_size;
+    float *last_dither_matrix;
+
    void *scratch;
 };

@ -229,6 +234,7 @@ static const char *osd_shaders[SUBBITMAP_COUNT] = {
 static const struct gl_video_opts gl_video_opts_def = {
    .npot = 1,
    .dither_depth = -1,
+    .dither_size = 6,
    .fbo_format = GL_RGB,
    .scale_sep = 1,
    .scalers = { "bilinear", "bilinear" },
@ -269,6 +275,10 @@ const struct m_sub_options gl_video_conf = {
                    {"rgba32f", GL_RGBA32F})),
        OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16,
                          ({"no", -1}, {"auto", 0})),
+        OPT_CHOICE("dither", dither_algo, 0,
+                   ({"fruit", 0}, {"ordered", 1}, {"no", -1})),
+        OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8),
+        OPT_FLAG("temporal-dither", temporal_dither, 0),
        OPT_FLAG("alpha", enable_alpha, 0),
        {0}
    },
@ -754,6 +764,7 @@ static void compile_shaders(struct gl_video *p)
    shader_def_opt(&header_final, "USE_3DLUT", p->use_lut_3d);
    shader_def_opt(&header_final, "USE_SRGB", p->opts.srgb);
    shader_def_opt(&header_final, "USE_DITHER", p->dither_texture != 0);
+    shader_def_opt(&header_final, "USE_TEMPORAL_DITHER", p->opts.temporal_dither);

    if (p->opts.scale_sep && p->scalers[0].kernel) {
        header_sep = talloc_strdup(tmp, "");
@ -926,18 +937,6 @@ static void init_scaler(struct gl_video *p, struct scaler *scaler)
    debug_check_gl(p, "after initializing scaler");
 }

-static void make_dither_matrix(unsigned char *m, int size)
-{
-    m[0] = 0;
-    for (int sz = 1; sz < size; sz *= 2) {
-        int offset[] = {sz*size, sz, sz * (size+1), 0};
-        for (int i = 0; i < 4; i++)
-            for (int y = 0; y < sz * size; y += size)
-                for (int x = 0; x < sz; x++)
-                    m[x+y+offset[i]] = m[x+y] * 4 + (3-i) * 256/size/size;
-    }
-}
-
 static void init_dither(struct gl_video *p)
 {
    GL *gl = p->gl;
@ -947,30 +946,54 @@ static void init_dither(struct gl_video *p)
    if (p->opts.dither_depth > 0)
        dst_depth = p->opts.dither_depth;

-    if (p->opts.dither_depth < 0)
+    if (p->opts.dither_depth < 0 || p->opts.dither_algo < 0)
        return;

    mp_msg(MSGT_VO, MSGL_V, "[gl] Dither to %d.\n", dst_depth);

+    int tex_size;
+    void *tex_data;
+    GLenum tex_type;
+    unsigned char temp[256];
+
+    if (p->opts.dither_algo == 0) {
+        int sizeb = p->opts.dither_size;
+        int size = 1 << sizeb;
+
+        if (p->last_dither_matrix_size != size) {
+            p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix,
+                                                   float, size * size);
+            mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb);
+            p->last_dither_matrix_size = size;
+        }
+
+        tex_size = size;
+        tex_type = GL_FLOAT;
+        tex_data = p->last_dither_matrix;
+    } else {
+        assert(sizeof(temp) >= 8 * 8);
+        mp_make_ordered_dither_matrix(temp, 8);
+
+        tex_size = 8;
+        tex_type = GL_UNSIGNED_BYTE;
+        tex_data = temp;
+    }
+
    // This defines how many bits are considered significant for output on
-    // screen. The superfluous bits will be used for rounded according to the
+    // screen. The superfluous bits will be used for rounding according to the
    // dither matrix. The precision of the source implicitly decides how many
    // dither patterns can be visible.
    p->dither_quantization = (1 << dst_depth) - 1;
-    int size = 8;
-    p->dither_multiply = p->dither_quantization + 1.0 / (size*size);
-    unsigned char dither[256];
-    make_dither_matrix(dither, size);
-
-    p->dither_size = size;
+    p->dither_multiply = p->dither_quantization + 1.0 / (tex_size * tex_size);
+    p->dither_size = tex_size;

    gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_DITHER);
    gl->GenTextures(1, &p->dither_texture);
    gl->BindTexture(GL_TEXTURE_2D, p->dither_texture);
    gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
    gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-    gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, size, size, 0, GL_RED,
-                   GL_UNSIGNED_BYTE, dither);
+    gl->TexImage2D(GL_TEXTURE_2D, 0, GL_RED, tex_size, tex_size, 0, GL_RED,
+                   tex_type, tex_data);
    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
@ -1164,6 +1187,25 @@ static void uninit_video(struct gl_video *p)
    fbotex_uninit(p, &p->scale_sep_fbo);
 }

+// Upload the 2x2 "dither_trafo" uniform of the final program for the current
+// frame. The transformation cycles through 8 phases derived from
+// frames_rendered: a rotation by phase * 90 degrees, with the second row
+// negated (mirroring) for phases 4-7.
+// Binds p->final_program for the upload and unbinds it again afterwards.
+static void change_dither_trafo(struct gl_video *p)
+{
+    GL *gl = p->gl;
+    int program = p->final_program;
+
+    int phase = p->frames_rendered % 8u;
+    float r = phase * (M_PI / 2); // rotate
+    float m = phase < 4 ? 1 : -1; // mirror
+
+    gl->UseProgram(program);
+
+    float matrix[2][2] = {{cos(r),     -sin(r)    },
+                          {sin(r) * m,  cos(r) * m}};
+    // GL_TRUE sets the transpose flag; matrix[][] above is written row by row.
+    gl->UniformMatrix2fv(gl->GetUniformLocation(program, "dither_trafo"),
+                         1, GL_TRUE, &matrix[0][0]);
+
+    gl->UseProgram(0);
+}
+
 static void render_to_fbo(struct gl_video *p, struct fbotex *fbo, int w, int h,
                          int tex_w, int tex_h)
 {
@ -1206,6 +1248,9 @@ void gl_video_render_frame(struct gl_video *p)
    struct video_image *vimg = &p->image;
    bool is_flipped = vimg->image_flipped;

+    if (p->opts.temporal_dither)
+        change_dither_trafo(p);
+
    if (p->dst_rect.x0 > p->vp_x || p->dst_rect.y0 > p->vp_y
        || p->dst_rect.x1 < p->vp_x + p->vp_w
        || p->dst_rect.y1 < p->vp_y + p->vp_h)
@ -1274,6 +1319,8 @@ void gl_video_render_frame(struct gl_video *p)

    gl->UseProgram(0);

+    p->frames_rendered++;
+
    debug_check_gl(p, "after video rendering");
 }

@ -1542,6 +1589,14 @@ static void check_gl_features(struct gl_video *p)
        }
    }

+    if (!have_float_tex && p->opts.dither_depth >= 0) {
+        // only fruit dithering uses float textures
+        if (p->opts.dither_algo == 0) {
+            p->opts.dither_depth = -1;
+            disabled[n_disabled++] = "dithering (float tex.)";
+        }
+    }
+
    if (!have_srgb && p->opts.srgb) {
        p->opts.srgb = false;
        disabled[n_disabled++] = "sRGB";
--- a/video/out/gl_video.h
+++ b/video/out/gl_video.h
@ -39,6 +39,9 @@ struct gl_video_opts {
    int npot;
    int pbo;
    int dither_depth;
+    int dither_algo;
+    int dither_size;
+    int temporal_dither;
    int fbo_format;
    int stereo_mode;
    int enable_alpha;
--- a/video/out/gl_video_shaders.glsl
+++ b/video/out/gl_video_shaders.glsl
@ -121,6 +121,7 @@ uniform sampler2D lut_l_2d;
 uniform sampler3D lut_3d;
 uniform sampler2D dither;
 uniform mat4x3 colormatrix;
+uniform mat2 dither_trafo;
 uniform vec3 inv_gamma;
 uniform float input_gamma;
 uniform float conv_gamma;
@ -376,7 +377,11 @@ void main() {
    color.rgb = srgb_compand(color.rgb);
 #endif
 #ifdef USE_DITHER
-    float dither_value = texture(dither, gl_FragCoord.xy / dither_size).r;
+    vec2 dither_pos = gl_FragCoord.xy / dither_size;
+#ifdef USE_TEMPORAL_DITHER
+    dither_pos = dither_trafo * dither_pos;
+#endif
+    float dither_value = texture(dither, dither_pos).r;
    color = floor(color * dither_multiply + dither_value ) / dither_quantization;
 #endif
 #ifdef USE_ALPHA