From 13ef6bcf6fe129614299bb40daa1427a213ed949 Mon Sep 17 00:00:00 2001 From: Bin Jin Date: Mon, 24 Jul 2017 19:15:15 +0000 Subject: [PATCH] vo_opengl: enable compute shader for mesa Mesa 17.1 supports compute shaders but does not implement the full OpenGL 4.3 specification. Change the code to detect the OpenGL extension "GL_ARB_compute_shader" rather than OpenGL version 4.3. HDR peak detection requires SSBO, and the polar scaler requires the 2D array extension ("GL_ARB_arrays_of_arrays"). Add these extensions as requirements as well. --- video/out/opengl/common.c | 9 +++++++++ video/out/opengl/common.h | 2 ++ video/out/opengl/utils.c | 7 +++++++ video/out/opengl/video.c | 11 +++++++---- video/out/opengl/video_shaders.c | 1 + 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index 9af21856ab..c7a714817a 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -357,6 +357,11 @@ static const struct gl_functions gl_functions[] = { {0}, }, }, + { + .ver_core = 430, + .extension = "GL_ARB_arrays_of_arrays", + .provides = MPGL_CAP_NESTED_ARRAY, + }, // Swap control, always an OS specific extension // The OSX code loads this manually. { @@ -619,6 +624,10 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), mp_verbose(log, "Detected suspected software renderer.\n"); } + // GL_ARB_compute_shader & GL_ARB_shader_image_load_store + if (gl->DispatchCompute && gl->BindImageTexture) + gl->mpgl_caps |= MPGL_CAP_COMPUTE_SHADER; + // Provided for simpler handling if no framebuffer support is available. 
if (!gl->BindFramebuffer) gl->BindFramebuffer = &dummy_glBindFramebuffer; diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h index eec7806624..6d8015c8b3 100644 --- a/video/out/opengl/common.h +++ b/video/out/opengl/common.h @@ -55,6 +55,8 @@ enum { MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float MPGL_CAP_SSBO = (1 << 21), // GL_ARB_shader_storage_buffer_object + MPGL_CAP_COMPUTE_SHADER = (1 << 22), // GL_ARB_compute_shader & GL_ARB_shader_image_load_store + MPGL_CAP_NESTED_ARRAY = (1 << 23), // GL_ARB_arrays_of_arrays MPGL_CAP_SW = (1 << 30), // indirect or sw renderer }; diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index afbd6f65af..451010fffa 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -777,6 +777,8 @@ void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint textur void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo, char *format, ...) { + gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); + struct sc_buffer *b = find_buffer(sc, name); b->binding = sc->next_buffer_binding++; b->ssbo = ssbo; @@ -1179,6 +1181,11 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type) // set up shader text (header + uniforms + body) bstr *header = &sc->tmp[0]; ADD(header, "#version %d%s\n", gl->glsl_version, gl->es >= 300 ? " es" : ""); + if (type == GL_COMPUTE_SHADER) { + // This extension cannot be enabled in fragment shader. Enable it as + // an exception for compute shader. 
+ ADD(header, "#extension GL_ARB_compute_shader : enable\n"); + } for (int n = 0; n < sc->num_exts; n++) ADD(header, "#extension %s : enable\n", sc->exts[n]); if (gl->es) { diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index d4f746e3a2..e1fd60646a 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -1183,7 +1183,7 @@ static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh) // Clamp the texture coordinates to prevent sampling out-of-bounds in // threads that exceed the requested width/height PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n); - PRELUDE("const vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n); + PRELUDE("vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n); } pass_record(p, gl_sc_generate(p->sc, GL_COMPUTE_SHADER)); @@ -1756,10 +1756,12 @@ static void pass_sample(struct gl_video *p, struct img_tex tex, } else if (strcmp(name, "oversample") == 0) { pass_sample_oversample(p->sc, scaler, w, h); } else if (scaler->kernel && scaler->kernel->polar) { + bool use_compute_polar = (p->gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER) && + (p->gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY); // Use a compute shader where possible, fallback to the slower texture // fragment sampler otherwise. Also use the fragment shader for // very large kernels to avoid exhausting shmem - if (p->gl->glsl_version < 430 || scaler->kernel->f.radius > 16) { + if (!use_compute_polar || scaler->kernel->f.radius > 16) { pass_sample_polar(p->sc, scaler, tex.components, p->gl->glsl_version); } else { // For performance we want to load at least as many pixels @@ -3391,7 +3393,8 @@ static void check_gl_features(struct gl_video *p) bool have_mglsl = gl->glsl_version >= 130; // modern GLSL (1st class arrays etc.) 
bool have_texrg = gl->mpgl_caps & MPGL_CAP_TEX_RG; bool have_tex16 = !gl->es || (gl->mpgl_caps & MPGL_CAP_EXT16); - bool have_compute = gl->glsl_version >= 430; // easiest way to ensure all + bool have_compute = gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER; + bool have_ssbo = gl->mpgl_caps & MPGL_CAP_SSBO; const GLint auto_fbo_fmts[] = {GL_RGBA16, GL_RGBA16F, GL_RGB10_A2, GL_RGBA8, 0}; @@ -3502,7 +3505,7 @@ static void check_gl_features(struct gl_video *p) p->opts.deband = 0; MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); } - if (!have_compute && p->opts.compute_hdr_peak) { + if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) { p->opts.compute_hdr_peak = 0; MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n"); } diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c index fe6e944168..854c829f1d 100644 --- a/video/out/opengl/video_shaders.c +++ b/video/out/opengl/video_shaders.c @@ -241,6 +241,7 @@ void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, gl_sc_uniform_tex(sc, "lut", scaler->gl_target, scaler->gl_lut); // Load all relevant texels into shmem + gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays"); for (int c = 0; c < components; c++) GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw);