1
mirror of https://github.com/mpv-player/mpv synced 2024-11-14 22:48:35 +01:00

vo_opengl: enable compute shader for mesa

Mesa 17.1 supports compute shaders but not the full OpenGL 4.3 specification.
Change the code to detect the OpenGL extension "GL_ARB_compute_shader"
rather than OpenGL version 4.3.

HDR peak detection requires SSBO, and the polar scaler requires the 2D array
extension (GL_ARB_arrays_of_arrays). Add these extensions as requirements as well.
This commit is contained in:
Bin Jin 2017-07-24 19:15:15 +00:00
parent dbef5b737e
commit 13ef6bcf6f
5 changed files with 26 additions and 4 deletions

View File

@ -357,6 +357,11 @@ static const struct gl_functions gl_functions[] = {
{0},
},
},
{
.ver_core = 430,
.extension = "GL_ARB_arrays_of_arrays",
.provides = MPGL_CAP_NESTED_ARRAY,
},
// Swap control, always an OS specific extension
// The OSX code loads this manually.
{
@ -619,6 +624,10 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
mp_verbose(log, "Detected suspected software renderer.\n");
}
// GL_ARB_compute_shader & GL_ARB_shader_image_load_store
if (gl->DispatchCompute && gl->BindImageTexture)
gl->mpgl_caps |= MPGL_CAP_COMPUTE_SHADER;
// Provided for simpler handling if no framebuffer support is available.
if (!gl->BindFramebuffer)
gl->BindFramebuffer = &dummy_glBindFramebuffer;

View File

@ -55,6 +55,8 @@ enum {
MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float
MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float
MPGL_CAP_SSBO = (1 << 21), // GL_ARB_shader_storage_buffer_object
MPGL_CAP_COMPUTE_SHADER = (1 << 22), // GL_ARB_compute_shader & GL_ARB_shader_image_load_store
MPGL_CAP_NESTED_ARRAY = (1 << 23), // GL_ARB_arrays_of_arrays
MPGL_CAP_SW = (1 << 30), // indirect or sw renderer
};

View File

@ -777,6 +777,8 @@ void gl_sc_uniform_image2D(struct gl_shader_cache *sc, char *name, GLuint textur
void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, GLuint ssbo,
char *format, ...)
{
gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object");
struct sc_buffer *b = find_buffer(sc, name);
b->binding = sc->next_buffer_binding++;
b->ssbo = ssbo;
@ -1179,6 +1181,11 @@ struct mp_pass_perf gl_sc_generate(struct gl_shader_cache *sc, GLenum type)
// set up shader text (header + uniforms + body)
bstr *header = &sc->tmp[0];
ADD(header, "#version %d%s\n", gl->glsl_version, gl->es >= 300 ? " es" : "");
if (type == GL_COMPUTE_SHADER) {
// This extension cannot be enabled in fragment shader. Enable it as
// an exception for compute shader.
ADD(header, "#extension GL_ARB_compute_shader : enable\n");
}
for (int n = 0; n < sc->num_exts; n++)
ADD(header, "#extension %s : enable\n", sc->exts[n]);
if (gl->es) {

View File

@ -1183,7 +1183,7 @@ static void dispatch_compute(struct gl_video *p, int w, int h, int bw, int bh)
// Clamp the texture coordinates to prevent sampling out-of-bounds in
// threads that exceed the requested width/height
PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n);
PRELUDE("const vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n);
PRELUDE("vec2 texcoord%d = texmap%d(gl_GlobalInvocationID);\n", n, n);
}
pass_record(p, gl_sc_generate(p->sc, GL_COMPUTE_SHADER));
@ -1756,10 +1756,12 @@ static void pass_sample(struct gl_video *p, struct img_tex tex,
} else if (strcmp(name, "oversample") == 0) {
pass_sample_oversample(p->sc, scaler, w, h);
} else if (scaler->kernel && scaler->kernel->polar) {
bool use_compute_polar = (p->gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER) &&
(p->gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY);
// Use a compute shader where possible, fallback to the slower texture
// fragment sampler otherwise. Also use the fragment shader for
// very large kernels to avoid exhausting shmem
if (p->gl->glsl_version < 430 || scaler->kernel->f.radius > 16) {
if (!use_compute_polar || scaler->kernel->f.radius > 16) {
pass_sample_polar(p->sc, scaler, tex.components, p->gl->glsl_version);
} else {
// For performance we want to load at least as many pixels
@ -3391,7 +3393,8 @@ static void check_gl_features(struct gl_video *p)
bool have_mglsl = gl->glsl_version >= 130; // modern GLSL (1st class arrays etc.)
bool have_texrg = gl->mpgl_caps & MPGL_CAP_TEX_RG;
bool have_tex16 = !gl->es || (gl->mpgl_caps & MPGL_CAP_EXT16);
bool have_compute = gl->glsl_version >= 430; // easiest way to ensure all
bool have_compute = gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER;
bool have_ssbo = gl->mpgl_caps & MPGL_CAP_SSBO;
const GLint auto_fbo_fmts[] = {GL_RGBA16, GL_RGBA16F, GL_RGB10_A2,
GL_RGBA8, 0};
@ -3502,7 +3505,7 @@ static void check_gl_features(struct gl_video *p)
p->opts.deband = 0;
MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
}
if (!have_compute && p->opts.compute_hdr_peak) {
if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) {
p->opts.compute_hdr_peak = 0;
MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n");
}

View File

@ -241,6 +241,7 @@ void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
gl_sc_uniform_tex(sc, "lut", scaler->gl_target, scaler->gl_lut);
// Load all relevant texels into shmem
gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays");
for (int c = 0; c < components; c++)
GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw);