diff --git a/DOCS/man/vo.rst b/DOCS/man/vo.rst index 69c554fa16..0725cf19e6 100644 --- a/DOCS/man/vo.rst +++ b/DOCS/man/vo.rst @@ -804,7 +804,7 @@ Available video output drivers are: angle Direct3D11 through the OpenGL ES translation layer ANGLE. This supports almost everything the ``win`` backend does, except ICC - profiles, high bit depth video input, and the ``nnedi3`` prescaler. + profiles, and the ``nnedi3`` prescaler. dxinterop (experimental) Win32, using WGL for rendering and Direct3D 9Ex for presentation. Works on Nvidia and AMD only. diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 8247588341..588b6cddf8 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -670,6 +670,15 @@ void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target u->v.i[0] = unit; } +void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->type = UT_i; + u->size = 1; + u->glsl_type = "usampler2D"; + u->v.i[0] = unit; +} + void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f) { struct sc_uniform *u = find_uniform(sc, name); diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 280be0396d..3ec6077bf5 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -126,6 +126,7 @@ void gl_sc_hadd(struct gl_shader_cache *sc, const char *text); void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...); void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target, int unit); +void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit); void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, GLfloat f); void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, GLint f); void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, GLfloat f[2]); diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 9a3b0c16f2..bbf01f2cbb 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -93,6 +93,7 @@ struct texplane { int w, h; GLint gl_internal_format; GLenum gl_target; + bool use_integer; GLenum gl_format; GLenum gl_type; GLuint gl_texture; @@ -115,6 +116,7 @@ struct fbosurface { struct src_tex { GLuint gl_tex; GLenum gl_target; + bool use_integer; int w, h; struct mp_rect_f src; }; @@ -160,10 +162,12 @@ struct gl_video { bool is_yuv, is_packed_yuv; bool has_alpha; char color_swizzle[5]; + bool use_integer_conversion; struct video_image image; bool dumb_mode; + bool forced_dumb_mode; struct fbotex chroma_merge_fbo; struct fbotex chroma_deband_fbo; @@ -173,6 +177,7 @@ struct gl_video { struct fbotex output_fbo; struct fbotex deband_fbo; struct fbosurface surfaces[FBOSURFACES_MAX]; + struct fbotex integer_conv_fbo[4]; // these are duplicated so we can keep rendering back and forth between // them to support an unlimited number of shader passes per step @@ -260,6 +265,17 @@ static const struct fmt_entry gl_byte_formats_gles3[] = { {0, 0, 0, 0}, // 4 x 16 }; +static const struct fmt_entry gl_ui_byte_formats_gles3[] = { + {0, GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // 1 x 8 + {0, GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // 2 x 8 + {0, GL_RGB8UI, GL_RGB_INTEGER, GL_UNSIGNED_BYTE}, // 3 x 8 + {0, GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // 4 x 8 + {0, GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // 1 x 16 + {0, GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // 2 x 16 + {0, GL_RGB16UI, GL_RGB_INTEGER, GL_UNSIGNED_SHORT}, // 3 x 16 + {0, GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // 4 x 16 +}; + static const struct fmt_entry gl_byte_formats_gles2[] = { {0, GL_LUMINANCE, GL_LUMINANCE, GL_UNSIGNED_BYTE}, // 1 x 8 {0, GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8 @@ -507,6 +523,7 @@ static void get_scale_factors(struct gl_video *p, double xy[2]); #define GLSL(x) gl_sc_add(p->sc, #x "\n"); #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) +// Return a fixed point texture format with given characteristics. static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp, int n_channels) { @@ -523,6 +540,19 @@ static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp, return &fmts[n_channels - 1 + (bytes_per_comp - 1) * 4]; } +static bool is_integer_format(const struct fmt_entry *fmt) +{ + // Tests only the formats which we actually declare somewhere. + switch (fmt->format) { + case GL_RED_INTEGER: + case GL_RG_INTEGER: + case GL_RGB_INTEGER: + case GL_RGBA_INTEGER: + return true; + } + return false; +} + static const char *load_cached_file(struct gl_video *p, const char *path) { if (!path || !path[0]) @@ -624,6 +654,9 @@ static void uninit_rendering(struct gl_video *p) fbotex_uninit(&p->unsharp_fbo); fbotex_uninit(&p->deband_fbo); + for (int n = 0; n < 4; n++) + fbotex_uninit(&p->integer_conv_fbo[n]); + for (int n = 0; n < 2; n++) { fbotex_uninit(&p->pre_fbo[n]); fbotex_uninit(&p->post_fbo[n]); @@ -722,8 +755,9 @@ static void pass_set_image_textures(struct gl_video *p, struct video_image *vimg for (int n = 0; n < p->plane_count; n++) { struct texplane *t = &vimg->planes[n]; p->pass_tex[n] = (struct src_tex){ - .gl_tex = vimg->planes[n].gl_texture, + .gl_tex = t->gl_texture, .gl_target = t->gl_target, + .use_integer = t->use_integer, .w = t->w, .h = t->h, .src = {0, 0, t->w, t->h}, @@ -735,11 +769,11 @@ static void init_video(struct gl_video *p) { GL *gl = p->gl; - check_gl_features(p); - init_format(p->image_params.imgfmt, p); p->gl_target = p->opts.use_rectangle ? GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D; + check_gl_features(p); + if (p->hwdec_active) { if (p->hwdec->driver->reinit(p->hwdec, &p->image_params) < 0) MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); @@ -783,8 +817,9 @@ static void init_video(struct gl_video *p) plane->w, plane->h, 0, plane->gl_format, plane->gl_type, NULL); - gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + int filter = plane->use_integer ? GL_NEAREST : GL_LINEAR; + gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, filter); gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } @@ -838,7 +873,11 @@ static void pass_prepare_src_tex(struct gl_video *p) snprintf(texture_name, sizeof(texture_name), "texture%d", n); snprintf(texture_size, sizeof(texture_size), "texture_size%d", n); - gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n); + if (s->use_integer) { + gl_sc_uniform_sampler_ui(sc, texture_name, n); + } else { + gl_sc_uniform_sampler(sc, texture_name, s->gl_target, n); + } float f[2] = {1, 1}; if (s->gl_target != GL_TEXTURE_RECTANGLE) { f[0] = s->w; @@ -1334,6 +1373,37 @@ static bool pass_prescale_luma(struct gl_video *p, float tex_mul, return true; } +// The input textures are in an integer format (non-fixed-point), like R16UI. +// Convert it to float in an extra pass. +static void pass_integer_conversion(struct gl_video *p) +{ + double tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace, + p->image_desc.component_bits, + p->image_desc.component_full_bits); + uint64_t tex_max = 1ull << p->image_desc.component_full_bits; + tex_mul *= 1.0 / (tex_max - 1); + + struct src_tex pass_tex[TEXUNIT_VIDEO_NUM]; + assert(sizeof(pass_tex) == sizeof(p->pass_tex)); + memcpy(pass_tex, p->pass_tex, sizeof(pass_tex)); + + for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) { + if (!p->pass_tex[n].gl_tex) + continue; + GLSLF("// integer conversion plane %d\n", n); + GLSLF("uvec4 icolor = texture(texture%d, texcoord%d);\n", n, n); + GLSLF("vec4 color = vec4(icolor) * tex_mul;\n"); + gl_sc_uniform_f(p->sc, "tex_mul", tex_mul); + int c_w = p->pass_tex[n].src.x1 - p->pass_tex[n].src.x0; + int c_h = p->pass_tex[n].src.y1 - p->pass_tex[n].src.y0; + finish_pass_fbo(p, &p->integer_conv_fbo[n], c_w, c_h, n, 0); + pass_tex[n] = p->pass_tex[n]; + memcpy(p->pass_tex, pass_tex, sizeof(p->pass_tex)); + } + + p->use_normalized_range = true; +} + // sample from video textures, set "color" variable to yuv value static void pass_read_video(struct gl_video *p) { @@ -1342,9 +1412,14 @@ static void pass_read_video(struct gl_video *p) struct gl_transform chromafix; pass_set_image_textures(p, &p->image, &chromafix); + if (p->use_integer_conversion) + pass_integer_conversion(p); + float tex_mul = 1 / mp_get_csp_mul(p->image_params.colorspace, p->image_desc.component_bits, p->image_desc.component_full_bits); + if (p->use_normalized_range) + tex_mul = 1.0; struct src_tex prescaled_tex; struct gl_transform offset = {{{0}}}; @@ -2389,6 +2464,8 @@ static bool test_fbo(struct gl_video *p) static bool check_dumb_mode(struct gl_video *p) { struct gl_video_opts *o = &p->opts; + if (p->use_integer_conversion) + return false; if (o->dumb_mode) return true; if (o->target_prim || o->target_trc || o->linear_scaling || @@ -2434,8 +2511,9 @@ static void check_gl_features(struct gl_video *p) MP_WARN(p, "Disabling PBOs (GLES unsupported).\n"); } + p->forced_dumb_mode = p->opts.dumb_mode || !have_fbo || !have_texrg; bool voluntarily_dumb = check_dumb_mode(p); - if (p->opts.dumb_mode || !have_fbo || !have_texrg || voluntarily_dumb) { + if (p->forced_dumb_mode || voluntarily_dumb) { if (voluntarily_dumb) { MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n"); } else if (!p->opts.dumb_mode) { @@ -2638,6 +2716,17 @@ static void packed_fmt_swizzle(char w[5], const struct fmt_entry *texfmt, w[4] = '\0'; } +// Like find_tex_format(), but takes bits (not bytes), and but if no fixed point +// format is available, return an unsigned integer format. +static const struct fmt_entry *find_plane_format(GL *gl, int bytes_per_comp, + int n_channels) +{ + const struct fmt_entry *e = find_tex_format(gl, bytes_per_comp, n_channels); + if (e->format || gl->es < 300) + return e; + return &gl_ui_byte_formats_gles3[n_channels - 1 + (bytes_per_comp - 1) * 4]; +} + static bool init_format(int fmt, struct gl_video *init) { struct GL *gl = init->gl; @@ -2665,7 +2754,7 @@ static bool init_format(int fmt, struct gl_video *init) int bits = desc.component_bits; if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) { init->has_alpha = desc.num_planes > 3; - plane_format[0] = find_tex_format(gl, (bits + 7) / 8, 1); + plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1); for (int p = 1; p < desc.num_planes; p++) plane_format[p] = plane_format[0]; // RGB/planar @@ -2679,8 +2768,8 @@ static bool init_format(int fmt, struct gl_video *init) if (desc.flags & MP_IMGFLAG_YUV_NV) { int bits = desc.component_bits; if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) { - plane_format[0] = find_tex_format(gl, (bits + 7) / 8, 1); - plane_format[1] = find_tex_format(gl, (bits + 7) / 8, 2); + plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1); + plane_format[1] = find_plane_format(gl, (bits + 7) / 8, 2); if (desc.flags & MP_IMGFLAG_YUV_NV_SWAP) snprintf(init->color_swizzle, sizeof(init->color_swizzle), "rbga"); goto supported; @@ -2735,10 +2824,20 @@ supported: return false; } + int use_integer = -1; for (int p = 0; p < desc.num_planes; p++) { if (!plane_format[p]->format) return false; + int use_int_plane = !!is_integer_format(plane_format[p]); + if (use_integer < 0) + use_integer = use_int_plane; + if (use_integer != use_int_plane) + return false; // mixed planes not supported } + init->use_integer_conversion = use_integer; + + if (init->use_integer_conversion && init->forced_dumb_mode) + return false; for (int p = 0; p < desc.num_planes; p++) { struct texplane *plane = &init->image.planes[p]; @@ -2747,6 +2846,7 @@ supported: plane->gl_format = format->format; plane->gl_internal_format = format->internal_format; plane->gl_type = format->type; + plane->use_integer = init->use_integer_conversion; } init->is_yuv = desc.flags & MP_IMGFLAG_YUV;