hwdec_vulkan: add Vulkan HW Interop

Vulkan Video Decoding has finally become a reality, as it's now
showing up in shipping drivers, and the ffmpeg support has been
merged.

With that in mind, this change introduces HW interop support for
ffmpeg Vulkan frames. The implementation is functionally complete - it
can display frames produced by hardware decoding, and it can work with
ffmpeg vulkan filters. There are still various caveats due to gaps and
bugs in drivers, so YMMV, as always.

Primary testing has been done on Intel, AMD, and nvidia hardware on
Linux with basic Windows testing on nvidia.

Notable caveats:
* Due to driver bugs, video decoding on nvidia does not work right now,
  unless you use the Vulkan Beta driver. It can be worked around, but
  requires ffmpeg changes that are not considered acceptable to merge.
* Even if those work-arounds are applied, Vulkan filters will not work
  on video that was decoded by Vulkan, due to additional bugs in the
  nvidia drivers. The filters do work correctly on content decoded some
  other way and then uploaded to Vulkan (e.g. decode with nvdec, then
  upload with --vf=format=vulkan).
* Vulkan filters can only be used with drivers that support
  VK_EXT_descriptor_buffer which doesn't include Intel ANV as yet.
  There is an MR outstanding for this.
* When dealing with 1080p content, there may be some visual distortion
  in the bottom lines of frames due to chroma scaling incorporating the
  extra hidden lines at the bottom of the frame (1080p content is
  actually stored as 1088 lines), depending on the hardware/driver
  combination and the scaling algorithm. This cannot be easily
  addressed as the mechanical fix for it violates the Vulkan spec, and
  probably requires a spec change to resolve properly.

All of these caveats will be fixed in either drivers or ffmpeg, and so
will not require mpv changes (unless something unexpected happens).

If you want to run on nvidia with the non-beta drivers, you can use this
ffmpeg tree with the work-around patches:

* https://github.com/philipl/FFmpeg/tree/vulkan-nvidia-workarounds
This commit is contained in:
Philip Langdale 2022-03-12 11:21:29 -08:00 committed by Philip Langdale
parent 085f3e31a0
commit 61e685594d
11 changed files with 387 additions and 2 deletions

View File

@ -1255,6 +1255,8 @@ Video
:nvdec-copy: copies video back to system RAM (Any platform CUDA is available)
:drm: requires ``--vo=gpu`` (Linux only)
:drm-copy: copies video back to system RAM (Linux only)
:vulkan: requires ``--vo=gpu-next`` (Any platform with Vulkan Video Decoding)
:vulkan-copy: copies video back to system RAM (Any platform with Vulkan Video Decoding)
Other hwdecs (only use if you know you have to):
@ -1310,7 +1312,8 @@ Video
.. note::
Most non-copy methods only work with the OpenGL GPU backend. Currently,
only the ``vaapi``, ``nvdec`` and ``cuda`` methods work with Vulkan.
only the ``vaapi``, ``nvdec``, ``cuda`` and ``vulkan`` methods work with
Vulkan.
The ``vaapi`` mode, if used with ``--vo=gpu``, requires Mesa 11, and most
likely works with Intel and AMD GPUs only. It also requires the opengl EGL

View File

@ -141,6 +141,11 @@ Available mpv-only filters are:
For a list of available formats, use ``--vf=format=fmt=help``.
.. note::
Conversion between hardware formats is supported in some cases.
eg: ``cuda`` to ``vulkan``, or ``vaapi`` to ``vulkan``.
``<convert=yes|no>``
Force conversion of color parameters (default: no).
@ -164,6 +169,9 @@ Available mpv-only filters are:
If input and output video parameters are the same, conversion is always
skipped.
When converting between hardware formats, this parameter has no effect,
and the only conversion that is done is the format conversion.
.. admonition:: Examples
``mpv test.mkv --vf=format:colormatrix=ycgco``

View File

@ -50,6 +50,12 @@ struct hwmap_pairs {
// We cannot discover which pairs of hardware formats need to use hwmap to
// convert between the formats, so we need a lookup table.
static const struct hwmap_pairs hwmap_pairs[] = {
#if HAVE_VULKAN_INTEROP
{
.first_fmt = IMGFMT_VAAPI,
.second_fmt = IMGFMT_VULKAN,
},
#endif
{
.first_fmt = IMGFMT_DRMPRIME,
.second_fmt = IMGFMT_VAAPI,

View File

@ -411,7 +411,7 @@ endif
if darwin
path_source = files('osdep/path-darwin.c')
sources += path_source + files('osdep/timer-darwin.c')
endif
if posix and not darwin
@ -933,6 +933,8 @@ if features['libplacebo-next']
'video/out/gpu_next/context.c')
endif
features += {'libplacebo-decode': features['libplacebo'] and libplacebo.version().version_compare('>=5.275.0')}
sdl2_video = get_option('sdl2-video').require(
features['sdl2'],
error_message: 'sdl2 was not found!',
@ -1309,6 +1311,16 @@ if features['cuda-interop'] and features['vulkan']
sources += files('video/out/hwdec/hwdec_cuda_vk.c')
endif
# Vulkan hwdec interop: only enabled when Vulkan was found, the
# 'libplacebo-decode' feature is set, and libavutil is at least 58.11.100.
vulkan_interop = get_option('vulkan-interop').require(
vulkan.found() and features['libplacebo-decode'] and
libavutil.version().version_compare('>=58.11.100'),
error_message: 'Vulkan Interop requires vulkan, libplacebo >= 5.275.0, and libavutil >= 58.11.100',
)
features += {'vulkan-interop': vulkan_interop.allowed()}
# The interop driver source is only compiled when the feature is allowed.
if vulkan_interop.allowed()
sources += files('video/out/hwdec/hwdec_vulkan.c')
endif
d3d_hwaccel = get_option('d3d-hwaccel').require(
win32,
error_message: 'the os is not win32!',

View File

@ -104,6 +104,7 @@ option('gl-dxinterop-d3d9', type: 'feature', value: 'auto', description: 'OpenGL
option('ios-gl', type: 'feature', value: 'auto', description: 'iOS OpenGL ES hardware decoding interop support')
option('rpi-mmal', type: 'feature', value: 'auto', description: 'Raspberry Pi MMAL hwaccel')
option('videotoolbox-gl', type: 'feature', value: 'auto', description: 'Videotoolbox with OpenGL')
option('vulkan-interop', type: 'feature', value: 'auto', description: 'Vulkan graphics interop')
# macOS features
option('macos-10-11-features', type: 'feature', value: 'auto', description: 'macOS 10.11 SDK Features')

View File

@ -66,6 +66,9 @@ static const struct {
{IMGFMT_CUDA, AV_PIX_FMT_CUDA},
{IMGFMT_P010, AV_PIX_FMT_P010},
{IMGFMT_DRMPRIME, AV_PIX_FMT_DRM_PRIME},
#if HAVE_VULKAN_INTEROP
{IMGFMT_VULKAN, AV_PIX_FMT_VULKAN},
#endif
{0, AV_PIX_FMT_NONE}
};

View File

@ -20,6 +20,7 @@
#include <inttypes.h>
#include "config.h"
#include "osdep/endian.h"
#include "misc/bstr.h"
#include "video/csputils.h"
@ -318,6 +319,9 @@ enum mp_imgfmt {
IMGFMT_VDPAU_OUTPUT, // VdpOutputSurface
IMGFMT_VAAPI,
IMGFMT_VIDEOTOOLBOX, // CVPixelBufferRef
#if HAVE_VULKAN_INTEROP
IMGFMT_VULKAN, // VKImage
#endif
IMGFMT_DRMPRIME, // AVDRMFrameDescriptor
// Generic pass-through of AV_PIX_FMT_*. Used for formats which don't have

View File

@ -38,6 +38,7 @@ extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay;
extern const struct ra_hwdec_driver ra_hwdec_drmprime;
extern const struct ra_hwdec_driver ra_hwdec_drmprime_overlay;
extern const struct ra_hwdec_driver ra_hwdec_aimagereader;
extern const struct ra_hwdec_driver ra_hwdec_vulkan;
const struct ra_hwdec_driver *const ra_hwdec_drivers[] = {
#if HAVE_VAAPI_EGL || HAVE_VAAPI_LIBPLACEBO
@ -79,6 +80,9 @@ const struct ra_hwdec_driver *const ra_hwdec_drivers[] = {
#if HAVE_ANDROID_MEDIA_NDK
&ra_hwdec_aimagereader,
#endif
#if HAVE_VULKAN_INTEROP
&ra_hwdec_vulkan,
#endif
NULL
};

View File

@ -0,0 +1,332 @@
/*
* Copyright (c) 2022 Philip Langdale <philipl@overt.org>
*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "video/out/gpu/hwdec.h"
#include "video/out/vulkan/context.h"
#include "video/out/placebo/ra_pl.h"
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_vulkan.h>
/* Per-driver private state of the Vulkan hwdec interop. */
struct vulkan_hw_priv {
// Device entry that vulkan_init() fills in and adds to hw->devs.
struct mp_hwdec_ctx hwctx;
// libplacebo GPU handle obtained through ra_pl_get() in vulkan_init().
pl_gpu gpu;
};
/* Per-mapper private state of the Vulkan hwdec interop. */
struct vulkan_mapper_priv {
// Image set up from the mapper's dst_params in mapper_init(); its
// num_planes drives the per-plane loops in map/unmap.
struct mp_image layout;
// The AVVkFrame recorded by mapper_map(); reset to NULL by mapper_unmap().
AVVkFrame *vkf;
// libplacebo textures created with pl_vulkan_wrap(), one per plane.
pl_tex tex[4];
};
/*
 * Queue-lock callback installed into the AVVulkanDeviceContext.
 *
 * The pl_vulkan handle is stored in the device context's user_opaque field
 * by vulkan_init(); the request is simply forwarded to its lock_queue hook.
 */
static void lock_queue(struct AVHWDeviceContext *ctx,
                       uint32_t queue_family, uint32_t index)
{
    pl_vulkan pl_vk = ctx->user_opaque;

    pl_vk->lock_queue(pl_vk, queue_family, index);
}
/*
 * Queue-unlock callback installed into the AVVulkanDeviceContext.
 *
 * Counterpart of lock_queue(): forwards the request to the unlock_queue hook
 * of the pl_vulkan handle stored in the device context's user_opaque field.
 */
static void unlock_queue(struct AVHWDeviceContext *ctx,
                         uint32_t queue_family, uint32_t index)
{
    pl_vulkan pl_vk = ctx->user_opaque;

    pl_vk->unlock_queue(pl_vk, queue_family, index);
}
static int vulkan_init(struct ra_hwdec *hw)
{
AVBufferRef *hw_device_ctx = NULL;
int ret = 0;
struct vulkan_hw_priv *p = hw->priv;
struct mpvk_ctx *vk = ra_vk_ctx_get(hw->ra_ctx);
if (!vk) {
MP_ERR(hw, "This is not a libplacebo vulkan gpu api context.\n");
return 0;
}
p->gpu = ra_pl_get(hw->ra_ctx->ra);
if (!p->gpu) {
MP_ERR(hw, "Failed to obtain pl_gpu.\n");
return 0;
}
/*
* libplacebo initialises all queues, but we still need to discover which
* one is the decode queue.
*/
uint32_t num_qf = 0;
VkQueueFamilyProperties *qf = NULL;
vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, NULL);
if (!num_qf)
goto error;
qf = talloc_array(NULL, VkQueueFamilyProperties, num_qf);
vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, qf);
int decode_index = -1, decode_count = 0;
for (int i = 0; i < num_qf; i++) {
/*
* Pick the first discovered decode queue that we find. Maybe a day will
* come when this needs to be smarter, but I'm sure a bunch of other
* things will have to change too.
*/
if ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
decode_index = i;
decode_count = qf[i].queueCount;
}
}
hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
if (!hw_device_ctx)
goto error;
AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
AVVulkanDeviceContext *device_hwctx = device_ctx->hwctx;
device_ctx->user_opaque = (void *)vk->vulkan;
device_hwctx->lock_queue = lock_queue;
device_hwctx->unlock_queue = unlock_queue;
device_hwctx->get_proc_addr = vk->vkinst->get_proc_addr;
device_hwctx->inst = vk->vkinst->instance;
device_hwctx->phys_dev = vk->vulkan->phys_device;
device_hwctx->act_dev = vk->vulkan->device;
device_hwctx->device_features = *vk->vulkan->features;
device_hwctx->enabled_inst_extensions = vk->vkinst->extensions;
device_hwctx->nb_enabled_inst_extensions = vk->vkinst->num_extensions;
device_hwctx->enabled_dev_extensions = vk->vulkan->extensions;
device_hwctx->nb_enabled_dev_extensions = vk->vulkan->num_extensions;
device_hwctx->queue_family_index = vk->vulkan->queue_graphics.index;
device_hwctx->nb_graphics_queues = vk->vulkan->queue_graphics.count;
device_hwctx->queue_family_tx_index = vk->vulkan->queue_transfer.index;
device_hwctx->nb_tx_queues = vk->vulkan->queue_transfer.count;
device_hwctx->queue_family_comp_index = vk->vulkan->queue_compute.index;
device_hwctx->nb_comp_queues = vk->vulkan->queue_compute.count;
device_hwctx->queue_family_decode_index = decode_index;
device_hwctx->nb_decode_queues = decode_count;
ret = av_hwdevice_ctx_init(hw_device_ctx);
if (ret < 0) {
MP_ERR(hw, "av_hwdevice_ctx_init failed\n");
goto error;
}
p->hwctx = (struct mp_hwdec_ctx) {
.driver_name = hw->driver->name,
.av_device_ref = hw_device_ctx,
.hw_imgfmt = IMGFMT_VULKAN,
};
hwdec_devices_add(hw->devs, &p->hwctx);
talloc_free(qf);
return 0;
error:
talloc_free(qf);
av_buffer_unref(&hw_device_ctx);
return -1;
}
static void vulkan_uninit(struct ra_hwdec *hw)
{
struct vulkan_hw_priv *p = hw->priv;
hwdec_devices_remove(hw->devs, &p->hwctx);
av_buffer_unref(&p->hwctx.av_device_ref);
}
/*
 * Set up a mapper instance.
 *
 * The output parameters are a copy of the input parameters with the hardware
 * sub-format promoted to the image format. The plane layout is cached in the
 * mapper's private data for later use by map/unmap.
 *
 * Returns 0 on success, -1 if the ra has no description for the output format.
 */
static int mapper_init(struct ra_hwdec_mapper *mapper)
{
    struct vulkan_mapper_priv *priv = mapper->priv;

    mapper->dst_params = mapper->src_params;
    mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
    mapper->dst_params.hw_subfmt = 0;

    mp_image_set_params(&priv->layout, &mapper->dst_params);

    // The descriptor itself is not needed; only whether the lookup succeeds.
    struct ra_imgfmt_desc desc = {0};
    return ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)
               ? 0 : -1;
}
/* Mapper teardown hook; intentionally empty, this function releases nothing. */
static void mapper_uninit(struct ra_hwdec_mapper *mapper)
{
}
/*
 * Undo a (possibly partial) mapper_map().
 *
 * For every texture still recorded in the private state, the image is taken
 * back from libplacebo with pl_vulkan_hold_ex() and the AVVkFrame's layout,
 * access and semaphore value are updated accordingly. The frame is then
 * unlocked and the ra_tex wrappers are freed.
 *
 * The hold/unlock step is skipped when there is no source image or no
 * recorded AVVkFrame, so the function is safe to call when nothing is mapped.
 */
static void mapper_unmap(struct ra_hwdec_mapper *mapper)
{
    struct vulkan_hw_priv *p_owner = mapper->owner->priv;
    struct vulkan_mapper_priv *p = mapper->priv;
    const int max_tex = sizeof(p->tex) / sizeof(p->tex[0]);

    // Without a recorded AVVkFrame there is nothing to hand back and nothing
    // for us to unlock.
    if (mapper->src && p->vkf) {
        AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
        const AVVulkanFramesContext *vkfc = hwfc->hwctx;
        AVVkFrame *vkf = p->vkf;

        // Count the images in the frame, never reading past the array.
        const int max_images = sizeof(vkf->img) / sizeof(vkf->img[0]);
        int num_images = 0;
        while (num_images < max_images && vkf->img[num_images])
            num_images++;

        // Bounded by the array size instead of relying on a NULL terminator,
        // which does not exist when all slots are in use.
        for (int i = 0; i < max_tex; i++) {
            pl_tex *tex = &p->tex[i];
            if (!*tex)
                continue;

            // If we have multiple planes and one image, then that is a
            // multiplane frame. Anything else is treated as
            // one-image-per-plane.
            int index = p->layout.num_planes > 1 && num_images == 1 ? 0 : i;

            // Update AVVkFrame state to reflect current layout
            bool ok = pl_vulkan_hold_ex(p_owner->gpu, pl_vulkan_hold_params(
                .tex = *tex,
                .out_layout = &vkf->layout[index],
                .qf = VK_QUEUE_FAMILY_IGNORED,
                .semaphore = (pl_vulkan_sem) {
                    .sem = vkf->sem[index],
                    .value = vkf->sem_value[index] + 1,
                },
            ));

            vkf->access[index] = 0;
            // Only advance the semaphore value if the hold was submitted.
            vkf->sem_value[index] += !!ok;
            *tex = NULL;
        }

        vkfc->unlock_frame(hwfc, vkf);
    }

    for (int i = 0; i < p->layout.num_planes; i++)
        ra_tex_free(mapper->ra, &mapper->tex[i]);

    // Make sure no stale handles survive when the hold loop was skipped.
    for (int i = 0; i < max_tex; i++)
        p->tex[i] = NULL;

    p->vkf = NULL;
}
static int mapper_map(struct ra_hwdec_mapper *mapper)
{
bool result = false;
struct vulkan_hw_priv *p_owner = mapper->owner->priv;
struct vulkan_mapper_priv *p = mapper->priv;
pl_vulkan vk = pl_vulkan_get(p_owner->gpu);
if (!vk)
return -1;
AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
const AVVulkanFramesContext *vkfc = hwfc->hwctx;
AVVkFrame *vkf = (AVVkFrame *) mapper->src->planes[0];
/*
* We need to use the dimensions from the HW Frames Context for the
* textures, as the underlying images may be larger than the logical frame
* size. This most often happens with 1080p content where the actual frame
* height is 1088.
*/
struct mp_image raw_layout;
mp_image_setfmt(&raw_layout, p->layout.params.imgfmt);
mp_image_set_size(&raw_layout, hwfc->width, hwfc->height);
int num_images;
for (num_images = 0; (vkf->img[num_images] != NULL); num_images++);
const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
vkfc->lock_frame(hwfc, vkf);
for (int i = 0; i < p->layout.num_planes; i++) {
pl_tex *tex = &p->tex[i];
VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT;
int index = i;
// If we have multiple planes and one image, then that is a multiplane
// frame. Anything else is treated as one-image-per-plane.
if (p->layout.num_planes > 1 && num_images == 1) {
index = 0;
switch (i) {
case 0:
aspect = VK_IMAGE_ASPECT_PLANE_0_BIT_KHR;
break;
case 1:
aspect = VK_IMAGE_ASPECT_PLANE_1_BIT_KHR;
break;
case 2:
aspect = VK_IMAGE_ASPECT_PLANE_2_BIT_KHR;
break;
default:
goto error;
}
}
*tex = pl_vulkan_wrap(p_owner->gpu, pl_vulkan_wrap_params(
.image = vkf->img[index],
.width = mp_image_plane_w(&raw_layout, i),
.height = mp_image_plane_h(&raw_layout, i),
.format = vk_fmt[i],
.usage = vkfc->usage,
.aspect = aspect,
));
if (!*tex)
goto error;
pl_vulkan_release_ex(p_owner->gpu, pl_vulkan_release_params(
.tex = p->tex[i],
.layout = vkf->layout[index],
.qf = VK_QUEUE_FAMILY_IGNORED,
.semaphore = (pl_vulkan_sem) {
.sem = vkf->sem[index],
.value = vkf->sem_value[index],
},
));
struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
result = mppl_wrap_tex(mapper->ra, *tex, ratex);
if (!result) {
pl_tex_destroy(p_owner->gpu, tex);
talloc_free(ratex);
goto error;
}
mapper->tex[i] = ratex;
}
p->vkf = vkf;
return 0;
error:
vkfc->unlock_frame(hwfc, vkf);
mapper_unmap(mapper);
return -1;
}
/*
 * Driver table entry for the Vulkan hwdec interop. It handles IMGFMT_VULKAN
 * frames and wires up the init/uninit and mapper callbacks defined in this
 * file.
 */
const struct ra_hwdec_driver ra_hwdec_vulkan = {
.name = "vulkan",
// Zero-terminated list of handled image formats.
.imgfmts = {IMGFMT_VULKAN, 0},
.priv_size = sizeof(struct vulkan_hw_priv),
.init = vulkan_init,
.uninit = vulkan_uninit,
.mapper = &(const struct ra_hwdec_mapper_driver){
.priv_size = sizeof(struct vulkan_mapper_priv),
.init = mapper_init,
.uninit = mapper_uninit,
.map = mapper_map,
.unmap = mapper_unmap,
},
};

11
wscript
View File

@ -797,6 +797,12 @@ video_output_features = [
'deps': 'libplacebo',
'func': check_preprocessor('libplacebo/config.h', 'PL_API_VER >= 264',
use='libplacebo'),
}, {
'name': 'libplacebo-decode',
'desc': 'libplacebo v5.275.0+, needed for Vulkan video decode',
'deps': 'libplacebo',
'func': check_preprocessor('libplacebo/config.h', 'PL_API_VER >= 275',
use='libplacebo'),
}, {
'name': '--vulkan',
'desc': 'Vulkan context support',
@ -808,6 +814,11 @@ video_output_features = [
'deps': 'vulkan',
'func': check_statement('vulkan/vulkan_core.h', 'vkCreateDisplayPlaneSurfaceKHR(0, 0, 0, 0)',
use='vulkan')
}, {
'name': '--vulkan-interop',
'desc': 'Vulkan graphics interop',
'deps': 'vulkan && libplacebo-next',
'func': check_pkg_config('libavutil', '>= 58.11.100'),
}, {
'name': 'vaapi-libplacebo',
'desc': 'VAAPI libplacebo',

View File

@ -486,6 +486,7 @@ def build(ctx):
( "video/out/hwdec/hwdec_drmprime.c", "drm" ),
( "video/out/hwdec/hwdec_drmprime_overlay.c","drm" ),
( "video/out/hwdec/hwdec_vaapi.c", "vaapi-egl || vaapi-libplacebo" ),
( "video/out/hwdec/hwdec_vulkan.c", "vulkan-interop" ),
( "video/out/hwdec/dmabuf_interop_gl.c", "dmabuf-interop-gl" ),
( "video/out/hwdec/dmabuf_interop_pl.c", "dmabuf-interop-pl" ),
( "video/out/hwdec/dmabuf_interop_wl.c", "dmabuf-wayland" ),