avformat/{isom,mov,movenc}: add support for CMAF DASH roles

This information is coded in a standard MP4 KindBox and utilizes the
scheme and values as per the DASH role scheme defined in MPEG-DASH.
Other schemes are technically allowed, but where multiple schemes
define the same concepts, the DASH scheme should be utilized.

Such flagging is additionally utilized by the DASH-IF CMAF ingest
specification, enabling an encoder to inform the following component
of the roles of the incoming media streams.

A test is added for this functionality in a similar manner to the
matroska test.

Signed-off-by: Jan Ekström <jan.ekstrom@24i.com>
This commit is contained in:
Jan Ekström 2021-07-15 09:57:48 +03:00 committed by Jan Ekström
parent 847fd8de7c
commit 7a446b1179
7 changed files with 264 additions and 1 deletions

View File

@ -430,3 +430,22 @@ void ff_mov_write_chan(AVIOContext *pb, int64_t channel_layout)
}
avio_wb32(pb, 0); // mNumberChannelDescriptions
}
/*
 * Value strings of the DASH role scheme paired with the stream disposition
 * flags they correspond to. The table ends with an entry whose disposition
 * is 0.
 */
static const struct MP4TrackKindValueMapping dash_role_map[] = {
    { .disposition = AV_DISPOSITION_HEARING_IMPAIRED | AV_DISPOSITION_CAPTIONS,
      .value       = "caption" },
    { .disposition = AV_DISPOSITION_COMMENT,
      .value       = "commentary" },
    { .disposition = AV_DISPOSITION_VISUAL_IMPAIRED | AV_DISPOSITION_DESCRIPTIONS,
      .value       = "description" },
    { .disposition = AV_DISPOSITION_DUB,
      .value       = "dub" },
    { .disposition = AV_DISPOSITION_FORCED,
      .value       = "forced-subtitle" },
    /* end-of-table marker */
    { .disposition = 0,
      .value       = NULL },
};
/*
 * Track kind schemes known to the (de)muxer, each paired with its table of
 * value mappings. The list ends with an entry whose scheme_uri is NULL.
 */
const struct MP4TrackKindMapping ff_mov_track_kind_table[] = {
    { .scheme_uri = "urn:mpeg:dash:role:2011", .value_maps = dash_role_map },
    /* end-of-table marker */
    { .scheme_uri = NULL,                      .value_maps = NULL },
};

View File

@ -390,4 +390,16 @@ static inline enum AVCodecID ff_mov_get_lpcm_codec_id(int bps, int flags)
#define MOV_ISMV_TTML_TAG MKTAG('d', 'f', 'x', 'p')
#define MOV_MP4_TTML_TAG MKTAG('s', 't', 'p', 'p')
/* Pairs one track kind value string with the stream disposition flags it stands for. */
struct MP4TrackKindValueMapping {
/* AV_DISPOSITION_* flag(s) for this value; 0 marks the end of a table */
int disposition;
/* value string of the kind; NULL in the end-of-table entry */
const char *value;
};
/* Associates a track kind scheme URI with the value mappings defined for that scheme. */
struct MP4TrackKindMapping {
/* scheme URI string; NULL marks the end of a table */
const char *scheme_uri;
/* value mappings for this scheme, ended by an entry whose disposition is 0 */
const struct MP4TrackKindValueMapping *value_maps;
};
/* Table of supported track kind schemes; the last entry has a NULL scheme_uri. */
extern const struct MP4TrackKindMapping ff_mov_track_kind_table[];
#endif /* AVFORMAT_ISOM_H */

View File

@ -28,6 +28,7 @@
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavutil/bprint.h"
#include "libavutil/channel_layout.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
@ -6853,6 +6854,95 @@ static int mov_read_dvcc_dvvc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
return 0;
}
/**
 * Parse a 'kind' box (KindBox) and translate known scheme/value pairs into
 * disposition flags on the most recently added stream.
 *
 * The payload is a FullBox header (version + flags, both required to be 0)
 * followed by two null-terminated strings: the scheme URI and the value.
 *
 * @param c    demuxer context
 * @param pb   I/O context the box payload is read from
 * @param atom box being parsed; atom.size is used as the payload byte count
 * @return 0 on success (also when no stream exists yet),
 *         AVERROR_INVALIDDATA for a too-short, unsupported or unterminated
 *         box, or the negative error code returned while reading a string
 */
static int mov_read_kind(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
    AVFormatContext *ctx = c->fc;
    AVStream *st = NULL;
    AVBPrint scheme_buf, value_buf;
    int64_t scheme_str_len = 0, value_str_len = 0;
    int version, flags, ret = AVERROR_BUG;
    int64_t size = atom.size;

    if (atom.size < 6)
        // 4 bytes for version + flags, 2x 1 byte for null
        return AVERROR_INVALIDDATA;

    if (ctx->nb_streams < 1)
        return 0;
    st = ctx->streams[ctx->nb_streams - 1];

    version = avio_r8(pb);
    flags   = avio_rb24(pb);
    size   -= 4;

    if (version != 0 || flags != 0) {
        av_log(ctx, AV_LOG_ERROR,
               "Unsupported 'kind' box with version %d, flags: %x\n",
               version, flags);
        return AVERROR_INVALIDDATA;
    }

    // from here on both buffers must be released through the cleanup label
    av_bprint_init(&scheme_buf, 0, AV_BPRINT_SIZE_UNLIMITED);
    av_bprint_init(&value_buf,  0, AV_BPRINT_SIZE_UNLIMITED);

    if ((scheme_str_len = ff_read_string_to_bprint_overwrite(pb, &scheme_buf,
                                                             size)) < 0) {
        ret = scheme_str_len;
        goto cleanup;
    }

    if (scheme_str_len + 1 >= size) {
        // we need to have another string, even if it is empty.
        // we check with + 1 since we expect that if size was not hit,
        // an additional null was read.
        ret = AVERROR_INVALIDDATA;
        goto cleanup;
    }

    size -= scheme_str_len + 1;

    if ((value_str_len = ff_read_string_to_bprint_overwrite(pb, &value_buf,
                                                            size)) < 0) {
        ret = value_str_len;
        goto cleanup;
    }

    if (value_str_len == size) {
        // in case of no trailing null, box is not valid.
        ret = AVERROR_INVALIDDATA;
        goto cleanup;
    }

    av_log(ctx, AV_LOG_TRACE,
           "%s stream %d KindBox(scheme: %s, value: %s)\n",
           av_get_media_type_string(st->codecpar->codec_type),
           st->index,
           scheme_buf.str, value_buf.str);

    // NOTE(review): av_strstart() only tests that the parsed string begins
    // with the table entry, so longer strings sharing that prefix match as
    // well - confirm that this is intended.
    for (int i = 0; ff_mov_track_kind_table[i].scheme_uri; i++) {
        const struct MP4TrackKindMapping map = ff_mov_track_kind_table[i];
        if (!av_strstart(scheme_buf.str, map.scheme_uri, NULL))
            continue;

        for (int j = 0; map.value_maps[j].disposition; j++) {
            const struct MP4TrackKindValueMapping value_map = map.value_maps[j];
            if (!av_strstart(value_buf.str, value_map.value, NULL))
                continue;

            // flags are accumulated, existing dispositions are kept
            st->disposition |= value_map.disposition;
        }
    }

    ret = 0;

cleanup:
    av_bprint_finalize(&scheme_buf, NULL);
    av_bprint_finalize(&value_buf,  NULL);

    return ret;
}
static const MOVParseTableEntry mov_default_parse_table[] = {
{ MKTAG('A','C','L','R'), mov_read_aclr },
{ MKTAG('A','P','R','G'), mov_read_avid },
@ -6950,6 +7040,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
{ MKTAG('c','l','l','i'), mov_read_clli },
{ MKTAG('d','v','c','C'), mov_read_dvcc_dvvc },
{ MKTAG('d','v','v','C'), mov_read_dvcc_dvvc },
{ MKTAG('k','i','n','d'), mov_read_kind },
{ 0, NULL }
};

View File

@ -3322,6 +3322,52 @@ static int mov_write_track_metadata(AVIOContext *pb, AVStream *st,
return update_size(pb, pos);
}
/**
 * Write a single 'kind' FullBox holding a scheme URI and an optional value.
 *
 * @param pb         output I/O context
 * @param scheme_uri scheme URI string, written followed by a null byte
 * @param value      value string; when NULL or empty only the terminating
 *                   null byte is written
 * @return whatever update_size() returns for the written box
 */
static int mov_write_track_kind(AVIOContext *pb, const char *scheme_uri,
                                const char *value)
{
    const int64_t box_start = avio_tell(pb);
    const int has_value     = value && value[0];

    /* box header: size placeholder, finalized through update_size() below */
    avio_wb32(pb, 0);
    ffio_wfourcc(pb, (const unsigned char *)"kind");

    /* FullBox fields: version = 0, flags = 0 */
    avio_w8(pb, 0);
    avio_wb24(pb, 0);

    /* scheme URI is mandatory and null-terminated */
    avio_write(pb, (const unsigned char *)scheme_uri, strlen(scheme_uri));
    avio_w8(pb, 0);

    /* value may be absent, but its terminating null is always written */
    if (has_value) {
        avio_write(pb, (const unsigned char *)value, strlen(value));
    }
    avio_w8(pb, 0);

    return update_size(pb, box_start);
}
/**
 * Write one 'kind' box for every entry of ff_mov_track_kind_table whose
 * disposition flags overlap with those set on the stream.
 *
 * @param pb output I/O context
 * @param st stream whose disposition flags are inspected
 * @return 0 on success, or the first negative value returned by
 *         mov_write_track_kind()
 */
static int mov_write_track_kinds(AVIOContext *pb, AVStream *st)
{
    const struct MP4TrackKindMapping *scheme;

    for (scheme = ff_mov_track_kind_table; scheme->scheme_uri; scheme++) {
        const struct MP4TrackKindValueMapping *entry;

        for (entry = scheme->value_maps; entry->disposition; entry++) {
            int ret;

            /* any shared flag is enough to emit this kind */
            if (!(st->disposition & entry->disposition))
                continue;

            ret = mov_write_track_kind(pb, scheme->scheme_uri, entry->value);
            if (ret < 0)
                return ret;
        }
    }

    return 0;
}
static int mov_write_track_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
AVStream *st)
{
@ -3339,6 +3385,11 @@ static int mov_write_track_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
if (mov->mode & (MODE_MP4|MODE_MOV))
mov_write_track_metadata(pb_buf, st, "name", "title");
if (mov->mode & MODE_MP4) {
if ((ret = mov_write_track_kinds(pb_buf, st)) < 0)
return ret;
}
if ((size = avio_get_dyn_buf(pb_buf, &buf)) > 0) {
avio_wb32(pb, size + 8);
ffio_wfourcc(pb, "udta");

View File

@ -33,7 +33,7 @@
// Also please add any ticket numbers that you believe might be affected here
#define LIBAVFORMAT_VERSION_MAJOR 59
#define LIBAVFORMAT_VERSION_MINOR 5
#define LIBAVFORMAT_VERSION_MICRO 100
#define LIBAVFORMAT_VERSION_MICRO 101
#define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
LIBAVFORMAT_VERSION_MINOR, \

View File

@ -136,6 +136,15 @@ FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL SRT_DEMUXER MOV_DEMUXER SUB
fate-mov-mp4-ttml-stpp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
fate-mov-mp4-ttml-dfxp: CMD = transcode srt $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt mp4 "-map 0:s -c:s ttml -time_base:s 1:1000 -tag:s dfxp -strict unofficial" "-map 0 -c copy" "" "-of json -show_entries packet:stream=index,codec_type,codec_tag_string,codec_tag,codec_name,time_base,start_time,duration_ts,duration,nb_frames,nb_read_packets:stream_tags"
# Resulting remux should have:
# 1. first audio stream with AV_DISPOSITION_HEARING_IMPAIRED | CAPTIONS
#    (only hearing_impaired is requested; the "caption" role maps back to both)
# 2. second audio stream with AV_DISPOSITION_VISUAL_IMPAIRED | DESCRIPTIONS
FATE_MOV_FFMPEG_FFPROBE-$(call ALLYES, FILE_PROTOCOL PIPE_PROTOCOL \
MPEGTS_DEMUXER MOV_DEMUXER AC3_DECODER \
MP4_MUXER FRAMECRC_MUXER ) \
+= fate-mov-mp4-disposition-mpegts-remux
fate-mov-mp4-disposition-mpegts-remux: CMD = transcode mpegts $(TARGET_SAMPLES)/mpegts/pmtchange.ts mp4 "-map 0:1 -map 0:2 -c copy -disposition:a:0 +hearing_impaired" "-map 0 -c copy" "" "-of json -show_entries stream_disposition:stream=index"
FATE_SAMPLES_FFMPEG_FFPROBE += $(FATE_MOV_FFMPEG_FFPROBE-yes)
fate-mov: $(FATE_MOV) $(FATE_MOV_FFPROBE) $(FATE_MOV_FASTSTART) $(FATE_MOV_FFMPEG_FFPROBE-yes)

View File

@ -0,0 +1,81 @@
3c4432fe59ffd9f2ed6ba4b122cea935 *tests/data/fate/mov-mp4-disposition-mpegts-remux.mp4
5709 tests/data/fate/mov-mp4-disposition-mpegts-remux.mp4
#tb 0: 1/48000
#media_type 0: audio
#codec_id 0: ac3
#sample_rate 0: 48000
#channel_layout 0: 3
#channel_layout_name 0: stereo
#tb 1: 1/48000
#media_type 1: audio
#codec_id 1: ac3
#sample_rate 1: 48000
#channel_layout 1: 3
#channel_layout_name 1: stereo
1, 0, 0, 1536, 768, 0xa63778d4, S=1, 4
1, 1536, 1536, 1536, 768, 0x7d577f3f
0, 3072, 3072, 1536, 768, 0xc2867884, S=1, 4
1, 3072, 3072, 1536, 768, 0xd86b7c8f
0, 4608, 4608, 1536, 690, 0xa2714bf3
1, 4608, 4608, 1536, 626, 0x09f4382f
{
"programs": [
],
"streams": [
{
"index": 0,
"disposition": {
"default": 1,
"dub": 0,
"original": 0,
"comment": 0,
"lyrics": 0,
"karaoke": 0,
"forced": 0,
"hearing_impaired": 1,
"visual_impaired": 0,
"clean_effects": 0,
"attached_pic": 0,
"timed_thumbnails": 0,
"captions": 1,
"descriptions": 0,
"metadata": 0,
"dependent": 0,
"still_image": 0
},
"side_data_list": [
{
}
]
},
{
"index": 1,
"disposition": {
"default": 0,
"dub": 0,
"original": 0,
"comment": 0,
"lyrics": 0,
"karaoke": 0,
"forced": 0,
"hearing_impaired": 0,
"visual_impaired": 1,
"clean_effects": 0,
"attached_pic": 0,
"timed_thumbnails": 0,
"captions": 0,
"descriptions": 1,
"metadata": 0,
"dependent": 0,
"still_image": 0
},
"side_data_list": [
{
}
]
}
]
}