avformat: Immersive Audio Model and Formats demuxer

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer 2023-11-25 11:22:28 -03:00
parent 7f9308380f
commit 4ee05182b7
8 changed files with 1939 additions and 0 deletions

View File

@ -10,6 +10,7 @@ version <next>:
- demuxing, decoding, filtering, encoding, and muxing in the
ffmpeg CLI now all run in parallel
- enable gdigrab device to grab a window using the hwnd=HANDLER syntax
- IAMF raw demuxer
version 6.1:
- libaribcaption decoder

View File

@ -258,6 +258,7 @@ OBJS-$(CONFIG_EVC_MUXER) += rawenc.o
OBJS-$(CONFIG_HLS_DEMUXER) += hls.o hls_sample_encryption.o
OBJS-$(CONFIG_HLS_MUXER) += hlsenc.o hlsplaylist.o avc.o
OBJS-$(CONFIG_HNM_DEMUXER) += hnm.o
OBJS-$(CONFIG_IAMF_DEMUXER) += iamfdec.o iamf_parse.o iamf.o
OBJS-$(CONFIG_ICO_DEMUXER) += icodec.o
OBJS-$(CONFIG_ICO_MUXER) += icoenc.o
OBJS-$(CONFIG_IDCIN_DEMUXER) += idcin.o

View File

@ -212,6 +212,7 @@ extern const FFOutputFormat ff_hevc_muxer;
extern const AVInputFormat ff_hls_demuxer;
extern const FFOutputFormat ff_hls_muxer;
extern const AVInputFormat ff_hnm_demuxer;
extern const AVInputFormat ff_iamf_demuxer;
extern const AVInputFormat ff_ico_demuxer;
extern const FFOutputFormat ff_ico_muxer;
extern const AVInputFormat ff_idcin_demuxer;

125
libavformat/iamf.c Normal file
View File

@ -0,0 +1,125 @@
/*
* Immersive Audio Model and Formats common helpers and structs
* Copyright (c) 2023 James Almer <jamrial@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/channel_layout.h"
#include "libavutil/iamf.h"
#include "libavutil/mem.h"
#include "iamf.h"
/*
 * Table of the ten channel layouts used for scalable channel audio.
 * NOTE(review): the array index presumably matches the loudspeaker layout
 * value coded in the bitstream -- confirm against iamf_parse.c.
 * The last entry (binaural) reuses the plain stereo layout, so entries 1 and
 * 9 compare equal as channel layouts.
 */
const AVChannelLayout ff_iamf_scalable_ch_layouts[10] = {
AV_CHANNEL_LAYOUT_MONO,
AV_CHANNEL_LAYOUT_STEREO,
// "Loudspeaker configuration for Sound System B"
AV_CHANNEL_LAYOUT_5POINT1_BACK,
// "Loudspeaker configuration for Sound System C"
AV_CHANNEL_LAYOUT_5POINT1POINT2_BACK,
// "Loudspeaker configuration for Sound System D"
AV_CHANNEL_LAYOUT_5POINT1POINT4_BACK,
// "Loudspeaker configuration for Sound System I"
AV_CHANNEL_LAYOUT_7POINT1,
// "Loudspeaker configuration for Sound System I" + Ltf + Rtf
AV_CHANNEL_LAYOUT_7POINT1POINT2,
// "Loudspeaker configuration for Sound System J"
AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK,
// Front subset of "Loudspeaker configuration for Sound System J"
AV_CHANNEL_LAYOUT_3POINT1POINT2,
// Binaural
AV_CHANNEL_LAYOUT_STEREO,
};
/*
 * Maps each enum IAMF_Sound_System value to an AVChannelLayout.
 * The thirteen entries are listed in the same order as the enum values
 * (0..12), so entry n has id == n.
 */
const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13] = {
{ SOUND_SYSTEM_A_0_2_0, AV_CHANNEL_LAYOUT_STEREO },
{ SOUND_SYSTEM_B_0_5_0, AV_CHANNEL_LAYOUT_5POINT1_BACK },
{ SOUND_SYSTEM_C_2_5_0, AV_CHANNEL_LAYOUT_5POINT1POINT2_BACK },
{ SOUND_SYSTEM_D_4_5_0, AV_CHANNEL_LAYOUT_5POINT1POINT4_BACK },
// Sound System E is spelled out by hand: the 5.1.4 (back) mask plus a
// bottom front center channel, 11 channels in native order.
{ SOUND_SYSTEM_E_4_5_1,
{
.nb_channels = 11,
.order = AV_CHANNEL_ORDER_NATIVE,
.u.mask = AV_CH_LAYOUT_5POINT1POINT4_BACK | AV_CH_BOTTOM_FRONT_CENTER,
},
},
{ SOUND_SYSTEM_F_3_7_0, AV_CHANNEL_LAYOUT_7POINT2POINT3 },
{ SOUND_SYSTEM_G_4_9_0, AV_CHANNEL_LAYOUT_9POINT1POINT4_BACK },
{ SOUND_SYSTEM_H_9_10_3, AV_CHANNEL_LAYOUT_22POINT2 },
{ SOUND_SYSTEM_I_0_7_0, AV_CHANNEL_LAYOUT_7POINT1 },
{ SOUND_SYSTEM_J_4_7_0, AV_CHANNEL_LAYOUT_7POINT1POINT4_BACK },
{ SOUND_SYSTEM_10_2_7_0, AV_CHANNEL_LAYOUT_7POINT1POINT2 },
{ SOUND_SYSTEM_11_2_3_0, AV_CHANNEL_LAYOUT_3POINT1POINT2 },
{ SOUND_SYSTEM_12_0_1_0, AV_CHANNEL_LAYOUT_MONO },
};
/**
 * Release an IAMFAudioElement and everything it holds.
 *
 * Frees the codec parameters of every substream, the substream and layer
 * arrays, the wrapped AVIAMFAudioElement, and finally the struct itself.
 * *paudio_element is reset to NULL. Nothing happens if *paudio_element is
 * already NULL.
 *
 * @param paudio_element address of the pointer to free; must not be NULL
 */
void ff_iamf_free_audio_element(IAMFAudioElement **paudio_element)
{
    IAMFAudioElement *elem = *paudio_element;

    if (elem) {
        int idx = 0;

        while (idx < elem->nb_substreams) {
            IAMFSubStream *sub = &elem->substreams[idx];
            avcodec_parameters_free(&sub->codecpar);
            idx++;
        }

        av_free(elem->substreams);
        av_free(elem->layers);
        av_iamf_audio_element_free(&elem->element);

        av_freep(paudio_element);
    }
}
/**
 * Release an IAMFMixPresentation and everything it holds.
 *
 * Frees each language label string, the label array, the wrapped
 * AVIAMFMixPresentation, and finally the struct itself. *pmix_presentation is
 * reset to NULL. Nothing happens if *pmix_presentation is already NULL.
 *
 * @param pmix_presentation address of the pointer to free; must not be NULL
 */
void ff_iamf_free_mix_presentation(IAMFMixPresentation **pmix_presentation)
{
    IAMFMixPresentation *mp = *pmix_presentation;

    if (mp) {
        int idx = 0;

        while (idx < mp->count_label) {
            av_free(mp->language_label[idx]);
            idx++;
        }

        av_free(mp->language_label);
        av_iamf_mix_presentation_free(&mp->mix);

        av_freep(pmix_presentation);
    }
}
/**
 * Free every descriptor stored in an IAMFContext and reset its counters.
 *
 * Codec configs (with their extradata), audio elements, mix presentations
 * and parameter definitions are all released; each array pointer is set to
 * NULL and each count to 0. The IAMFContext struct itself is not freed.
 *
 * @param c context to clear; a NULL pointer is accepted and ignored
 */
void ff_iamf_uninit_context(IAMFContext *c)
{
    int n;

    if (!c)
        return;

    /* Codec configurations own their extradata buffer. */
    for (n = 0; n < c->nb_codec_configs; n++) {
        IAMFCodecConfig *cfg = c->codec_configs[n];

        av_free(cfg->extradata);
        av_free(cfg);
    }
    av_freep(&c->codec_configs);
    c->nb_codec_configs = 0;

    for (n = 0; n < c->nb_audio_elements; n++) {
        IAMFAudioElement **slot = &c->audio_elements[n];

        ff_iamf_free_audio_element(slot);
    }
    av_freep(&c->audio_elements);
    c->nb_audio_elements = 0;

    for (n = 0; n < c->nb_mix_presentations; n++) {
        IAMFMixPresentation **slot = &c->mix_presentations[n];

        ff_iamf_free_mix_presentation(slot);
    }
    av_freep(&c->mix_presentations);
    c->nb_mix_presentations = 0;

    /* Only the wrapper struct is freed for parameter definitions here. */
    for (n = 0; n < c->nb_param_definitions; n++)
        av_free(c->param_definitions[n]);
    av_freep(&c->param_definitions);
    c->nb_param_definitions = 0;
}

163
libavformat/iamf.h Normal file
View File

@ -0,0 +1,163 @@
/*
* Immersive Audio Model and Formats common helpers and structs
* Copyright (c) 2023 James Almer <jamrial@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFORMAT_IAMF_H
#define AVFORMAT_IAMF_H
#include <stdint.h>
#include "libavutil/channel_layout.h"
#include "libavutil/iamf.h"
#include "libavcodec/codec_id.h"
#include "libavcodec/codec_par.h"
#include "avformat.h"
// Upper bound, in bytes, on the size of an OBU header; the demuxer reads this
// many bytes before parsing a header.
// NOTE(review): presumably one fixed byte plus up to three variable-length
// fields of at most 8 bytes each -- confirm against the OBU header syntax.
#define MAX_IAMF_OBU_HEADER_SIZE (1 + 8 * 3)
// OBU types (section 3.2).
// The demuxer treats IAMF_OBU_IA_AUDIO_FRAME as carrying an explicit
// substream id in its payload, while for IAMF_OBU_IA_AUDIO_FRAME_ID0..ID17
// the substream id is derived as (type - IAMF_OBU_IA_AUDIO_FRAME_ID0).
enum IAMF_OBU_Type {
IAMF_OBU_IA_CODEC_CONFIG = 0,
IAMF_OBU_IA_AUDIO_ELEMENT = 1,
IAMF_OBU_IA_MIX_PRESENTATION = 2,
IAMF_OBU_IA_PARAMETER_BLOCK = 3,
IAMF_OBU_IA_TEMPORAL_DELIMITER = 4,
IAMF_OBU_IA_AUDIO_FRAME = 5,
IAMF_OBU_IA_AUDIO_FRAME_ID0 = 6,
IAMF_OBU_IA_AUDIO_FRAME_ID1 = 7,
IAMF_OBU_IA_AUDIO_FRAME_ID2 = 8,
IAMF_OBU_IA_AUDIO_FRAME_ID3 = 9,
IAMF_OBU_IA_AUDIO_FRAME_ID4 = 10,
IAMF_OBU_IA_AUDIO_FRAME_ID5 = 11,
IAMF_OBU_IA_AUDIO_FRAME_ID6 = 12,
IAMF_OBU_IA_AUDIO_FRAME_ID7 = 13,
IAMF_OBU_IA_AUDIO_FRAME_ID8 = 14,
IAMF_OBU_IA_AUDIO_FRAME_ID9 = 15,
IAMF_OBU_IA_AUDIO_FRAME_ID10 = 16,
IAMF_OBU_IA_AUDIO_FRAME_ID11 = 17,
IAMF_OBU_IA_AUDIO_FRAME_ID12 = 18,
IAMF_OBU_IA_AUDIO_FRAME_ID13 = 19,
IAMF_OBU_IA_AUDIO_FRAME_ID14 = 20,
IAMF_OBU_IA_AUDIO_FRAME_ID15 = 21,
IAMF_OBU_IA_AUDIO_FRAME_ID16 = 22,
IAMF_OBU_IA_AUDIO_FRAME_ID17 = 23,
// 24~30 reserved.
IAMF_OBU_IA_SEQUENCE_HEADER = 31,
};
// Parsed state for one codec configuration.
// Both the struct and its extradata buffer are released by
// ff_iamf_uninit_context().
typedef struct IAMFCodecConfig {
// NOTE(review): presumably the id that IAMFAudioElement.codec_config_id
// refers to -- confirm in iamf_parse.c.
unsigned codec_config_id;
enum AVCodecID codec_id;
uint32_t codec_tag;
// NOTE(review): presumably the number of samples per audio frame -- confirm.
unsigned nb_samples;
int seek_preroll;
int sample_rate;
// Size in bytes of the extradata buffer below.
int extradata_size;
// Heap-allocated codec specific data; freed with av_free().
uint8_t *extradata;
} IAMFCodecConfig;
// Per-layer substream counts of an audio element (stored in
// IAMFAudioElement.layers, which is marked as muxer-only there).
typedef struct IAMFLayer {
unsigned int substream_count;
unsigned int coupled_substream_count;
} IAMFLayer;
// One audio substream belonging to an audio element.
typedef struct IAMFSubStream {
// Substream id; the demuxer uses it as the AVStream id.
unsigned int audio_substream_id;
// demux
// Codec parameters copied into the AVStream created for this substream;
// released by ff_iamf_free_audio_element().
AVCodecParameters *codecpar;
} IAMFSubStream;
// Internal wrapper around a public AVIAMFAudioElement plus the bookkeeping
// needed by the (de)muxer. Released with ff_iamf_free_audio_element().
typedef struct IAMFAudioElement {
// Public audio element description. The demuxer also stores this pointer in
// the matching stream group and clears it here before freeing the context.
AVIAMFAudioElement *element;
unsigned int audio_element_id;
// Array of nb_substreams entries.
IAMFSubStream *substreams;
unsigned int nb_substreams;
unsigned int codec_config_id;
// mux
// Array of per-layer substream counts.
// NOTE(review): presumably nb_layers entries -- confirm in the muxer.
IAMFLayer *layers;
unsigned int nb_layers;
} IAMFAudioElement;
// Internal wrapper around a public AVIAMFMixPresentation.
// Released with ff_iamf_free_mix_presentation().
typedef struct IAMFMixPresentation {
// Public mix presentation description. The demuxer also stores this pointer
// in the matching stream group and clears it here before freeing the context.
AVIAMFMixPresentation *mix;
unsigned int mix_presentation_id;
// demux
// Number of entries in language_label.
unsigned int count_label;
// Array of count_label heap-allocated strings.
char **language_label;
} IAMFMixPresentation;
// A parameter definition gathered from the descriptors, looked up by
// param->parameter_id when a parameter block OBU is demuxed.
typedef struct IAMFParamDefinition {
// Audio element this definition belongs to; the demuxer dereferences it
// when handling recon gain parameter blocks.
const IAMFAudioElement *audio_element;
AVIAMFParamDefinition *param;
// When zero, each parameter block carries its own duration,
// constant_subblock_duration and subblock count; otherwise the demuxer
// takes those values from param.
int mode;
// NOTE(review): presumably the allocation size of param in bytes -- confirm.
size_t param_size;
} IAMFParamDefinition;
// Container for all parsed descriptors. Each member pair is an array of
// pointers together with its element count; everything is released and
// reset by ff_iamf_uninit_context().
typedef struct IAMFContext {
IAMFCodecConfig **codec_configs;
int nb_codec_configs;
IAMFAudioElement **audio_elements;
int nb_audio_elements;
IAMFMixPresentation **mix_presentations;
int nb_mix_presentations;
IAMFParamDefinition **param_definitions;
int nb_param_definitions;
} IAMFContext;
// Anchor element kinds, numbered 0..2 in declaration order.
// NOTE(review): "UNKNWONW" looks like a misspelling of "UNKNOWN"; the
// identifier is left as is because renaming it would break users.
enum IAMF_Anchor_Element {
IAMF_ANCHOR_ELEMENT_UNKNWONW,
IAMF_ANCHOR_ELEMENT_DIALOGUE,
IAMF_ANCHOR_ELEMENT_ALBUM,
};
// Sound system identifiers. ff_iamf_sound_system_map pairs each value with
// an AVChannelLayout.
enum IAMF_Sound_System {
SOUND_SYSTEM_A_0_2_0 = 0, // "Loudspeaker configuration for Sound System A"
SOUND_SYSTEM_B_0_5_0 = 1, // "Loudspeaker configuration for Sound System B"
SOUND_SYSTEM_C_2_5_0 = 2, // "Loudspeaker configuration for Sound System C"
SOUND_SYSTEM_D_4_5_0 = 3, // "Loudspeaker configuration for Sound System D"
SOUND_SYSTEM_E_4_5_1 = 4, // "Loudspeaker configuration for Sound System E"
SOUND_SYSTEM_F_3_7_0 = 5, // "Loudspeaker configuration for Sound System F"
SOUND_SYSTEM_G_4_9_0 = 6, // "Loudspeaker configuration for Sound System G"
SOUND_SYSTEM_H_9_10_3 = 7, // "Loudspeaker configuration for Sound System H"
SOUND_SYSTEM_I_0_7_0 = 8, // "Loudspeaker configuration for Sound System I"
SOUND_SYSTEM_J_4_7_0 = 9, // "Loudspeaker configuration for Sound System J"
SOUND_SYSTEM_10_2_7_0 = 10, // "Loudspeaker configuration for Sound System I" + Ltf + Rtf
SOUND_SYSTEM_11_2_3_0 = 11, // Front subset of "Loudspeaker configuration for Sound System J"
SOUND_SYSTEM_12_0_1_0 = 12, // Mono
};
// One entry of ff_iamf_sound_system_map: a sound system id and the channel
// layout associated with it.
struct IAMFSoundSystemMap {
enum IAMF_Sound_System id;
AVChannelLayout layout;
};
// Channel layouts for scalable channel audio (10 entries, defined in iamf.c).
extern const AVChannelLayout ff_iamf_scalable_ch_layouts[10];
// Sound system id to channel layout table (13 entries, defined in iamf.c).
extern const struct IAMFSoundSystemMap ff_iamf_sound_system_map[13];
// Free *paudio_element with everything it holds and set it to NULL.
// A NULL *paudio_element is ignored.
void ff_iamf_free_audio_element(IAMFAudioElement **paudio_element);
// Free *pmix_presentation with everything it holds and set it to NULL.
// A NULL *pmix_presentation is ignored.
void ff_iamf_free_mix_presentation(IAMFMixPresentation **pmix_presentation);
// Free all descriptors stored in c and reset its counters; c itself is not
// freed. A NULL c is ignored.
void ff_iamf_uninit_context(IAMFContext *c);
#endif /* AVFORMAT_IAMF_H */

1105
libavformat/iamf_parse.c Normal file

File diff suppressed because it is too large Load Diff

38
libavformat/iamf_parse.h Normal file
View File

@ -0,0 +1,38 @@
/*
* Immersive Audio Model and Formats parsing
* Copyright (c) 2023 James Almer <jamrial@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFORMAT_IAMF_PARSE_H
#define AVFORMAT_IAMF_PARSE_H
#include <stdint.h>
#include "libavutil/iamf.h"
#include "avio.h"
#include "iamf.h"
// Parse an OBU header from buf (buf_size bytes available).
// Callers in iamfdec.c treat a negative return value as failure and rely on
// obu_size, start_pos and type being filled in on success; they pass NULL
// for skip_samples and discard_padding when those are not needed.
// NOTE(review): the implementation is not visible here. From its callers,
// start_pos looks like the offset of the payload within buf, obu_size the
// payload length, and the return value the total OBU size -- confirm in
// iamf_parse.c.
int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size,
unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
unsigned *skip_samples, unsigned *discard_padding);
// Read the descriptor OBUs from pb into c. The demuxer calls this from its
// read_header callback with size = INT_MAX and the AVFormatContext as
// log_ctx, and treats a negative return value as an error.
// NOTE(review): size is presumably an upper bound on the number of bytes to
// consume -- confirm in iamf_parse.c.
int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb,
int size, void *log_ctx);
#endif /* AVFORMAT_IAMF_PARSE_H */

505
libavformat/iamfdec.c Normal file
View File

@ -0,0 +1,505 @@
/*
* Immersive Audio Model and Formats demuxer
* Copyright (c) 2023 James Almer <jamrial@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config_components.h"
#include "libavutil/avassert.h"
#include "libavutil/iamf.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavcodec/mathops.h"
#include "avformat.h"
#include "avio_internal.h"
#include "demux.h"
#include "iamf.h"
#include "iamf_parse.h"
#include "internal.h"
// Private data of the IAMF demuxer.
typedef struct IAMFDemuxContext {
// Parsed descriptors (codec configs, audio elements, mix presentations,
// parameter definitions).
IAMFContext iamf;
// Packet side data
// Most recent parameter block of each kind and its size in bytes. Each one
// is replaced by parameter_block_obu(), copied into every audio packet as
// side data by audio_frame_obu(), and dropped when a temporal delimiter OBU
// is read.
AVIAMFParamDefinition *mix;
size_t mix_size;
AVIAMFParamDefinition *demix;
size_t demix_size;
AVIAMFParamDefinition *recon;
size_t recon_size;
} IAMFDemuxContext;
/**
 * Look up the stream of s whose id equals the given value.
 *
 * @param s  demuxer context whose streams are searched in index order
 * @param id stream id to match
 * @return the first matching stream, or NULL (after logging an error) when
 *         no stream has that id
 */
static AVStream *find_stream_by_id(AVFormatContext *s, int id)
{
    int n = 0;

    while (n < s->nb_streams) {
        AVStream *candidate = s->streams[n];

        if (candidate->id == id)
            return candidate;
        n++;
    }

    av_log(s, AV_LOG_ERROR, "Invalid stream id %d\n", id);
    return NULL;
}
/**
 * Read the payload of an audio frame OBU into pkt.
 *
 * The substream id is either read from the bitstream (id_in_bitstream != 0)
 * or derived from the OBU type. The packet is assigned to the stream with
 * that id and receives, when available, skip/discard sample counts and the
 * currently cached mix gain, demixing and recon gain parameter blocks as
 * side data.
 *
 * @param s               demuxer context; s->pb is positioned at the payload
 * @param pkt             packet to fill
 * @param len             payload size in bytes, including the explicit
 *                        substream id when present
 * @param type            OBU type of the frame
 * @param skip_samples    value written to the first 4 bytes of the
 *                        AV_PKT_DATA_SKIP_SAMPLES side data
 * @param discard_padding value written to the next 4 bytes of that side data
 * @param id_in_bitstream non-zero if the substream id is coded in the payload
 * @return 0 on success, a negative AVERROR code on failure. On failure pkt
 *         may already hold data; it is not unreferenced here.
 */
static int audio_frame_obu(AVFormatContext *s, AVPacket *pkt, int len,
                           enum IAMF_OBU_Type type,
                           unsigned skip_samples, unsigned discard_padding,
                           int id_in_bitstream)
{
    const IAMFDemuxContext *const c = s->priv_data;
    AVStream *st;
    int ret, audio_substream_id;

    if (id_in_bitstream) {
        unsigned explicit_audio_substream_id;
        int64_t pos = avio_tell(s->pb);
        explicit_audio_substream_id = ffio_read_leb(s->pb);
        /* The id is part of the OBU payload; only the rest is frame data. */
        len -= avio_tell(s->pb) - pos;
        audio_substream_id = explicit_audio_substream_id;
    } else
        audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0;

    st = find_stream_by_id(s, audio_substream_id);
    if (!st)
        return AVERROR_INVALIDDATA;

    /* len originates from an unsigned OBU size and may have been reduced by
     * the id bytes above; a negative value means the OBU is malformed. */
    if (len < 0)
        return AVERROR_INVALIDDATA;

    ret = av_get_packet(s->pb, pkt, len);
    if (ret < 0)
        return ret;
    if (ret != len)
        return AVERROR_INVALIDDATA;

    if (skip_samples || discard_padding) {
        uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
        if (!side_data)
            return AVERROR(ENOMEM);
        AV_WL32(side_data, skip_samples);
        AV_WL32(side_data + 4, discard_padding);
    }
    if (c->mix) {
        uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, c->mix_size);
        if (!side_data)
            return AVERROR(ENOMEM);
        memcpy(side_data, c->mix, c->mix_size);
    }
    if (c->demix) {
        uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, c->demix_size);
        if (!side_data)
            return AVERROR(ENOMEM);
        memcpy(side_data, c->demix, c->demix_size);
    }
    if (c->recon) {
        uint8_t *side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, c->recon_size);
        if (!side_data)
            return AVERROR(ENOMEM);
        memcpy(side_data, c->recon, c->recon_size);
    }

    pkt->stream_index = st->index;

    return 0;
}
/**
 * Find the parameter definition with the given parameter_id.
 *
 * @param s            demuxer context holding the parsed descriptors
 * @param parameter_id id to look for
 * @return the first definition whose param->parameter_id matches, or NULL
 *         if there is none
 */
static const IAMFParamDefinition *get_param_definition(AVFormatContext *s, unsigned int parameter_id)
{
    const IAMFDemuxContext *const c = s->priv_data;
    const IAMFContext *const iamf = &c->iamf;

    for (int idx = 0; idx < iamf->nb_param_definitions; idx++) {
        const IAMFParamDefinition *candidate = iamf->param_definitions[idx];

        if (candidate->param->parameter_id == parameter_id)
            return candidate;
    }

    return NULL;
}
/**
 * Parse a parameter block OBU and cache it for the following audio packets.
 *
 * The payload is read into a temporary buffer and parsed from there. The
 * resulting AVIAMFParamDefinition replaces the previously cached block of
 * the same type (mix gain, demixing or recon gain) in the demuxer context.
 *
 * Blocks that reference an unknown parameter_id, or that carry a mix gain
 * animation type beyond AV_IAMF_ANIMATION_TYPE_BEZIER, are skipped and
 * reported as success.
 *
 * @param s   demuxer context; s->pb is positioned at the OBU payload
 * @param len payload size in bytes
 * @return 0 on success or when the block is ignored, a negative AVERROR code
 *         on failure
 */
static int parameter_block_obu(AVFormatContext *s, int len)
{
    IAMFDemuxContext *const c = s->priv_data;
    const IAMFParamDefinition *param_definition;
    const AVIAMFParamDefinition *param;
    AVIAMFParamDefinition *out_param = NULL;
    FFIOContext b;
    AVIOContext *pb;
    uint8_t *buf;
    unsigned int duration, constant_subblock_duration;
    unsigned int nb_subblocks;
    unsigned int parameter_id;
    size_t out_param_size;
    int ret;

    /* len originates from an unsigned OBU size; reject values that wrapped
     * to negative instead of handing them to the allocator. */
    if (len < 0)
        return AVERROR_INVALIDDATA;

    buf = av_malloc(len);
    if (!buf)
        return AVERROR(ENOMEM);

    ret = avio_read(s->pb, buf, len);
    if (ret != len) {
        if (ret >= 0)
            ret = AVERROR_INVALIDDATA;
        goto fail;
    }

    /* Parse from the in-memory copy so reads cannot run past this OBU. */
    ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL);
    pb = &b.pub;

    parameter_id = ffio_read_leb(pb);
    param_definition = get_param_definition(s, parameter_id);
    if (!param_definition) {
        av_log(s, AV_LOG_VERBOSE, "Non existant parameter_id %u referenced in a parameter block. Ignoring\n",
               parameter_id);
        ret = 0;
        goto fail;
    }

    param = param_definition->param;
    if (!param_definition->mode) {
        /* Timing information is coded in the block itself. */
        duration = ffio_read_leb(pb);
        if (!duration) {
            ret = AVERROR_INVALIDDATA;
            goto fail;
        }
        constant_subblock_duration = ffio_read_leb(pb);
        if (constant_subblock_duration == 0)
            nb_subblocks = ffio_read_leb(pb);
        else
            nb_subblocks = duration / constant_subblock_duration;
    } else {
        /* Timing information comes from the parameter definition. */
        duration = param->duration;
        constant_subblock_duration = param->constant_subblock_duration;
        nb_subblocks = param->nb_subblocks;
    }

    out_param = av_iamf_param_definition_alloc(param->type, nb_subblocks, &out_param_size);
    if (!out_param) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    out_param->parameter_id = param->parameter_id;
    out_param->type = param->type;
    out_param->parameter_rate = param->parameter_rate;
    out_param->duration = duration;
    out_param->constant_subblock_duration = constant_subblock_duration;
    out_param->nb_subblocks = nb_subblocks;

    for (int i = 0; i < nb_subblocks; i++) {
        void *subblock = av_iamf_param_definition_get_subblock(out_param, i);
        unsigned int subblock_duration = constant_subblock_duration;

        if (!param_definition->mode && !constant_subblock_duration)
            subblock_duration = ffio_read_leb(pb);

        switch (param->type) {
        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
            AVIAMFMixGain *mix = subblock;

            mix->animation_type = ffio_read_leb(pb);
            if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) {
                /* Unknown animation type: drop the whole block, not an error. */
                ret = 0;
                av_freep(&out_param);
                goto fail;
            }

            /* Gain values are 16-bit signed fixed point with 8 fractional bits. */
            mix->start_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
            if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
                mix->end_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
            if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
                mix->control_point_value = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8);
                mix->control_point_relative_time = av_make_q(avio_r8(pb), 1 << 8);
            }
            mix->subblock_duration = subblock_duration;
            break;
        }
        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
            AVIAMFDemixingInfo *demix = subblock;

            /* dmixp_mode occupies the top 3 bits of the byte. */
            demix->dmixp_mode = avio_r8(pb) >> 5;
            demix->subblock_duration = subblock_duration;
            break;
        }
        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
            AVIAMFReconGain *recon = subblock;
            const IAMFAudioElement *audio_element = param_definition->audio_element;
            const AVIAMFAudioElement *element;

            /* Check the audio element before looking inside it. */
            av_assert0(audio_element);
            element = audio_element->element;
            av_assert0(element);

            /* NOTE(review): assumes nb_layers does not exceed the first
             * dimension of recon_gain -- confirm the parser enforces it. */
            for (int layer_idx = 0; layer_idx < element->nb_layers; layer_idx++) {
                const AVIAMFLayer *layer = element->layers[layer_idx];

                if (layer->flags & AV_IAMF_LAYER_FLAG_RECON_GAIN) {
                    unsigned int recon_gain_flags = ffio_read_leb(pb);
                    /* 7 flag bits, or 12 when the continuation bit is set. */
                    unsigned int bitcount = 7 + 5 * !!(recon_gain_flags & 0x80);

                    /* Drop the continuation bit and pack the flags together. */
                    recon_gain_flags = (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1);
                    for (int j = 0; j < bitcount; j++) {
                        if (recon_gain_flags & (1 << j))
                            recon->recon_gain[layer_idx][j] = avio_r8(pb);
                    }
                }
            }
            recon->subblock_duration = subblock_duration;
            break;
        }
        default:
            av_assert0(0);
        }
    }

    len -= avio_tell(pb);
    if (len) {
        int level = (s->error_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
        av_log(s, level, "Underread in parameter_block_obu. %d bytes left at the end\n", len);
    }

    /* Replace the cached block of this type; ownership moves to the context. */
    switch (param->type) {
    case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
        av_free(c->mix);
        c->mix = out_param;
        c->mix_size = out_param_size;
        break;
    case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
        av_free(c->demix);
        c->demix = out_param;
        c->demix_size = out_param_size;
        break;
    case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
        av_free(c->recon);
        c->recon = out_param;
        c->recon_size = out_param_size;
        break;
    default:
        av_assert0(0);
    }

    ret = 0;
fail:
    if (ret < 0)
        av_free(out_param);
    av_free(buf);

    return ret;
}
/**
 * read_packet callback: return the next audio frame as a packet.
 *
 * OBUs are consumed in a loop until an audio frame is found:
 *  - audio frame OBUs are turned into a packet and end the loop;
 *  - parameter block OBUs update the cached side data;
 *  - temporal delimiter OBUs drop the cached side data;
 *  - every other OBU is skipped.
 *
 * @param s   demuxer context
 * @param pkt packet to fill
 * @return 0 on success, a negative AVERROR code on failure or end of input
 */
static int iamf_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    IAMFDemuxContext *const c = s->priv_data;
    uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
    unsigned obu_size;
    int ret;

    while (1) {
        enum IAMF_OBU_Type type;
        unsigned skip_samples, discard_padding;
        int len, size, start_pos;
        int64_t seek_ret;

        /* The header is read with a fixed-size lookahead and then rewound to
         * the start of the payload, so make sure seeking back is possible. */
        if ((ret = ffio_ensure_seekback(s->pb, MAX_IAMF_OBU_HEADER_SIZE)) < 0)
            return ret;
        size = avio_read(s->pb, header, MAX_IAMF_OBU_HEADER_SIZE);
        if (size < 0)
            return size;

        len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type,
                                       &skip_samples, &discard_padding);
        if (len < 0) {
            av_log(s, AV_LOG_ERROR, "Failed to read obu\n");
            return len;
        }

        /* Rewind over the bytes read past the header. If this fails the
         * stream position is wrong and nothing sensible can be parsed. */
        seek_ret = avio_seek(s->pb, -(size - start_pos), SEEK_CUR);
        if (seek_ret < 0)
            return seek_ret;

        if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17)
            return audio_frame_obu(s, pkt, obu_size, type,
                                   skip_samples, discard_padding,
                                   type == IAMF_OBU_IA_AUDIO_FRAME);
        else if (type == IAMF_OBU_IA_PARAMETER_BLOCK) {
            ret = parameter_block_obu(s, obu_size);
            if (ret < 0)
                return ret;
        } else if (type == IAMF_OBU_IA_TEMPORAL_DELIMITER) {
            /* Cached parameter blocks are dropped at each temporal delimiter. */
            av_freep(&c->mix);
            c->mix_size = 0;
            av_freep(&c->demix);
            c->demix_size = 0;
            av_freep(&c->recon);
            c->recon_size = 0;
        } else {
            int64_t offset = avio_skip(s->pb, obu_size);
            if (offset < 0) {
                ret = offset;
                break;
            }
        }
    }

    return ret;
}
/**
 * Score a single OBU for format probing.
 *
 * @param buf      start of the OBU payload
 * @param buf_size number of payload bytes available in buf
 * @param type     OBU type taken from the header
 * @param seq      in/out flag, set to 1 once a sequence header carrying the
 *                 'iamf' tag has been seen
 * @return a negative value if more OBUs must be examined, otherwise the
 *         final probe score (0 meaning "not IAMF")
 */
static int get_score(const uint8_t *buf, int buf_size, enum IAMF_OBU_Type type, int *seq)
{
    const int is_descriptor  = type >= IAMF_OBU_IA_CODEC_CONFIG &&
                               type <= IAMF_OBU_IA_TEMPORAL_DELIMITER;
    const int is_audio_frame = type >= IAMF_OBU_IA_AUDIO_FRAME &&
                               type <= IAMF_OBU_IA_AUDIO_FRAME_ID17;

    if (type == IAMF_OBU_IA_SEQUENCE_HEADER) {
        if (buf_size >= 4 && AV_RB32(buf) == MKBETAG('i','a','m','f')) {
            *seq = 1;
            return -1;
        }
        return 0;
    }

    /* Nothing counts before a valid sequence header has been seen. */
    if (!*seq)
        return 0;

    if (is_descriptor)
        return -1;
    if (is_audio_frame)
        return AVPROBE_SCORE_EXTENSION + 1;

    return 0;
}
static int iamf_probe(const AVProbeData *p)
{
unsigned obu_size;
enum IAMF_OBU_Type type;
int seq = 0, cnt = 0, start_pos;
int ret;
while (1) {
int size = ff_iamf_parse_obu_header(p->buf + cnt, p->buf_size - cnt,
&obu_size, &start_pos, &type,
NULL, NULL);
if (size < 0)
return 0;
ret = get_score(p->buf + cnt + start_pos,
p->buf_size - cnt - start_pos,
type, &seq);
if (ret >= 0)
return ret;
cnt += FFMIN(size, p->buf_size - cnt);
}
return 0;
}
/**
 * read_header callback: parse the descriptors and expose them as streams and
 * stream groups.
 *
 * For every audio element a stream group is created and one stream is added
 * per substream. For every mix presentation a stream group is created and
 * populated with the streams of each audio element referenced by its
 * submixes.
 *
 * The stream groups share the AVIAMFAudioElement / AVIAMFMixPresentation
 * pointers with the IAMFContext; iamf_read_close() takes care that they are
 * not freed twice.
 *
 * @param s demuxer context
 * @return 0 on success, a negative AVERROR code on failure
 */
static int iamf_read_header(AVFormatContext *s)
{
    IAMFDemuxContext *const c = s->priv_data;
    IAMFContext *const iamf = &c->iamf;
    int ret;

    ret = ff_iamfdec_read_descriptors(iamf, s->pb, INT_MAX, s);
    if (ret < 0)
        return ret;

    for (int i = 0; i < iamf->nb_audio_elements; i++) {
        IAMFAudioElement *audio_element = iamf->audio_elements[i];
        AVStreamGroup *stg = avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT, NULL);

        if (!stg)
            return AVERROR(ENOMEM);

        /* Release whatever the stream group was created with before storing
         * our own element, so it is not leaked (no-op if it is NULL). */
        av_iamf_audio_element_free(&stg->params.iamf_audio_element);
        stg->id = audio_element->audio_element_id;
        stg->params.iamf_audio_element = audio_element->element;

        for (int j = 0; j < audio_element->nb_substreams; j++) {
            IAMFSubStream *substream = &audio_element->substreams[j];
            AVStream *st = avformat_new_stream(s, NULL);

            if (!st)
                return AVERROR(ENOMEM);

            ret = avformat_stream_group_add_stream(stg, st);
            if (ret < 0)
                return ret;

            ret = avcodec_parameters_copy(st->codecpar, substream->codecpar);
            if (ret < 0)
                return ret;

            st->id = substream->audio_substream_id;
            avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
        }
    }

    for (int i = 0; i < iamf->nb_mix_presentations; i++) {
        IAMFMixPresentation *mix_presentation = iamf->mix_presentations[i];
        AVStreamGroup *stg = avformat_stream_group_create(s, AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION, NULL);
        const AVIAMFMixPresentation *mix = mix_presentation->mix;

        if (!stg)
            return AVERROR(ENOMEM);

        /* Same as above: do not leak the params the group was created with. */
        av_iamf_mix_presentation_free(&stg->params.iamf_mix_presentation);
        stg->id = mix_presentation->mix_presentation_id;
        stg->params.iamf_mix_presentation = mix_presentation->mix;

        for (int j = 0; j < mix->nb_submixes; j++) {
            AVIAMFSubmix *sub_mix = mix->submixes[j];

            for (int k = 0; k < sub_mix->nb_elements; k++) {
                AVIAMFSubmixElement *submix_element = sub_mix->elements[k];
                AVStreamGroup *audio_element = NULL;

                /* Find the stream group of the referenced audio element. */
                for (int l = 0; l < s->nb_stream_groups; l++)
                    if (s->stream_groups[l]->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT &&
                        s->stream_groups[l]->id == submix_element->audio_element_id) {
                        audio_element = s->stream_groups[l];
                        break;
                    }
                /* NOTE(review): relies on the descriptor parser having
                 * rejected references to unknown audio elements -- confirm. */
                av_assert0(audio_element);

                for (int l = 0; l < audio_element->nb_streams; l++) {
                    ret = avformat_stream_group_add_stream(stg, audio_element->streams[l]);
                    /* A stream shared by several submix elements is fine. */
                    if (ret < 0 && ret != AVERROR(EEXIST))
                        return ret;
                }
            }
        }
    }

    return 0;
}
/**
 * read_close callback: release all demuxer state.
 *
 * iamf_read_header() stores the AVIAMFAudioElement / AVIAMFMixPresentation
 * pointers of the IAMFContext in stream groups as well. Such pointers are
 * detached from the IAMFContext here so that ff_iamf_uninit_context() does
 * not free them a second time. Pointers that no stream group holds (for
 * example when read_header failed part way through) stay in place and are
 * freed together with the context instead of being leaked.
 *
 * @param s demuxer context
 * @return always 0
 */
static int iamf_read_close(AVFormatContext *s)
{
    IAMFDemuxContext *const c = s->priv_data;
    IAMFContext *const iamf = &c->iamf;

    for (int i = 0; i < iamf->nb_audio_elements; i++) {
        IAMFAudioElement *audio_element = iamf->audio_elements[i];

        for (int j = 0; j < s->nb_stream_groups; j++) {
            const AVStreamGroup *stg = s->stream_groups[j];

            if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_AUDIO_ELEMENT &&
                stg->params.iamf_audio_element == audio_element->element) {
                /* Held by the stream group, which is freed separately. */
                audio_element->element = NULL;
                break;
            }
        }
    }
    for (int i = 0; i < iamf->nb_mix_presentations; i++) {
        IAMFMixPresentation *mix_presentation = iamf->mix_presentations[i];

        for (int j = 0; j < s->nb_stream_groups; j++) {
            const AVStreamGroup *stg = s->stream_groups[j];

            if (stg->type == AV_STREAM_GROUP_PARAMS_IAMF_MIX_PRESENTATION &&
                stg->params.iamf_mix_presentation == mix_presentation->mix) {
                /* Held by the stream group, which is freed separately. */
                mix_presentation->mix = NULL;
                break;
            }
        }
    }

    ff_iamf_uninit_context(&c->iamf);

    av_freep(&c->mix);
    c->mix_size = 0;
    av_freep(&c->demix);
    c->demix_size = 0;
    av_freep(&c->recon);
    c->recon_size = 0;

    return 0;
}
// Demuxer definition for raw IAMF files (short name and file extension
// "iamf"), wiring up the probe/header/packet/close callbacks defined above.
const AVInputFormat ff_iamf_demuxer = {
.name = "iamf",
.long_name = NULL_IF_CONFIG_SMALL("Raw Immersive Audio Model and Formats"),
.priv_data_size = sizeof(IAMFDemuxContext),
// NOTE(review): presumably requests that read_close also runs when
// read_header fails -- confirm in internal.h.
.flags_internal = FF_FMT_INIT_CLEANUP,
.read_probe = iamf_probe,
.read_header = iamf_read_header,
.read_packet = iamf_read_packet,
.read_close = iamf_read_close,
.extensions = "iamf",
.flags = AVFMT_GENERIC_INDEX | AVFMT_NO_BYTE_SEEK | AVFMT_NOTIMESTAMPS | AVFMT_SHOW_IDS,
};