diff --git a/Changelog b/Changelog index a897b9226a..2f1c411454 100644 --- a/Changelog +++ b/Changelog @@ -26,6 +26,7 @@ version : - RTP parser for loss tolerant payload format for MP3 audio (RFC 5219) - RTP parser for AC3 payload format (RFC 4184) - palettegen and paletteuse filters +- VP9 RTP payload format (draft 0) experimental depacketizer version 2.5: diff --git a/MAINTAINERS b/MAINTAINERS index 84302223a1..855984bea4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -469,6 +469,7 @@ Muxers/Demuxers: rtpdec_h261.*, rtpenc_h261.* Thomas Volkert rtpdec_hevc.*, rtpenc_hevc.* Thomas Volkert rtpdec_asf.* Ronald S. Bultje + rtpdec_vp9.c Thomas Volkert rtpenc_mpv.*, rtpenc_aac.* Martin Storsjo rtsp.c Luca Barbato sbgdec.c Nicolas George diff --git a/libavformat/Makefile b/libavformat/Makefile index 073a42dd45..95a9e2f9ba 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -49,6 +49,7 @@ OBJS-$(CONFIG_RTPDEC) += rdt.o \ rtpdec_qt.o \ rtpdec_svq3.o \ rtpdec_vp8.o \ + rtpdec_vp9.o \ rtpdec_xiph.o \ srtp.o OBJS-$(CONFIG_RTPENC_CHAIN) += rtpenc_chain.o rtp.o diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c index 550da4a282..895c253a56 100644 --- a/libavformat/rtpdec.c +++ b/libavformat/rtpdec.c @@ -107,6 +107,7 @@ void ff_register_rtp_dynamic_payload_handlers(void) ff_register_dynamic_payload_handler(&ff_theora_dynamic_handler); ff_register_dynamic_payload_handler(&ff_vorbis_dynamic_handler); ff_register_dynamic_payload_handler(&ff_vp8_dynamic_handler); + ff_register_dynamic_payload_handler(&ff_vp9_dynamic_handler); ff_register_dynamic_payload_handler(&gsm_dynamic_handler); ff_register_dynamic_payload_handler(&opus_dynamic_handler); ff_register_dynamic_payload_handler(&realmedia_mp3_dynamic_handler); diff --git a/libavformat/rtpdec_formats.h b/libavformat/rtpdec_formats.h index b38cf1bf5c..b6ff969eef 100644 --- a/libavformat/rtpdec_formats.h +++ b/libavformat/rtpdec_formats.h @@ -69,5 +69,6 @@ extern RTPDynamicProtocolHandler ff_svq3_dynamic_handler; extern RTPDynamicProtocolHandler ff_theora_dynamic_handler; extern RTPDynamicProtocolHandler ff_vorbis_dynamic_handler; extern RTPDynamicProtocolHandler ff_vp8_dynamic_handler; +extern RTPDynamicProtocolHandler ff_vp9_dynamic_handler; #endif /* AVFORMAT_RTPDEC_FORMATS_H */ diff --git a/libavformat/rtpdec_vp9.c b/libavformat/rtpdec_vp9.c new file mode 100644 index 0000000000..33f38300d4 --- /dev/null +++ b/libavformat/rtpdec_vp9.c @@ -0,0 +1,317 @@ +/* + * RTP parser for VP9 payload format (draft version 0) - experimental + * Copyright (c) 2015 Thomas Volkert + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "libavcodec/bytestream.h" + +#include "rtpdec_formats.h" + +#define RTP_VP9_DESC_REQUIRED_SIZE 1 + +struct PayloadContext { + AVIOContext *buf; + uint32_t timestamp; +}; + +static av_cold PayloadContext *vp9_new_context(void) +{ + return av_mallocz(sizeof(PayloadContext)); +} + +static void vp9_free_dyn_buffer(AVIOContext **dyn_buf) +{ + uint8_t *ptr_dyn_buffer; + avio_close_dyn_buf(*dyn_buf, &ptr_dyn_buffer); + av_free(ptr_dyn_buffer); + *dyn_buf = NULL; +} + +static av_cold void vp9_free_context(PayloadContext *data) +{ + av_free(data); +} + +static av_cold int vp9_init(AVFormatContext *ctx, int st_index, + PayloadContext *data) +{ + av_dlog(ctx, "vp9_init() for stream %d\n", st_index); + av_log(ctx, AV_LOG_WARNING, + "RTP/VP9 support is still experimental\n"); + + if (st_index < 0) + return 0; + + ctx->streams[st_index]->need_parsing = AVSTREAM_PARSE_FULL; + + return 0; +} + +static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx, + AVStream *st, AVPacket *pkt, uint32_t *timestamp, + const uint8_t *buf, int len, uint16_t seq, + int flags) +{ + int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data, has_su_data; + av_unused int pic_id = 0, non_key_frame = 0; + av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1; + int ref_fields = 0, has_ref_field_ext_pic_id = 0; + int first_fragment, last_fragment; + int res = 0; + + /* drop data of previous packets in case of non-continuous (lossy) packet stream */ + if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp) { + vp9_free_dyn_buffer(&rtp_vp9_ctx->buf); + } + + /* sanity check for size of input packet: 1 byte payload at least */ + if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) { + av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len); + return AVERROR_INVALIDDATA; + } + + /* + decode the required VP9 payload descriptor according to section 4.2 of the spec.: + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ + |I|L|F|B|E|V|U|-| (REQUIRED) + +-+-+-+-+-+-+-+-+ + + I: PictureID present + L: Layer indices present + F: Reference indices present + B: Start of VP9 frame + E: End of picture + V: Scalability Structure (SS) present + U: Scalability Structure Update (SU) present + */ + has_pic_id = buf[0] & 0x80; + has_layer_idc = buf[0] & 0x40; + has_ref_idc = buf[0] & 0x20; + first_fragment = buf[0] & 0x10; + last_fragment = buf[0] & 0x08; + has_ss_data = buf[0] & 0x04; + has_su_data = buf[0] & 0x02; + + /* sanity check for markers: B should always be equal to the RTP M marker */ + if (last_fragment >> 2 != flags & RTP_FLAG_MARKER) { + av_log(ctx, AV_LOG_ERROR, "Invalid combination of B and M marker\n"); + return AVERROR_INVALIDDATA; + } + + /* pass the extensions field */ + buf += RTP_VP9_DESC_REQUIRED_SIZE; + len -= RTP_VP9_DESC_REQUIRED_SIZE; + + /* + decode the 1-byte/2-byte picture ID: + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ + I: |M|PICTURE ID | (RECOMMENDED) + +-+-+-+-+-+-+-+-+ + M: | EXTENDED PID | (RECOMMENDED) + +-+-+-+-+-+-+-+-+ + + M: The most significant bit of the first octet is an extension flag. + PictureID: 8 or 16 bits including the M bit. + */ + if (has_pic_id) { + if (len < 1) { + av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); + return AVERROR_INVALIDDATA; + } + + /* check for 1-byte or 2-byte picture index */ + if (buf[0] & 0x80) { + if (len < 2) { + av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); + return AVERROR_INVALIDDATA; + } + pic_id = AV_RB16(buf) & 0x7fff; + buf += 2; + len -= 2; + } else { + pic_id = buf[0] & 0x7f; + buf++; + len--; + } + } + + /* + decode layer indices + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ + L: | T | S | Q | R | (CONDITIONALLY RECOMMENDED) + +-+-+-+-+-+-+-+-+ + + T, S and Q are 2-bit indices for temporal, spatial, and quality layers. + If "F" is set in the initial octet, R is 2 bits representing the number + of reference fields this frame refers to. + */ + if (has_layer_idc) { + if (len < 1) { + av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet"); + return AVERROR_INVALIDDATA; + } + layer_temporal = buf[0] & 0xC0; + layer_spatial = buf[0] & 0x30; + layer_quality = buf[0] & 0x0C; + if (has_ref_idc) { + ref_fields = buf[0] & 0x03; + if (ref_fields) + non_key_frame = 1; + } + buf++; + len--; + } + + /* + decode the reference fields + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ -\ + F: | PID |X| RS| RQ| (OPTIONAL) . + +-+-+-+-+-+-+-+-+ . - R times + X: | EXTENDED PID | (OPTIONAL) . + +-+-+-+-+-+-+-+-+ -/ + + PID: The relative Picture ID referred to by this frame. + RS and RQ: The spatial and quality layer IDs. + X: 1 if this layer index has an extended relative Picture ID. + */ + if (has_ref_idc) { + while (ref_fields) { + if (len < 1) { + av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); + return AVERROR_INVALIDDATA; + } + + has_ref_field_ext_pic_id = buf[0] & 0x10; + + /* pass ref. field */ + if (has_ref_field_ext_pic_id) { + if (len < 2) { + av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); + return AVERROR_INVALIDDATA; + } + + /* ignore ref. data */ + + buf += 2; + len -= 2; + } else { + + /* ignore ref. data */ + + buf++; + len--; + } + ref_fields--; + } + } + + /* + decode the scalability structure (SS) + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ + V: | PATTERN LENGTH| + +-+-+-+-+-+-+-+-+ -\ + | T | S | Q | R | (OPTIONAL) . + +-+-+-+-+-+-+-+-+ -\ . + | PID |X| RS| RQ| (OPTIONAL) . . - PAT. LEN. times + +-+-+-+-+-+-+-+-+ . - R times . + X: | EXTENDED PID | (OPTIONAL) . . + +-+-+-+-+-+-+-+-+ -/ -/ + + PID: The relative Picture ID referred to by this frame. + RS and RQ: The spatial and quality layer IDs. + X: 1 if this layer index has an extended relative Picture ID. + */ + if (has_ss_data) { + avpriv_report_missing_feature(ctx, "VP9 scalability structure data\n"); + return AVERROR_PATCHWELCOME; + } + + /* + decode the scalability update structure (SU) + + spec. is tbd + */ + if (has_su_data) { + avpriv_report_missing_feature(ctx, "VP9 scalability update structure data\n"); + return AVERROR_PATCHWELCOME; + } + + /* + decode the VP9 payload header + + spec. is tbd + */ + //XXX: implement when specified + + /* sanity check: 1 byte payload as minimum */ + if (len < 1) { + av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n"); + return AVERROR_INVALIDDATA; + } + + /* start frame buffering with new dynamic buffer */ + if (!rtp_vp9_ctx->buf) { + /* sanity check: a new frame should have started */ + if (first_fragment) { + res = avio_open_dyn_buf(&rtp_vp9_ctx->buf); + if (res < 0) + return res; + /* update the timestamp in the frame packet with the one from the RTP packet */ + rtp_vp9_ctx->timestamp = *timestamp; + } else { + /* frame not started yet, need more packets */ + return AVERROR(EAGAIN); + } + } + + /* write the fragment to the dyn. buffer */ + avio_write(rtp_vp9_ctx->buf, buf, len); + + /* do we need more fragments? */ + if (!last_fragment) + return AVERROR(EAGAIN); + + /* close frame buffering and create resulting A/V packet */ + res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index); + if (res < 0) + return res; + + return 0; +} + +RTPDynamicProtocolHandler ff_vp9_dynamic_handler = { + .enc_name = "VP9", + .codec_type = AVMEDIA_TYPE_VIDEO, + .codec_id = AV_CODEC_ID_VP9, + .init = vp9_init, + .alloc = vp9_new_context, + .free = vp9_free_context, + .parse_packet = vp9_handle_packet +};