From 0424e052f83adc422d8a746e3cdc5ab6bc28679e Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Sat, 21 May 2011 03:11:50 +0200 Subject: [PATCH] Merge remote-tracking branch 'ffmpeg-mt/master' * ffmpeg-mt/master: Update todo. h264: add an assert that copied pictures are valid picture pointers valgrind-check: run with 1 and 3 threads h264: When decoding a packet with multiple PPS/SPS, don't start the next thread until all of them have been read Allow some pictures to be released earlier after 51ead6d2c40c5defdd211f435aec49b19f5f6a18 h264: fix slice threading MC reading uninitialized frame edges. Please see ffmpeg-mt for a list of authors of these changes. Conflicts: libavcodec/h264.c mt-work/valgrind-check.sh Signed-off-by: Michael Niedermayer --- libavcodec/h264.c | 55 ++++++++++++++++++++++++++++++--------- libavcodec/mpegvideo.c | 4 +-- mt-work/todo.txt | 14 +++++----- mt-work/valgrind-check.sh | 4 ++- 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 3068db8d85..3a1f821d04 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -312,7 +312,6 @@ static void chroma_dc_dct_c(DCTELEM *block){ } #endif - static void free_tables(H264Context *h, int free_rbsp){ int i; H264Context *hx; @@ -612,11 +611,15 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ return 0; } +#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size)))) static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base) { int i; for (i=0; i<count; i++){ + assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) || + IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) || + !from[i])); to[i] = REBASE_PICTURE(from[i], new_base, old_base); } } @@ -796,8 +799,10 @@ int ff_h264_frame_start(H264Context *h){ * This includes finding the next displayed frame. 
* * @param h h264 master context + * @param setup_finished enough NALs have been read that we can call + * ff_thread_finish_setup() */ -static void decode_postinit(H264Context *h){ +static void decode_postinit(H264Context *h, int setup_finished){ MpegEncContext * const s = &h->s; Picture *out = s->current_picture_ptr; Picture *cur = s->current_picture_ptr; @@ -809,10 +814,11 @@ static void decode_postinit(H264Context *h){ if (h->next_output_pic) return; if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { - //FIXME this allows the next thread to start once we encounter the first field of a PAFF packet - //This works if the next packet contains the second field. It does not work if both fields are - //in the same packet. - //ff_thread_finish_setup(s->avctx); + //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here. + //If we have one field per packet, we can. The check in decode_nal_units() is not good enough + //to find this yet, so we assume the worst for now. + //if (setup_finished) + // ff_thread_finish_setup(s->avctx); return; } @@ -943,7 +949,8 @@ static void decode_postinit(H264Context *h){ av_log(s->avctx, AV_LOG_DEBUG, "no picture\n"); } - ff_thread_finish_setup(s->avctx); + if (setup_finished) + ff_thread_finish_setup(s->avctx); } static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ @@ -2310,7 +2317,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ } //FIXME: fix draw_edges+PAFF+frame threads - h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type&FF_THREAD_FRAME)) ? 0 : 16; + h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16; h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 
0 : h->emu_edge_width; if(s->avctx->debug&FF_DEBUG_PICT_INFO){ @@ -2892,10 +2899,13 @@ static void execute_decode_slices(H264Context *h, int context_count){ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ MpegEncContext * const s = &h->s; AVCodecContext * const avctx= s->avctx; - int buf_index=0; H264Context *hx; ///< thread context - int context_count = 0; - int next_avc= h->is_avc ? 0 : buf_size; + int buf_index; + int context_count; + int next_avc; + int pass = !(avctx->active_thread_type & FF_THREAD_FRAME); + int nals_needed=0; ///< number of NALs that need decoding before the next frame thread starts + int nal_index; h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1; #if 0 @@ -2911,6 +2921,11 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ ff_h264_reset_sei(h); } + for(;pass <= 1;pass++){ + buf_index = 0; + context_count = 0; + next_avc = h->is_avc ? 0 : buf_size; + nal_index = 0; for(;;){ int consumed; int dst_length; @@ -2969,6 +2984,19 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ } buf_index += consumed; + nal_index++; + + if(pass == 0) { + // packets can sometimes contain multiple PPS/SPS + // e.g. 
two PAFF field pictures in one packet, or a demuxer which splits NALs strangely + // if so, when frame threading we can't start the next thread until we've read all of them + switch (hx->nal_unit_type) { + case NAL_SPS: + case NAL_PPS: + nals_needed = nal_index; + } + continue; + } //FIXME do not discard SEI id if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0) @@ -2998,7 +3026,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ if (h->current_slice == 1) { if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) { - decode_postinit(h); + decode_postinit(h, nal_index >= nals_needed); } if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0) @@ -3115,6 +3143,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ goto again; } } + } if(context_count) execute_decode_slices(h, context_count); return buf_index; @@ -3190,7 +3219,7 @@ static int decode_frame(AVCodecContext *avctx, if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ - if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h); + if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1); field_end(h, 0); diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index 6bb35f9f39..0a0a11ebc9 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -316,7 +316,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared){ s->prev_pict_types[0]= s->dropable ? AV_PICTURE_TYPE_B : s->pict_type; if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == AV_PICTURE_TYPE_B) pic->age= INT_MAX; // Skipped MBs in B-frames are quite rare in MPEG-1/2 and it is a bit tricky to skip them anyway. 
- pic->owner2 = s; + pic->owner2 = NULL; return 0; fail: //for the FF_ALLOCZ_OR_GOTO macro @@ -955,7 +955,7 @@ void ff_release_unused_pictures(MpegEncContext *s, int remove_current) /* release non reference frames */ for(i=0; i<s->picture_count; i++){ if(s->picture[i].data[0] && !s->picture[i].reference - && s->picture[i].owner2 == s + && (!s->picture[i].owner2 || s->picture[i].owner2 == s) && (remove_current || &s->picture[i] != s->current_picture_ptr) /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){ free_frame_buffer(s, &s->picture[i]); diff --git a/mt-work/todo.txt b/mt-work/todo.txt index 013853e3ae..678d213d8c 100644 --- a/mt-work/todo.txt +++ b/mt-work/todo.txt @@ -1,7 +1,7 @@ Todo -- For other people -- Multithread vp8 or vc1. +- Multithread vc1. - Multithread an intra codec like mjpeg (trivial). - Fix mpeg1 (see below). - Try the first three items under Optimization. @@ -18,11 +18,13 @@ work.) In general testing error paths should be done more. bugs in vsync in ffmpeg.c, which are currently obscuring real failures. h264: -- Files split at the wrong NAL unit don't (and can't) -be decoded with threads (e.g. TS split so PPS is after -the frame, PAFF with two fields in a packet). Scan the -packet at the start of decode and don't finish setup -until all PPS/SPS have been encountered. +- Files that aren't parsed (e.g. mp4) and contain PAFF with two +field pictures in the same packet are not optimal. Modify the +nals_needed check so that the second field's first slice is +considered as needed, then uncomment the FIXME code in decode_postinit. +Ex: http://astrange.ithinksw.net/ffmpeg/mt-samples/PAFF-Chalet-Tire.mp4 +- The conformance sample MR3_TANDBERG_B.264 has problems (allocated picture overflow). +- One 10-bit sample has problems. 
mpeg4: - Packed B-frames need to be explicitly split up diff --git a/mt-work/valgrind-check.sh b/mt-work/valgrind-check.sh index dc3833abb6..276327a76a 100644 --- a/mt-work/valgrind-check.sh +++ b/mt-work/valgrind-check.sh @@ -1,3 +1,5 @@ #!/bin/bash -valgrind --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f framecrc /dev/null \ No newline at end of file +valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 1 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null + +valgrind --track-origins=yes --leak-check=full ./ffmpeg_g -threads 3 -vsync 0 -y -t 30 -i "$1" -an -f null /dev/null