demux_timeline: add heuristic to fix shifted seeks with separate audio

If you have an EDL stream with separate sources for audio and video
streams (like ytdl_hook now creates), you can get the problem that the
video stream seeks to a different position than audio due to different
key frame granularity.

In particular, if you seek backward, the video might undershoot the seek
target by a lot. Then video will resume from an earlier position than
audio, and the player plays silence. This is annoying.

Fix this by explicitly implementing a heuristic to detect separate
audio/video streams, determining where a video seek ends up, and then
seeking the audio stream to the video destination. This also makes sure
to not seek audio with SEEK_FORWARD, so it will always seek before the
video position. Non-precise seeks still skip audio to the video target,
so this helps with ensuring that audio is present at the final seek
target.

The implementation is very annoying, because the only way to determine
the seek target is to actually read a packet. Thus a 1-packet queue
needs to be added. In theory, we could get the seek target from the
index of the video file (especially if it's mp4), but libavformat does
not have a public API that exports this index, so we're stuck with this
roundabout generic method.

Note that this is only for non-precise seeks. If precise seeks are done,
the problem is handled by the frontend by skipping unwanted video
frames. But non-precise seeking should still work. (Personally I prefer
non-precise seek mode by default because it is still significantly
faster.)

It also needs to be said that this is the 4th implementation of this
seek adjustment thing in mpv. The 1st implementation is in the frontend
(look for MPContext.seek_slave). This works only if the external audio
stream is known as such on the frontend level. The 2nd implementation is
in the demuxer level packet cache (top of execute_cache_seek()). This is
similar to code that any demuxer needs to handle non-precise seeks
sufficiently nicely. The 3rd is in demux_mkv.c. Since mkv is an
interleaved format, this implementation mostly consists of trying to
pick index entries for video packets if a video stream is selected.
Maybe these "redundant" implementations could be avoided by exposing
separate streams through the demuxer API (and making them individually
seekable) or something like this, but this is messy and not without
problems for multiple reasons. So for now this commit is the best way to
fix the observed behavior.
This commit is contained in:
wm4 2019-01-11 15:04:43 +01:00
parent 87db2f24e8
commit 7d0e0b3a5c
1 changed files with 80 additions and 11 deletions

View File

@ -72,6 +72,8 @@ struct virtual_source {
bool eof_reached;
double dts; // highest read DTS (or PTS if no DTS available)
bool any_selected; // at least one stream is actually selected
struct demux_packet *next;
};
struct priv {
@ -88,6 +90,8 @@ struct priv {
};
static bool add_tl(struct demuxer *demuxer, struct timeline_par *par);
static bool do_read_next_packet(struct demuxer *demuxer,
struct virtual_source *src);
static void update_slave_stats(struct demuxer *demuxer, struct demuxer *slave)
{
@ -178,6 +182,7 @@ static void reselect_streams(struct demuxer *demuxer)
if (!was_selected && src->any_selected) {
src->eof_reached = false;
src->dts = MP_NOPTS_VALUE;
TA_FREEP(&src->next);
}
}
}
@ -189,6 +194,7 @@ static void close_lazy_segments(struct demuxer *demuxer,
for (int n = 0; n < src->num_segments; n++) {
struct segment *seg = src->segments[n];
if (seg != src->current && seg->d && seg->lazy) {
TA_FREEP(&src->next); // might depend on one of the sub-demuxers
demux_free(seg->d);
seg->d = NULL;
}
@ -249,28 +255,76 @@ static void switch_segment(struct demuxer *demuxer, struct virtual_source *src,
src->eos_packets = 0;
}
// Seek a single virtual source to the timeline position pts.
//
// Selects the first segment whose end lies after pts; if pts is at or beyond
// the end of every segment, the last segment is used. The chosen segment is
// then activated via switch_segment(), which receives pts and flags unchanged.
// Afterwards the per-source read state is reset: the highest-read DTS is
// cleared and any packet still queued in src->next is released.
//
// NOTE(review): indexes segments[num_segments - 1] unconditionally, so this
// presumably relies on every source having at least one segment - confirm.
static void seek_source(struct demuxer *demuxer, struct virtual_source *src,
double pts, int flags)
{
// Default to the last segment, used when pts is past all segment ends.
struct segment *new = src->segments[src->num_segments - 1];
for (int n = 0; n < src->num_segments; n++) {
if (pts < src->segments[n]->end) {
new = src->segments[n];
break;
}
}
switch_segment(demuxer, src, new, pts, flags, false);
// Forget the pre-seek read position.
src->dts = MP_NOPTS_VALUE;
// Drop the queued packet; it was read before the seek.
TA_FREEP(&src->next);
}
static void d_seek(struct demuxer *demuxer, double seek_pts, int flags)
{
struct priv *p = demuxer->priv;
double pts = seek_pts * ((flags & SEEK_FACTOR) ? p->duration : 1);
seek_pts = seek_pts * ((flags & SEEK_FACTOR) ? p->duration : 1);
flags &= SEEK_FORWARD | SEEK_HR;
// The intention is to seek audio streams to the same target as video
// streams if they are separate streams. Video streams usually have more
// coarse keyframe snapping, which could leave video without audio.
struct virtual_source *master = NULL;
bool has_slaves = false;
for (int x = 0; x < p->num_sources; x++) {
struct virtual_source *src = p->sources[x];
struct segment *new = src->segments[src->num_segments - 1];
for (int n = 0; n < src->num_segments; n++) {
if (pts < src->segments[n]->end) {
new = src->segments[n];
break;
bool any_audio = false, any_video = false;
for (int i = 0; i < src->num_streams; i++) {
struct virtual_stream *str = src->streams[i];
if (str->selected) {
if (str->sh->type == STREAM_VIDEO)
any_video = true;
if (str->sh->type == STREAM_AUDIO)
any_audio = true;
}
}
switch_segment(demuxer, src, new, pts, flags, false);
if (any_video)
master = src;
// A true slave stream is audio-only; this also prevents that the master
// stream is considered a slave stream.
if (any_audio && !any_video)
has_slaves = true;
}
src->dts = MP_NOPTS_VALUE;
if (!has_slaves)
master = NULL;
if (master) {
seek_source(demuxer, master, seek_pts, flags);
do_read_next_packet(demuxer, master);
if (master->next && master->next->pts != MP_NOPTS_VALUE) {
// Assume we got a seek target. Actually apply the heuristic.
MP_VERBOSE(demuxer, "adjust seek target from %f to %f\n", seek_pts,
master->next->pts);
seek_pts = master->next->pts;
flags &= ~(unsigned)SEEK_FORWARD;
}
}
for (int x = 0; x < p->num_sources; x++) {
struct virtual_source *src = p->sources[x];
if (src != master)
seek_source(demuxer, src, seek_pts, flags);
}
}
@ -300,8 +354,22 @@ static bool d_read_packet(struct demuxer *demuxer, struct demux_packet **out_pkt
if (!src)
return false;
if (!do_read_next_packet(demuxer, src))
return false;
*out_pkt = src->next;
src->next = NULL;
return true;
}
static bool do_read_next_packet(struct demuxer *demuxer,
struct virtual_source *src)
{
if (src->next)
return 1;
struct segment *seg = src->current;
assert(seg && seg->d);
if (!seg || !seg->d)
return 0;
struct demux_packet *pkt = demux_read_any_packet(seg->d);
if (!pkt || (!src->no_clip && pkt->pts >= seg->end))
@ -387,7 +455,7 @@ static bool d_read_packet(struct demuxer *demuxer, struct demux_packet **out_pkt
src->dts = dts;
pkt->stream = vs->sh->index;
*out_pkt = pkt;
src->next = pkt;
return true;
drop:
@ -558,6 +626,7 @@ static void d_close(struct demuxer *demuxer)
struct virtual_source *src = p->sources[x];
src->current = NULL;
TA_FREEP(&src->next);
close_lazy_segments(demuxer, src);
}