diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 1cc99a4e92..e3af140fde 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -329,7 +329,7 @@ class FragmentFD(FileDownloader):
             'fragment_index': 0,
         })
 
-    def download_and_append_fragments(self, ctx, fragments, info_dict, pack_func=None):
+    def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None):
         fragment_retries = self.params.get('fragment_retries', 0)
         is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
         if not pack_func:
@@ -424,5 +424,9 @@ class FragmentFD(FileDownloader):
                 if not result:
                     return False
 
+        # append whatever trailing data finish_func produces before finalizing the file
+        if finish_func is not None:
+            ctx['dest_stream'].write(finish_func())
+            ctx['dest_stream'].flush()
         self._finish_frag_download(ctx, info_dict)
         return True
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 9cbd5a584f..9cfc191cbb 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -260,29 +260,35 @@ class HlsFD(FragmentFD):
                         block.end += adjust
 
                         dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
-                        cue = block.as_json
 
-                        # skip the cue if an identical one appears
-                        # in the window of potential duplicates
-                        # and prune the window of unviable candidates
+                        ready = []
+
                         i = 0
-                        skip = True
+                        is_new = True
                         while i < len(dedup_window):
-                            window_cue = dedup_window[i]
-                            if window_cue == cue:
-                                break
-                            if window_cue['end'] >= cue['start']:
-                                i += 1
+                            wcue = dedup_window[i]
+                            wblock = webvtt.CueBlock.from_json(wcue)
+                            i += 1
+                            if wblock.hinges(block):
+                                wcue['end'] = block.end
+                                is_new = False
+                                continue
+                            if wblock == block:
+                                is_new = False
                                 continue
+                            if wblock.end > block.start:
+                                continue
+                            ready.append(wblock)
+                            i -= 1
                             del dedup_window[i]
-                        else:
-                            skip = False
 
-                        if skip:
-                            continue
+                        if is_new:
+                            dedup_window.append(block.as_json)
+                        for block in ready:
+                            block.write_into(output)
 
-                        # add the cue to the window
-                        dedup_window.append(cue)
+                        # we only emit cues once they fall out of the duplicate window
+                        continue
                     elif isinstance(block, webvtt.Magic):
                         # take care of MPEG PES timestamp overflow
                         if block.mpegts is None:
@@ -317,6 +323,20 @@ class HlsFD(FragmentFD):
                     block.write_into(output)
 
                 return output.getvalue().encode('utf-8')
+
+            def fin_fragments():
+                """Serialize the cues still held in the dedup window, or b'' if there are none"""
+                dedup_window = extra_state.get('webvtt_dedup_window')
+                if not dedup_window:
+                    return b''
+
+                output = io.StringIO()
+                for cue in dedup_window:
+                    webvtt.CueBlock.from_json(cue).write_into(output)
+
+                return output.getvalue().encode('utf-8')
+
+            return self.download_and_append_fragments(
+                ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
         else:
-            pack_fragment = None
-        return self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)
+            return self.download_and_append_fragments(ctx, fragments, info_dict)
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
index ef55e6459a..eee2a4a2dd 100644
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@@ -331,6 +331,31 @@ class CueBlock(Block):
             'settings': self.settings,
         }
 
+    def __eq__(self, other):
+        # defer to the other operand instead of raising AttributeError on foreign types
+        if not isinstance(other, CueBlock):
+            return NotImplemented
+        return self.as_json == other.as_json
+
+    @classmethod
+    def from_json(cls, json):
+        """Build a cue block from a dict with id, start, end, text and settings keys"""
+        return cls(
+            id=json['id'],
+            start=json['start'],
+            end=json['end'],
+            text=json['text'],
+            settings=json['settings']
+        )
+
+    def hinges(self, other):
+        """Tell whether other has the same text and settings and starts exactly where this cue ends"""
+        if self.text != other.text:
+            return False
+        if self.settings != other.settings:
+            return False
+        return self.start <= self.end == other.start <= other.end
+
 
 def parse_fragment(frag_content):
     """