subtitles: introduce ASS codec id and use it.

Currently, we have an AV_CODEC_ID_SSA, which matches the way the ASS/SSA
markup is muxed in a standalone .ass/.ssa file. This means the AVPacket
data starts with a "Dialogue:" string, followed by timing information
(start and end of the event as strings) and a trailing CRLF after each
line. One packet can contain several lines. We'll refer to this layout
as "SSA" or "SSA lines".

In matroska, this markup is not stored as such: it has no "Dialogue:"
prefix, it contains a ReadOrder field, the timing information is not in
the payload, and it doesn't contain the trailing CRLF. See [1] for more
info. We'll refer to this layout as "ASS".

Since we have only one common codec for both formats, the matroska
demuxer is constructing an AVPacket following the "SSA lines" format.
This causes several problems, so it was decided to change this into
clean ASS packets.

Some insight about what is changed or unchanged in this commit:

  CODECS
  ------

  - the decoding process still writes "SSA lines" markup inside the ass
    fields of the subtitles rectangles (sub->rects[n]->ass), which is
    still the current common way of representing decoded subtitles
    markup. It is meant to change later.

  - new ASS codec id: AV_CODEC_ID_ASS (which is different from the
    legacy AV_CODEC_ID_SSA)

  - lavc/assdec: the "ass" decoder is renamed into "ssa" (instead of
    "ass") for consistency with the codec id, which allows adding a real
    ass decoder. This ass decoder receives clean ASS lines (so it starts
    with a ReadOrder, is followed by the Layer, etc). We make sure this
    is decoded properly in a new ass-line rectangle of the decoded
    subtitles (the ssa decoder OTOH is doing a simple straightforward
    copy). Using the packet timing instead of the data string makes sure
    the ass-line now contains the appropriate timing.

  - lavc/assenc: just like the ass decoder, the "ass" encoder is renamed
    into "ssa" (instead of "ass") for consistency with the codec id, which
    allows adding a real "ass" encoder.

    One important thing about this encoder is that it only supports one
    ass rectangle: we could have put several dialogue events in the
    AVPacket (separated by a \0 for instance) but this would have caused
    trouble for the muxer which needs not only the start time, but also
    the duration: typically, you have merged events with the same start
    time (stored in the AVPacket->pts) but a different duration. At the
    moment, only matroska does the merge with the SSA-line codec.

    We will need to make sure all the decoders in the future can't add
    more than one rectangle (and only one Dialogue line in it
    obviously).

  FORMATS
  -------

  - lavf/assenc: the .ass/.ssa muxer can take both SSA and ASS packets.
    In the case of ASS packets as input, it adds the timing based on the
    AVPacket pts and duration, and muxes it with "Dialogue:", trailing
    CRLF, etc.

  - lavf/assdec: unchanged; it currently still only outputs SSA-lines
    packets.

  - lavf/mkv: the demuxer can now output ASS packets without the need of
    any "SSA-lines" reconstruction hack. It will become the default at
    next libavformat bump, and the SSA support will be dropped from the
    demuxer. The muxer can take ASS packets since it's muxed normally,
    and still supports the old SSA packets. All the SSA support and
    hacks in Matroska code will be dropped at next lavf bump.

[1]: http://www.matroska.org/technical/specs/subtitles/ssa.html
This commit is contained in:
Clément Bœsch 2013-01-03 03:06:43 +01:00
parent 5b9675b5ac
commit 7c1a002c78
16 changed files with 208 additions and 23 deletions

View File

@ -25,6 +25,8 @@ version <next>:
- inverse telecine filters (fieldmatch and decimate)
- colorbalance filter
- colorchannelmixer filter
- The matroska demuxer can now output proper verbatim ASS packets. It will
become the default at the next libavformat major bump.
version 1.2:

View File

@ -15,6 +15,10 @@ libavutil: 2012-10-22
API changes, most recent first:
2013-04-18 - xxxxxxx - lavf 55.3.100
The matroska demuxer can now output proper verbatim ASS packets. It will
become the default starting lavf 56.0.100.
2013-04-10 - xxxxxxx - lavu 25.26.100 - avutil.h,opt.h
Add av_int_list_length()
and av_opt_set_int_list().

View File

@ -110,6 +110,8 @@ OBJS-$(CONFIG_AMV_ENCODER) += mjpegenc.o mjpeg.o \
OBJS-$(CONFIG_ANM_DECODER) += anm.o
OBJS-$(CONFIG_ANSI_DECODER) += ansi.o cga_data.o
OBJS-$(CONFIG_APE_DECODER) += apedec.o
OBJS-$(CONFIG_SSA_DECODER) += assdec.o ass.o ass_split.o
OBJS-$(CONFIG_SSA_ENCODER) += assenc.o ass.o
OBJS-$(CONFIG_ASS_DECODER) += assdec.o ass.o ass_split.o
OBJS-$(CONFIG_ASS_ENCODER) += assenc.o ass.o
OBJS-$(CONFIG_ASV1_DECODER) += asvdec.o asv.o mpeg12data.o

View File

@ -449,6 +449,7 @@ void avcodec_register_all(void)
REGISTER_DECODER(VIMA, vima);
/* subtitles */
REGISTER_ENCDEC (SSA, ssa);
REGISTER_ENCDEC (ASS, ass);
REGISTER_ENCDEC (DVBSUB, dvbsub);
REGISTER_ENCDEC (DVDSUB, dvdsub);

View File

@ -85,17 +85,35 @@ int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
AVSubtitleRect **rects;
av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
if (!raw) {
av_bprintf(&buf, "Dialogue: 0,");
if (!raw || raw == 2) {
long int layer = 0;
if (raw == 2) {
/* skip ReadOrder */
dialog = strchr(dialog, ',');
if (!dialog)
return AVERROR_INVALIDDATA;
dialog++;
/* extract Layer or Marked */
layer = strtol(dialog, (char**)&dialog, 10);
if (*dialog != ',')
return AVERROR_INVALIDDATA;
dialog++;
}
av_bprintf(&buf, "Dialogue: %ld,", layer);
insert_ts(&buf, ts_start);
insert_ts(&buf, duration == -1 ? -1 : ts_start + duration);
av_bprintf(&buf, "Default,");
if (raw != 2)
av_bprintf(&buf, "Default,");
}
dlen = strcspn(dialog, "\n");
dlen += dialog[dlen] == '\n';
av_bprintf(&buf, "%.*s", dlen, dialog);
if (raw == 2)
av_bprintf(&buf, "\r\n");
if (!av_bprint_is_complete(&buf))
return AVERROR(ENOMEM);

View File

@ -76,7 +76,9 @@ int ff_ass_subtitle_header_default(AVCodecContext *avctx);
* @param ts_start start timestamp for this dialog (in 1/100 second unit)
* @param duration duration for this dialog (in 1/100 second unit), can be -1
* to last until the end of the presentation
* @param raw when set to 1, it indicates that dialog contains a whole ASS
* @param raw when set to 2, it indicates that dialog contains an ASS
* dialog line as muxed in Matroska
* when set to 1, it indicates that dialog contains a whole SSA
* dialog line which should be copied as is.
* when set to 0, it indicates that dialog contains only the Text
* part of the ASS dialog line, the rest of the line

View File

@ -41,7 +41,15 @@ static av_cold int ass_decode_init(AVCodecContext *avctx)
return 0;
}
static int ass_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
/**
 * Codec close callback shared by the "ssa" and "ass" decoders.
 *
 * Hands the private context to ff_ass_split_free() and clears the
 * pointer stored in the codec context afterwards.
 *
 * @return always 0
 */
static int ass_decode_close(AVCodecContext *avctx)
{
    void *split_ctx = avctx->priv_data;

    ff_ass_split_free(split_ctx);
    avctx->priv_data = NULL;

    return 0;
}
#if CONFIG_SSA_DECODER
static int ssa_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
AVPacket *avpkt)
{
const char *ptr = avpkt->data;
@ -64,19 +72,49 @@ static int ass_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
return avpkt->size;
}
static int ass_decode_close(AVCodecContext *avctx)
{
ff_ass_split_free(avctx->priv_data);
avctx->priv_data = NULL;
return 0;
}
AVCodec ff_ass_decoder = {
.name = "ass",
/* Decoder entry for AV_CODEC_ID_SSA packets; decoding goes through
 * ssa_decode_frame(). */
AVCodec ff_ssa_decoder = {
    .id        = AV_CODEC_ID_SSA,
    .type      = AVMEDIA_TYPE_SUBTITLE,
    .name      = "ssa",
    .long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
    .init      = ass_decode_init,
    .decode    = ssa_decode_frame,
    .close     = ass_decode_close,
};
#endif
#if CONFIG_ASS_DECODER
/**
 * Decode one AV_CODEC_ID_ASS packet into a subtitle rectangle.
 *
 * The packet timing (pts and duration) is rescaled from the codec time
 * base to 1/100 second units and handed, together with the payload, to
 * ff_ass_add_rect() in raw mode 2.
 *
 * @param avctx       codec context (provides the time base, used for logging)
 * @param data        output AVSubtitle
 * @param got_sub_ptr set to 1 when a rectangle was added; left untouched
 *                    for empty packets and on error
 * @param avpkt       input packet
 * @return avpkt->size on success or for an empty packet, otherwise the
 *         negative error code returned by ff_ass_add_rect()
 */
static int ass_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
                            AVPacket *avpkt)
{
    static const AVRational ass_tb = {1, 100};
    AVSubtitle *sub = data;
    const char *line = avpkt->data;
    const int ts_start    = av_rescale_q(avpkt->pts,      avctx->time_base, ass_tb);
    const int ts_duration = av_rescale_q(avpkt->duration, avctx->time_base, ass_tb);
    int err;

    /* Nothing to decode: report the (non-positive) size back as is. */
    if (avpkt->size <= 0)
        return avpkt->size;

    err = ff_ass_add_rect(sub, line, ts_start, ts_duration, 2);
    if (err == AVERROR_INVALIDDATA)
        av_log(avctx, AV_LOG_ERROR, "Invalid ASS packet\n");
    if (err < 0)
        return err;

    /* The packet size is known to be positive at this point. */
    *got_sub_ptr = 1;
    return avpkt->size;
}
/* Decoder entry for AV_CODEC_ID_ASS packets; decoding goes through
 * ass_decode_frame(). */
AVCodec ff_ass_decoder = {
    .id        = AV_CODEC_ID_ASS,
    .type      = AVMEDIA_TYPE_SUBTITLE,
    .name      = "ass",
    .long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),
    .init      = ass_decode_init,
    .decode    = ass_decode_frame,
    .close     = ass_decode_close,
};
#endif

View File

@ -22,10 +22,16 @@
#include <string.h>
#include "avcodec.h"
#include "ass_split.h"
#include "ass.h"
#include "libavutil/avstring.h"
#include "libavutil/internal.h"
#include "libavutil/mem.h"
/** Private state of the SSA/ASS subtitle encoders. */
typedef struct ASSEncodeContext {
    int id; ///< current event id, ReadOrder field
} ASSEncodeContext;
static av_cold int ass_encode_init(AVCodecContext *avctx)
{
avctx->extradata = av_malloc(avctx->subtitle_header_size + 1);
@ -41,15 +47,47 @@ static int ass_encode_frame(AVCodecContext *avctx,
unsigned char *buf, int bufsize,
const AVSubtitle *sub)
{
ASSEncodeContext *s = avctx->priv_data;
int i, len, total_len = 0;
for (i=0; i<sub->num_rects; i++) {
char ass_line[2048];
const char *ass = sub->rects[i]->ass;
if (sub->rects[i]->type != SUBTITLE_ASS) {
av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
return -1;
}
len = av_strlcpy(buf+total_len, sub->rects[i]->ass, bufsize-total_len);
if (strncmp(ass, "Dialogue: ", 10)) {
av_log(avctx, AV_LOG_ERROR, "AVSubtitle rectangle ass \"%s\""
" does not look like a SSA markup\n", ass);
return AVERROR_INVALIDDATA;
}
if (avctx->codec->id == AV_CODEC_ID_ASS) {
long int layer;
char *p;
if (i > 0) {
av_log(avctx, AV_LOG_ERROR, "ASS encoder supports only one "
"ASS rectangle field.\n");
return AVERROR_INVALIDDATA;
}
ass += 10; // skip "Dialogue: "
/* parse Layer field. If it's a Marked field, the content
* will be "Marked=N" instead of the layer num, so we will
* have layer=0, which is fine. */
layer = strtol(ass, &p, 10);
if (*p) p += strcspn(p, ",") + 1; // skip layer or marked
if (*p) p += strcspn(p, ",") + 1; // skip start timestamp
if (*p) p += strcspn(p, ",") + 1; // skip end timestamp
snprintf(ass_line, sizeof(ass_line), "%d,%ld,%s", ++s->id, layer, p);
ass_line[strcspn(ass_line, "\r\n")] = 0;
ass = ass_line;
}
len = av_strlcpy(buf+total_len, ass, bufsize-total_len);
if (len > bufsize-total_len-1) {
av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
@ -62,11 +100,26 @@ static int ass_encode_frame(AVCodecContext *avctx,
return total_len;
}
AVCodec ff_ass_encoder = {
.name = "ass",
#if CONFIG_SSA_ENCODER
/* Encoder entry for AV_CODEC_ID_SSA; shares ass_encode_frame() with the
 * "ass" encoder. */
AVCodec ff_ssa_encoder = {
    .id             = AV_CODEC_ID_SSA,
    .type           = AVMEDIA_TYPE_SUBTITLE,
    .name           = "ssa",
    .long_name      = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
    .priv_data_size = sizeof(ASSEncodeContext),
    .init           = ass_encode_init,
    .encode_sub     = ass_encode_frame,
};
#endif
#if CONFIG_ASS_ENCODER
/* Encoder entry for AV_CODEC_ID_ASS; shares ass_encode_frame() with the
 * "ssa" encoder. */
AVCodec ff_ass_encoder = {
    .id             = AV_CODEC_ID_ASS,
    .type           = AVMEDIA_TYPE_SUBTITLE,
    .name           = "ass",
    .long_name      = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),
    .priv_data_size = sizeof(ASSEncodeContext),
    .init           = ass_encode_init,
    .encode_sub     = ass_encode_frame,
};
#endif

View File

@ -474,6 +474,7 @@ enum AVCodecID {
AV_CODEC_ID_MPL2 = MKBETAG('M','P','L','2'),
AV_CODEC_ID_VPLAYER = MKBETAG('V','P','l','r'),
AV_CODEC_ID_PJS = MKBETAG('P','h','J','S'),
AV_CODEC_ID_ASS = MKBETAG('A','S','S',' '), ///< ASS as defined in Matroska
/* other specific kind of codecs (generally used for attachments) */
AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs.

View File

@ -2402,11 +2402,17 @@ static const AVCodecDescriptor codec_descriptors[] = {
.long_name = NULL_IF_CONFIG_SMALL("XSUB"),
.props = AV_CODEC_PROP_BITMAP_SUB,
},
{
.id = AV_CODEC_ID_ASS,
.type = AVMEDIA_TYPE_SUBTITLE,
.name = "ass",
.long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SSA) subtitle"),
},
{
.id = AV_CODEC_ID_SSA,
.type = AVMEDIA_TYPE_SUBTITLE,
.name = "ssa",
.long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) / ASS (Advanced SSA) subtitle"),
.long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
},
{
.id = AV_CODEC_ID_MOV_TEXT,

View File

@ -29,7 +29,7 @@
#include "libavutil/avutil.h"
#define LIBAVCODEC_VERSION_MAJOR 55
#define LIBAVCODEC_VERSION_MINOR 2
#define LIBAVCODEC_VERSION_MINOR 3
#define LIBAVCODEC_VERSION_MICRO 100
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \

View File

@ -20,9 +20,11 @@
*/
#include "avformat.h"
#include "internal.h"
/** Private state of the .ass/.ssa muxer. */
typedef struct ASSContext {
    /* offset into the codec extradata, advanced by write_header() */
    unsigned int extra_index;
    int write_ts; // 0: ssa (timing in payload), 1: ass (matroska like)
} ASSContext;
static int write_header(AVFormatContext *s)
@ -31,10 +33,13 @@ static int write_header(AVFormatContext *s)
AVCodecContext *avctx= s->streams[0]->codec;
uint8_t *last= NULL;
if(s->nb_streams != 1 || avctx->codec_id != AV_CODEC_ID_SSA){
if (s->nb_streams != 1 || (avctx->codec_id != AV_CODEC_ID_SSA &&
avctx->codec_id != AV_CODEC_ID_ASS)) {
av_log(s, AV_LOG_ERROR, "Exactly one ASS/SSA stream is needed.\n");
return -1;
}
ass->write_ts = avctx->codec_id == AV_CODEC_ID_ASS;
avpriv_set_pts_info(s->streams[0], 64, 1, 100);
while(ass->extra_index < avctx->extradata_size){
uint8_t *p = avctx->extradata + ass->extra_index;
@ -57,7 +62,31 @@ static int write_header(AVFormatContext *s)
/*
 * Write one subtitle packet to the output.
 *
 * When ass->write_ts is set, the payload is treated as a Matroska-style
 * ASS line: the ReadOrder field is skipped, the Layer field is parsed, and
 * a "Dialogue: " line is rebuilt whose start/end timestamps come from
 * pkt->pts and pkt->duration, terminated by CRLF. Otherwise the payload is
 * written unchanged.
 *
 * Always returns 0.
 */
static int write_packet(AVFormatContext *s, AVPacket *pkt)
{
/* NOTE(review): this unconditional write looks like the pre-change body
 * shown alongside the new one in this diff view — confirm before relying
 * on it. */
avio_write(s->pb, pkt->data, pkt->size);
ASSContext *ass = s->priv_data;
if (ass->write_ts) {
long int layer;
char *p;
/* timestamps are split below assuming 1/100 second units */
int64_t start = pkt->pts;
int64_t end = start + pkt->duration;
/* despite the "ms" names, ms1/ms2 hold hundredths of a second */
int hh1, mm1, ss1, ms1;
int hh2, mm2, ss2, ms2;
/* NOTE(review): if the payload contains no ',', strcspn() returns the
 * offset of the terminating NUL and the "+ 1" steps past it — confirm
 * the input is always well-formed or guard this. */
p = pkt->data + strcspn(pkt->data, ",") + 1; // skip ReadOrder
/* strtol() leaves p on the first character after the parsed number */
layer = strtol(p, &p, 10);
if (*p == ',')
p++;
hh1 = (int)(start / 360000); mm1 = (int)(start / 6000) % 60;
hh2 = (int)(end / 360000); mm2 = (int)(end / 6000) % 60;
ss1 = (int)(start / 100) % 60; ms1 = (int)(start % 100);
ss2 = (int)(end / 100) % 60; ms2 = (int)(end % 100);
/* the hour field is printed as a single digit: clamp to 9:59:59.99 */
if (hh1 > 9) hh1 = 9, mm1 = 59, ss1 = 59, ms1 = 99;
if (hh2 > 9) hh2 = 9, mm2 = 59, ss2 = 59, ms2 = 99;
/* p is printed with %s, so the payload is assumed to be NUL-terminated
 * — TODO confirm */
avio_printf(s->pb, "Dialogue: %ld,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
layer, hh1, mm1, ss1, ms1, hh2, mm2, ss2, ms2, p);
} else {
/* payload already carries its own timing: copy it verbatim */
avio_write(s->pb, pkt->data, pkt->size);
}
return 0;
}

View File

@ -61,10 +61,16 @@ const CodecTags ff_mkv_codec_tags[]={
{"S_TEXT/UTF8" , AV_CODEC_ID_TEXT},
{"S_TEXT/UTF8" , AV_CODEC_ID_SRT},
{"S_TEXT/ASCII" , AV_CODEC_ID_TEXT},
#if FF_API_ASS_SSA
{"S_TEXT/ASS" , AV_CODEC_ID_SSA},
{"S_TEXT/SSA" , AV_CODEC_ID_SSA},
{"S_ASS" , AV_CODEC_ID_SSA},
{"S_SSA" , AV_CODEC_ID_SSA},
#endif
{"S_TEXT/ASS" , AV_CODEC_ID_ASS},
{"S_TEXT/SSA" , AV_CODEC_ID_ASS},
{"S_ASS" , AV_CODEC_ID_ASS},
{"S_SSA" , AV_CODEC_ID_ASS},
{"S_VOBSUB" , AV_CODEC_ID_DVD_SUBTITLE},
{"S_DVBSUB" , AV_CODEC_ID_DVB_SUBTITLE},
{"S_HDMV/PGS" , AV_CODEC_ID_HDMV_PGS_SUBTITLE},

View File

@ -1213,6 +1213,7 @@ static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
return result;
}
#if FF_API_ASS_SSA
static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
AVPacket *pkt, uint64_t display_duration)
{
@ -1259,6 +1260,7 @@ static int matroska_merge_packets(AVPacket *out, AVPacket *in)
av_free(in);
return 0;
}
#endif
static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
AVDictionary **metadata, char *prefix)
@ -1859,7 +1861,12 @@ static int matroska_read_header(AVFormatContext *s)
st->need_parsing = AVSTREAM_PARSE_HEADERS;
} else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
if (st->codec->codec_id == AV_CODEC_ID_SSA)
#if FF_API_ASS_SSA
if (st->codec->codec_id == AV_CODEC_ID_SSA ||
st->codec->codec_id == AV_CODEC_ID_ASS)
#else
if (st->codec->codec_id == AV_CODEC_ID_ASS)
#endif
matroska->contains_ssa = 1;
}
}
@ -2221,6 +2228,7 @@ static int matroska_parse_frame(MatroskaDemuxContext *matroska,
pkt->duration = lace_duration;
}
#if FF_API_ASS_SSA
if (st->codec->codec_id == AV_CODEC_ID_SSA)
matroska_fix_ass_packet(matroska, pkt, lace_duration);
@ -2234,6 +2242,10 @@ static int matroska_parse_frame(MatroskaDemuxContext *matroska,
dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
matroska->prev_pkt = pkt;
}
#else
dynarray_add(&matroska->packets, &matroska->num_packets, pkt);
matroska->prev_pkt = pkt;
#endif
return 0;
}

View File

@ -1072,6 +1072,7 @@ static int ass_get_duration(const uint8_t *p)
return end - start;
}
#if FF_API_ASS_SSA
static int mkv_write_ass_blocks(AVFormatContext *s, AVIOContext *pb, AVPacket *pkt)
{
MatroskaMuxContext *mkv = s->priv_data;
@ -1116,6 +1117,7 @@ static int mkv_write_ass_blocks(AVFormatContext *s, AVIOContext *pb, AVPacket *p
return max_duration;
}
#endif
static void mkv_write_block(AVFormatContext *s, AVIOContext *pb,
unsigned int blockid, AVPacket *pkt, int flags)
@ -1236,8 +1238,10 @@ static int mkv_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
if (codec->codec_type != AVMEDIA_TYPE_SUBTITLE) {
mkv_write_block(s, pb, MATROSKA_ID_SIMPLEBLOCK, pkt, keyframe << 7);
#if FF_API_ASS_SSA
} else if (codec->codec_id == AV_CODEC_ID_SSA) {
duration = mkv_write_ass_blocks(s, pb, pkt);
#endif
} else if (codec->codec_id == AV_CODEC_ID_SRT) {
duration = mkv_write_srt_blocks(s, pb, pkt);
} else {
@ -1418,7 +1422,11 @@ AVOutputFormat ff_matroska_muxer = {
ff_codec_bmp_tags, ff_codec_wav_tags,
additional_audio_tags, additional_video_tags, 0
},
#if FF_API_ASS_SSA
.subtitle_codec = AV_CODEC_ID_SSA,
#else
.subtitle_codec = AV_CODEC_ID_ASS,
#endif
.query_codec = mkv_query_codec,
};
#endif

View File

@ -30,7 +30,7 @@
#include "libavutil/avutil.h"
#define LIBAVFORMAT_VERSION_MAJOR 55
#define LIBAVFORMAT_VERSION_MINOR 2
#define LIBAVFORMAT_VERSION_MINOR 3
#define LIBAVFORMAT_VERSION_MICRO 100
#define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
@ -73,6 +73,9 @@
#ifndef FF_API_READ_PACKET
#define FF_API_READ_PACKET (LIBAVFORMAT_VERSION_MAJOR < 56)
#endif
/* True while the libavformat major version is below 56; code guarded by
 * this macro (e.g. the legacy AV_CODEC_ID_SSA handling in the Matroska
 * demuxer and muxer) is compiled only while it is non-zero. */
#ifndef FF_API_ASS_SSA
#define FF_API_ASS_SSA (LIBAVFORMAT_VERSION_MAJOR < 56)
#endif
#ifndef FF_API_R_FRAME_RATE
#define FF_API_R_FRAME_RATE 1
#endif