1
mirror of https://github.com/mpv-player/mpv synced 2025-01-13 00:06:25 +01:00
mpv/subreader.c
arpi d200725e88 This patch adds support for overlapping subtitles, that is,
subtitles whose start or end falls within the duration of another subtitle.
After reading the subtitles from the file in sub_read_file(),
this patch looks for overlapping subtitles and splits
them into several non-overlapping subtitles.

Salvatore Falco <sfalco@studenti.ing.uniroma1.it>


git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@7984 b3059339-0415-0410-9bf9-f77b7e298cf2
2002-10-30 19:26:05 +00:00

1410 lines
36 KiB
C

/*
* Subtitle reader with format autodetection
*
* Written by laaz
* Some code cleanup & realloc() by A'rpi/ESP-team
* dunnowhat sub format by szabi
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "config.h"
#include "mp_msg.h"
#include "subreader.h"
/* Sentinel returned by the line readers for an entry that could not be
 * parsed; sub_read_file() counts such entries in sub_errs. */
#define ERR ((void *) -1)
#ifdef USE_ICONV
#include <iconv.h>
/* Source charset handed to iconv_open() by subcp_open(); NULL disables recoding. */
char *sub_cp=NULL;
#endif
/* Maximal length of line of a subtitle */
#define LINE_LEN 1000
/* Running position accumulated by the mpsub reader (and reused by dump_mpsub()). */
static float mpsub_position=0;
/* Set by sub_autodetect(): 1 for time-based formats, 0 for frame-based ones. */
int sub_uses_time=0;
/* Number of entries for which a reader returned ERR. */
int sub_errs=0;
int sub_num=0; // number of subtitle structs
/* Duration given to the last SAMI subtitle, in 1/100 s; a "Slacktime:" line
 * in the file overrides it. */
int sub_slacktime=2000; // 20 seconds
/* Use the SUB_* constant defined in the header file */
int sub_format=SUB_INVALID;
/* Return 1 when p ends a line (CR, LF or the terminating NUL), else 0. */
static int eol(char p) {
    switch (p) {
    case '\0':
    case '\n':
    case '\r':
        return 1;
    default:
        return 0;
    }
}
/*
 * Remove leading and trailing whitespace from s, in place.
 *
 * s must be a writable NUL-terminated string.  A string made only of
 * whitespace becomes the empty string.
 */
static void trail_space(char *s) {
    size_t lead = 0;
    size_t len;

    /* <ctype.h> functions need an unsigned char value, not a plain char */
    while (isspace((unsigned char)s[lead])) ++lead;

    /* source and destination overlap, so strcpy() would be undefined here */
    if (lead) memmove(s, s + lead, strlen(s + lead) + 1);

    len = strlen(s);
    while (len > 0 && isspace((unsigned char)s[len - 1])) s[--len] = '\0';
}
/*
 * Read one subtitle from a SAMI stream.
 *
 * The parser is a small state machine that survives across calls through
 * the static line buffer and the static cursor `s`:
 *   0  look for "Slacktime:" and "Start=" (start time, ms / 10)
 *   1  look for "<P"
 *   2  look for the closing '>' of that tag
 *   3  collect text up to the next '<' ("<br>" starts a new line)
 *   4  either a tag to skip or the next "Start=", which ends this subtitle
 *   99 subtitle complete
 *
 * Returns current when a subtitle was read, or 0 at end of file when no
 * start time had been found.  The last subtitle of the file has no
 * following "Start=", so it gets an end time of start + sub_slacktime.
 */
subtitle *sub_read_line_sami(FILE *fd, subtitle *current) {
    static char line[LINE_LEN+1];
    static char *s = NULL, *slacktime_s;
    char text[LINE_LEN+1], *p=NULL, *q;
    /* last position that still leaves room for the terminating NUL */
    char *const text_end = text + LINE_LEN;
    int state;

    text[0] = '\0';
    current->lines = current->start = current->end = 0;
    state = 0;

    /* read the first line */
    if (!s)
        if (!(s = fgets(line, LINE_LEN, fd))) return 0;

    do {
        switch (state) {

        case 0: /* find "START=" or "Slacktime:" */
            slacktime_s = strstr (s, "Slacktime:");
            if (slacktime_s) sub_slacktime = strtol (slacktime_s + 10, NULL, 0) / 10;

            s = strstr (s, "Start=");
            if (s) {
                current->start = strtol (s + 6, &s, 0) / 10;
                state = 1; continue;
            }
            break;

        case 1: /* find "<P" */
            if ((s = strstr (s, "<P"))) { s += 2; state = 2; continue; }
            break;

        case 2: /* find ">" */
            if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; }
            break;

        case 3: /* get all text until '<' appears */
            if (*s == '\0') break;
            else if (!strncasecmp (s, "<br>", 4)) {
                *p = '\0'; p = text; trail_space (text);
                if (text[0] != '\0' && current->lines < SUB_MAX_TEXT)
                    current->text[current->lines++] = strdup (text);
                s += 4;
            }
            else if (*s == '<') { state = 4; }
            else if (!strncasecmp (s, "&nbsp;", 6)) { if (p < text_end) *p++ = ' '; s += 6; }
            else if (*s == '\t') { if (p < text_end) *p++ = ' '; s++; }
            else if (*s == '\r' || *s == '\n') { s++; }
            else { if (p < text_end) *p++ = *s; s++; }

            /* skip duplicated space */
            if (p > text + 2) if (*(p-1) == ' ' && *(p-2) == ' ') p--;

            continue;

        case 4: /* get current->end or skip <TAG> */
            q = strstr (s, "Start=");
            if (q) {
                current->end = strtol (q + 6, &q, 0) / 10 - 1;
                *p = '\0'; trail_space (text);
                if (text[0] != '\0' && current->lines < SUB_MAX_TEXT)
                    current->text[current->lines++] = strdup (text);
                /* s still points before this "Start=", so the next call
                 * picks the new subtitle up from there */
                if (current->lines > 0) { state = 99; break; }
                state = 0; continue;
            }
            s = strchr (s, '>');
            if (s) { s++; state = 3; continue; }
            break;
        }

        /* read next line */
        if (state != 99 && !(s = fgets (line, LINE_LEN, fd))) {
            if (current->start > 0) {
                break; // if it is the last subtitle
            } else {
                return 0;
            }
        }

    } while (state != 99);

    // For the last subtitle
    if (current->end <= 0) {
        current->end = current->start + sub_slacktime;
        /* p is still NULL when the file ended before any "<P ...>" was seen */
        if (p) {
            *p = '\0'; trail_space (text);
            if (text[0] != '\0' && current->lines < SUB_MAX_TEXT)
                current->text[current->lines++] = strdup (text);
        }
    }

    return current;
}
/*
 * Copy one text field from source into a freshly allocated string.
 *
 * A field ends at '|', CR, LF or the terminating NUL.  *dest receives the
 * malloc()ed copy (owned by the caller).
 *
 * Returns a pointer to the start of the next field, NULL when this was the
 * last field, or ERR when the allocation failed (*dest is then NULL).
 */
char *sub_readtext(char *source, char **dest) {
    size_t len = 0;
    char *p = source;

    while (!eol(*p) && *p != '|') {
        p++;
        len++;
    }

    *dest = malloc(len + 1);
    /* test the allocation itself, not the address of the output slot */
    if (!*dest) return ERR;

    memcpy(*dest, source, len);
    (*dest)[len] = '\0';

    /* step over the separator(s) that ended the field */
    while (*p == '\r' || *p == '\n' || *p == '|') p++;

    if (*p) return p;   // not-last text field
    else return NULL;   // last text field
}
/*
 * Read one MicroDVD subtitle: "{start}{end}text|text..." or
 * "{start}{}text|text...".  Lines matching neither form are skipped.
 *
 * Returns current, NULL at end of file, or ERR when sub_readtext() fails.
 */
subtitle *sub_read_line_microdvd(FILE *fd,subtitle *current) {
    char line[LINE_LEN+1];
    char line2[LINE_LEN+1];
    char *next;
    int i;

    do {
        if (!fgets (line, LINE_LEN, fd)) return NULL;
    } while ((sscanf (line,
                      "{%ld}{}%[^\r\n]",
                      &(current->start), line2) < 2) &&
             (sscanf (line,
                      "{%ld}{%ld}%[^\r\n]",
                      &(current->start), &(current->end), line2) < 3));

    next = line2;
    i = 0;
    while ((next = sub_readtext (next, &(current->text[i])))) {
        /* sub_readtext() reports failure through its return value */
        if (next == ERR) {return ERR;}
        i++;
        if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
    }
    /* the call that returned NULL still stored the last field in text[i] */
    current->lines = i + 1;

    return current;
}
/*
 * Read one subtitle in the "h:m:s.cs,h:m:s.cs" form: a timing line followed
 * by one text line whose parts are separated by '|' or "[br]".
 *
 * Times are stored in 1/100 s.  Returns current, NULL at end of file, or
 * ERR on allocation failure.
 */
subtitle *sub_read_line_subrip(FILE *fd, subtitle *current) {
    char line[LINE_LEN+1];
    int a1,a2,a3,a4,b1,b2,b3,b4;
    char *p, *q;
    int i, len;

    /* skip everything up to the next timing line */
    do {
        if (!fgets (line, LINE_LEN, fd)) return NULL;
    } while (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8);

    current->start = a1*360000+a2*6000+a3*100+a4;
    current->end   = b1*360000+b2*6000+b3*100+b4;

    if (!fgets (line, LINE_LEN, fd)) return NULL;

    p = line;
    for (i = 0; i < SUB_MAX_TEXT; i++) {
        q = p;
        len = 0;
        while (*p && *p!='\r' && *p!='\n' && *p!='|' && strncmp(p,"[br]",4)) {
            p++;
            len++;
        }
        current->text[i] = malloc (len+1);
        if (!current->text[i]) return ERR;
        memcpy (current->text[i], q, len);
        current->text[i][len] = '\0';
        /* count only slots that really hold a string, so that lines never
         * covers an unfilled entry when the SUB_MAX_TEXT cap is reached */
        current->lines = i + 1;

        if (!*p || *p=='\r' || *p=='\n') break;
        if (*p=='|') p++;
        else p += 4;    /* step over "[br]" */
    }

    return current;
}
/*
 * Read one subtitle in the "h:m:s,ms --> h:m:s,ms" form.
 *
 * The timing line is followed by text lines up to the first empty line (or
 * SUB_MAX_TEXT lines, or end of file).  Anything between '<' and '>' is
 * dropped from the text, as are the two bracket characters themselves.
 * Times are stored in 1/100 s.
 *
 * Returns current once at least one text line was stored, NULL at end of
 * file, or ERR on allocation failure.
 */
subtitle *sub_read_line_subviewer(FILE *fd,subtitle *current) {
    char line[LINE_LEN+1];
    int h1, m1, s1, ms1, h2, m2, s2, ms2;

    while (current->text[0] == NULL) {
        int count = 0;

        if (fgets (line, LINE_LEN, fd) == NULL)
            return NULL;
        if (sscanf (line, "%d:%d:%d,%d --> %d:%d:%d,%d",
                    &h1, &m1, &s1, &ms1, &h2, &m2, &s2, &ms2) < 8)
            continue;

        current->start = h1*360000+m1*6000+s1*100+ms1/10;
        current->end   = h2*360000+m2*6000+s2*100+ms2/10;

        while (count < SUB_MAX_TEXT && fgets (line, LINE_LEN, fd) != NULL) {
            /* length of the line without its line terminator */
            int len = (int) strcspn (line, "\r\n");
            int in_tag = 0;
            int k;
            char *out;

            if (len == 0)
                break;          /* empty line ends the subtitle */

            out = current->text[count] = (char *) malloc (len + 1);
            if (out == NULL)
                return ERR;

            /* let's filter html tags ::atmos */
            for (k = 0; k < len; k++) {
                const char c = line[k];

                if (c == '>')
                    in_tag = 0;
                else if (c == '<')
                    in_tag = 1;
                else if (!in_tag)
                    *out++ = c;
            }
            *out = '\0';
            count++;
        }
        current->lines = count;
    }
    return current;
}
/*
 * Read one SubViewer 2.0 subtitle: a "{T h:m:s:x" header line followed by
 * text lines, ended by a line starting with '}', an empty line, end of file
 * or SUB_MAX_TEXT lines.  Only the start time is set here.
 *
 * Returns current once at least one text line was stored, NULL at end of
 * file, or ERR on allocation failure.
 */
subtitle *sub_read_line_subviewer2(FILE *fd,subtitle *current) {
    char line[LINE_LEN+1];
    int hh, mm, ss, frac;

    while (current->text[0] == NULL) {
        int count = 0;

        if (fgets (line, LINE_LEN, fd) == NULL)
            return NULL;
        if (line[0] != '{' ||
            sscanf (line, "{T %d:%d:%d:%d", &hh, &mm, &ss, &frac) < 4)
            continue;

        current->start = hh*360000+mm*6000+ss*100+frac/10;

        while (count < SUB_MAX_TEXT && fgets (line, LINE_LEN, fd) != NULL) {
            size_t len;
            char *copy;

            if (line[0] == '}')
                break;
            len = strcspn (line, "\r\n");
            if (len == 0)
                break;

            copy = (char *) malloc (len + 1);
            current->text[count] = copy;
            if (copy == NULL)
                return ERR;
            memcpy (copy, line, len);
            copy[len] = '\0';
            count++;
        }
        current->lines = count;
    }
    return current;
}
/*
 * Read one VPlayer subtitle: "h:m:s<sep>text|text...", where <sep> is any
 * single character (':' or ' ' in practice).
 *
 * Lines that do not match, lines whose time is 0:00:00, and lines whose
 * text starts with '|' are skipped.  Only the start time is set here.
 *
 * Returns current, NULL at end of file, or ERR when sub_readtext() fails.
 */
subtitle *sub_read_line_vplayer(FILE *fd,subtitle *current) {
    char line[LINE_LEN+1];
    int a1,a2,a3;
    char *p, *next, separator;
    int i, plen;

    while (!current->text[0]) {
        if (!fgets (line, LINE_LEN, fd)) return NULL;
        /* %n records where the text starts, right after the separator */
        if (sscanf (line, "%d:%d:%d%c%n",&a1,&a2,&a3,&separator,&plen) < 4)
            continue;

        if (!(current->start = a1*360000+a2*6000+a3*100))
            continue;

        p = &line[ plen ];
        if (*p!='|') {
            next = p;
            i = 0;
            while ((next = sub_readtext (next, &(current->text[i])))) {
                /* sub_readtext() reports failure through its return value */
                if (next == ERR) {return ERR;}
                i++;
                if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
            }
            current->lines=i+1;
        }
    }
    return current;
}
/*
 * Read one RealText subtitle of the form
 *   <time begin="..." end="..."> ... <clear/>text|text...
 *
 * TODO: This format uses quite rich (sub/super)set of xhtml.
 * It could not be checked since the DTD is not included.
 * WARNING: a full XML parser may be required for proper parsing.
 *
 * Four time spellings are tried in turn: m:s, m:s with a fractional end,
 * m:s.f for both, and h:m:s.f for both.  Lines without text after
 * "<clear/>" are skipped.
 *
 * Returns current, NULL at end of file, or ERR when sub_readtext() fails.
 */
subtitle *sub_read_line_rt(FILE *fd,subtitle *current) {
    char line[LINE_LEN+1];
    int a1,a2,a3,a4,b1,b2,b3,b4;
    char *next=NULL;
    int i,len,plen;

    while (!current->text[0]) {
        if (!fgets (line, LINE_LEN, fd)) return NULL;
        //TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
        //to describe the same moment in time. Maybe there are even more formats in use.
        plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
        if (
            ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen)) < 4) &&
            ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen)) < 5) &&
            ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen)) < 6) &&
            ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen)) < 8)
           )
            continue;

        current->start = a1*360000+a2*6000+a3*100+a4/10;
        current->end   = b1*360000+b2*6000+b3*100+b4/10;

        i = 0;
        // TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml?
        next = strstr(line,"<clear/>");
        if(next && strlen(next)>8){
            next+=8;i=0;
            while ((next = sub_readtext (next, &(current->text[i])))) {
                /* sub_readtext() reports failure through its return value */
                if (next == ERR) {return ERR;}
                i++;
                if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
            }
        }
        current->lines=i+1;
    }
    return current;
}
/*
 * Read one Sub Station Alpha "Dialogue: Marked=..." line.
 *
 * Sub Station Alpha v4 (and v2?) scripts have 9 commas before subtitle
 * other Sub Station Alpha scripts have only 8 commas before subtitle
 * Reading the "ScriptType:" field is not reliable since many scripts appear
 * w/o it
 *
 * http://www.scriptclub.org is a good place to find more examples
 * http://www.eswat.demon.co.uk is where the SSA specs can be found
 *
 * The number of comma-separated fields in front of the text is guessed: a
 * comma followed by a space is taken to belong to the text, and the lowest
 * field count seen so far is remembered in max_comma for later lines.
 * "\n" and "\N" inside the text split it into several lines.
 *
 * Returns current, NULL at end of file, or ERR for a malformed line or an
 * allocation failure.
 */
subtitle *sub_read_line_ssa(FILE *fd,subtitle *current) {
    int comma;
    static int max_comma = 32; /* let's use 32 for the case that the */
                               /* amount of commas increase with newer SSA versions */

    int hour1, min1, sec1, hunsec1,
        hour2, min2, sec2, hunsec2, nothing;
    int num;

    char line[LINE_LEN+1],
         line3[LINE_LEN+1],
         *line2;
    char *tmp;

    /* ten conversions: nine integers plus the rest of the line in line3 */
    do {
        if (!fgets (line, LINE_LEN, fd)) return NULL;
    } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
                     "%[^\n\r]", &nothing,
                     &hour1, &min1, &sec1, &hunsec1,
                     &hour2, &min2, &sec2, &hunsec2,
                     line3) < 10);

    line2=strchr(line3, ',');
    if (!line2) return ERR;     /* no field separator at all: malformed */

    for (comma = 4; comma < max_comma; comma ++)
    {
        if (!*line2) break;     /* nothing left after the last comma */
        tmp = line2;
        if(!(tmp=strchr(++tmp, ','))) break;
        if(*(++tmp) == ' ') break;
        /* a space after a comma means we're already in a sentence */
        line2 = tmp;
    }

    if(comma < max_comma)max_comma = comma;

    current->lines=0;num=0;
    current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
    current->end   = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;

    for (;;) {
        /* split at whichever of "\n" / "\N" comes first */
        char *lower = strstr(line2, "\\n");
        char *upper = strstr(line2, "\\N");

        tmp = (lower && (!upper || lower < upper)) ? lower : upper;
        if (!tmp) break;

        current->text[num]=malloc(tmp-line2+1);
        if (!current->text[num]) return ERR;
        memcpy (current->text[num], line2, tmp-line2);
        current->text[num][tmp-line2]='\0';
        line2=tmp+2;
        num++;
        current->lines++;
        if (current->lines >= SUB_MAX_TEXT) return current;
    }

    current->text[num]=strdup(line2);
    current->lines++;

    return current;
}
/*
 * Read one subtitle of the form  start,end,"text"  from a single line.
 *
 * Returns current, NULL at end of file, or ERR when the line does not
 * provide all three fields.
 */
subtitle *sub_read_line_dunnowhat(FILE *fd,subtitle *current) {
    char raw[LINE_LEN+1];
    char quoted[LINE_LEN+1];
    int fields;

    if (fgets (raw, LINE_LEN, fd) == NULL)
        return NULL;

    fields = sscanf (raw, "%ld,%ld,\"%[^\"]",
                     &(current->start), &(current->end), quoted);
    if (fields < 3)
        return ERR;

    current->lines = 1;
    current->text[0] = strdup(quoted);
    return current;
}
/*
 * Read one MPsub subtitle: a "wait duration" line followed by text lines
 * up to the first blank line.
 *
 * Both numbers are relative: `wait` is counted from the end of the previous
 * subtitle, which is tracked in mpsub_position.  For time-based files the
 * values are seconds and are stored as 1/100 s.
 *
 * Returns current when at least one text line was read, NULL at end of
 * file or when the timing line is not followed by any text.
 */
subtitle *sub_read_line_mpsub(FILE *fd, subtitle *current) {
    char line[LINE_LEN+1];
    float a,b;
    int num=0;
    char *p, *q;

    do
    {
        if (!fgets(line, LINE_LEN, fd)) return NULL;
    } while (sscanf (line, "%f %f", &a, &b) !=2);

    mpsub_position += a*(sub_uses_time ? 100.0 : 1.0);
    current->start=(int) mpsub_position;
    mpsub_position += b*(sub_uses_time ? 100.0 : 1.0);
    current->end=(int) mpsub_position;

    while (num < SUB_MAX_TEXT) {
        if (!fgets (line, LINE_LEN, fd))
            return num ? current : NULL;

        p=line;
        while (isspace((unsigned char)*p)) p++;
        /* blank line: end of this subtitle */
        if (eol(*p))
            return num ? current : NULL;

        for (q=p; !eol(*q); q++);
        *q='\0';
        current->text[num]=strdup(p);
        current->lines = ++num;
    }
    /* SUB_MAX_TEXT lines were stored; returning NULL here would make the
     * caller believe the file has ended */
    return current;
}
/* Last subtitle handed out by sub_read_line_aqt(); its end time is only
 * known once the next "-->>" marker has been read. */
subtitle *previous_aqt_sub = NULL;

/*
 * Read one AQTitle subtitle: a "-->> <frame>" marker followed by two text
 * lines (the second may hold several '|'-separated parts).
 *
 * Every marker also closes the previous subtitle (end = start - 1).  An
 * entry whose two text lines are both empty only serves as such an end
 * marker: it is dropped and reading goes on with the next entry, because
 * returning NULL would tell the caller that the file has ended.
 *
 * Returns current, NULL at end of file, or ERR when sub_readtext() fails.
 */
subtitle *sub_read_line_aqt(FILE *fd,subtitle *current) {
    char line[LINE_LEN+1];
    char *next;
    int i;

    for (;;) {
        // try to locate next subtitle
        do {
            if (!fgets (line, LINE_LEN, fd))
                return NULL;
        } while (sscanf (line, "-->> %ld", &(current->start)) < 1);

        if (previous_aqt_sub != NULL)
            previous_aqt_sub->end = current->start-1;

        previous_aqt_sub = current;

        if (!fgets (line, LINE_LEN, fd))
            return NULL;

        if (sub_readtext (line, &current->text[0]) == ERR)
            return ERR;
        current->lines = 1;
        current->end = current->start; // will be corrected by next subtitle

        if (!fgets (line, LINE_LEN, fd))
            return current;

        next = line;
        i = 1;
        while ((next = sub_readtext (next, &(current->text[i])))) {
            /* sub_readtext() reports failure through its return value */
            if (next == ERR) {return ERR;}
            i++;
            if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
        }
        current->lines=i+1;

        /* compare the contents, not the pointers */
        if (current->text[0][0] != '\0' || current->text[1][0] != '\0')
            return current;

        // void subtitle -> end of previous marked, go on with the next one
        for (i = 0; i < current->lines; i++) {
            free (current->text[i]);
            current->text[i] = NULL;
        }
        current->lines = 0;
        previous_aqt_sub = NULL;
    }
}
/* Last subtitle handed out by sub_read_line_subrip09(); its end time is
 * only known once the next "[h:m:s]" marker has been read. */
subtitle *previous_subrip09_sub = NULL;

/*
 * Read one SubRip 0.9 subtitle: a "[h:m:s]" marker followed by one text
 * line with '|'-separated parts.  Times are stored in 1/100 s.
 *
 * Every marker also closes the previous subtitle (end = start - 1).  An
 * entry with a single empty text only serves as such an end marker: it is
 * dropped and reading goes on with the next entry, because returning NULL
 * would tell the caller that the file has ended.
 *
 * Returns current, NULL at end of file, or ERR when sub_readtext() fails.
 */
subtitle *sub_read_line_subrip09(FILE *fd,subtitle *current) {
    char line[LINE_LEN+1];
    int a1,a2,a3;
    char *next;
    int i;

    for (;;) {
        // try to locate next subtitle
        do {
            if (!fgets (line, LINE_LEN, fd))
                return NULL;
        } while (sscanf (line, "[%d:%d:%d]",&a1,&a2,&a3) < 3);

        /* the start time must be known before it can close the previous
         * subtitle */
        current->start = a1*360000+a2*6000+a3*100;

        if (previous_subrip09_sub != NULL)
            previous_subrip09_sub->end = current->start-1;

        previous_subrip09_sub = current;

        if (!fgets (line, LINE_LEN, fd))
            return NULL;

        next = line;
        i = 0;
        while ((next = sub_readtext (next, &(current->text[i])))) {
            /* sub_readtext() reports failure through its return value */
            if (next == ERR) {return ERR;}
            i++;
            if (i>=SUB_MAX_TEXT) { mp_msg(MSGT_SUBREADER,MSGL_WARN,"Too many lines in a subtitle\n");current->lines=i;return current;}
        }
        current->lines=i+1;

        /* compare the contents, not the pointers */
        if (i != 0 || current->text[0][0] != '\0')
            return current;

        // void subtitle -> end of previous marked, go on with the next one
        free (current->text[0]);
        current->text[0] = NULL;
        current->lines = 0;
        previous_subrip09_sub = NULL;
    }
}
/*
 * Classify the beginning of the text field of a JACOsub line.
 *
 * Returns 1 when the field starts with a directive word that has to be
 * stripped before the text, 2 when the whole line has to be skipped, and
 * 0 when the field is plain text.
 */
static unsigned short jacosub_directive_kind(const char *field)
{
    const unsigned char *s = (const unsigned char *) field;
    unsigned short directive = 0;

    switch (toupper(s[0])) {
    case 'C':
        switch (toupper(s[1])) {
        case 'F':
        case 'P':
        case 'S':
        case 'B':
            if (isdigit(s[2]))
                directive = 1;
            break;
        default:
            break;
        }
        break;
    case 'D':
        if ((s[1] == ' ') || isdigit(s[1]))
            directive = 1;
        // special case
        if (toupper(s[1]) == 'C')
            directive = 1;
        break;
    case 'E':
        switch (toupper(s[1])) {
        case 'D':
        case 'P':
            if (isdigit(s[2]))
                directive = 1;
            break;
        case 'I':
            if ((toupper(s[2]) == 'O') && (isdigit(s[3])))
                directive = 1;
            break;
        case 'R':
            if ((toupper(s[2]) == 'D') && (isdigit(s[3])))
                directive = 1;
            break;
        case 'W':
            if ((toupper(s[2]) == 'L')
                || (toupper(s[2]) == 'R'))
                directive = 1;
            break;
        default:
            break;
        }
        break;
    case 'F':
        if (isdigit(s[1]))
            directive = 1;
        else
            switch (toupper(s[1])) {
            case 'O':
                if (isdigit(s[2]))
                    directive = 1;
                break;
            case 'S':
                directive = 1;
                break;
            default:
                break;
            }
        break;
    case 'G':
        if ((s[1] == 'G') && (isdigit(s[2])))
            directive = 2;
        break;
    case 'H':
        switch (toupper(s[1])) {
        case 'L':
        case 'R':
            if (isdigit(s[2]))
                directive = 1;
            break;
        default:
            break;
        }
        break;
    case 'J':
        switch (toupper(s[1])) {
        case 'B':
            if (toupper(s[2]) == 'C')
                directive = 1;
            break;
        case 'C':
        case 'L':
        case 'R':
            directive = 1;
            break;
        case 'F':
            if ((s[2] == ':') && (toupper(s[3]) == 'L'))
                directive = 1;
            break;
        default:
            break;
        }
        break;
    case 'R':
        if (((toupper(s[1]) == 'D') && (toupper(s[2]) == 'B'))
            || ((toupper(s[1]) == 'D')
                && (toupper(s[2]) == 'C'))
            || ((toupper(s[1]) == 'L')
                && (toupper(s[2]) == 'B')))
            directive = 2;
        break;
    case 'V':
        switch (toupper(s[1])) {
        case 'A':
        case 'B':
        case 'M':
        case 'T':
        case 'U':
            directive = 1;
            break;
        case 'L':
        case 'P':
            if (isdigit(s[2]))
                directive = 1;
            break;
        default:
            break;
        }
        break;
    case '[':
        directive = 1;
        break;
    case '~':
        directive = 2;
        break;
    default:
        break;
    }
    return directive;
}

/*
 * Read one JACOsub subtitle: "h:m:s.cs h:m:s.cs [directive] text".
 *
 * In the text, "{...}" comments are dropped, "\n" starts a new line, "\C"
 * plus the following character and "\I", "\B", "\N" are removed, and a
 * backslash at the end of the line joins the next line of the file.
 * Times are stored in 1/100 s.
 *
 * Returns current, or NULL at end of file.
 */
subtitle *sub_read_line_jacosub(FILE * fd, subtitle * current)
{
    char line1[LINE_LEN], line2[LINE_LEN], text[LINE_LEN], *p, *q;
    /* last position in text that still leaves room for the NUL */
    char *const text_end = text + LINE_LEN - 1;
    int a1, a2, a3, a4, b1, b2, b3, b4, comment = 0;
    unsigned short directive;

    memset(current, 0, sizeof(subtitle));
    memset(line1, 0, sizeof(line1));
    memset(line2, 0, sizeof(line2));

    while (!current->text[0]) {
        if (!fgets(line1, LINE_LEN, fd))
            return NULL;
        if (sscanf
            (line1, "%d:%d:%d.%d %d:%d:%d.%d %[^\n\r]", &a1, &a2, &a3, &a4,
             &b1, &b2, &b3, &b4, line2) < 9)
            continue;

        //a Jacosub script *may* have some commands before the text, so let's recognize them
        /* classified afresh for every line, so that one skipped line does
         * not make all following lines skipped as well */
        directive = jacosub_directive_kind(line2);

        if (directive == 1) {
            /* drop the directive word, keep the rest as text */
            strcpy(line1, line2);
            sscanf(line1, "%*s %[^\n\r]", line2);
        } else if (directive == 2) {
            continue;
        }

        current->start = a1 * 360000 + a2 * 6000 + a3 * 100 + a4;
        current->end = b1 * 360000 + b2 * 6000 + b3 * 100 + b4;
        current->lines = 0;

        q = text;
        p = line2;
        while ((*p) == ' ')
            p++;

        for (; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); p++) {
            switch (*p) {
            case '{':
                comment++;
                break;
            case '}':
                comment--;
                //the next to get rid of a blank after the comment
                if ((*(p + 1)) == ' ')
                    p++;
                break;
            case '\\':
                if (*(p + 1) == 'n') {
                    *q = '\0';
                    q = text;
                    current->text[current->lines++] = strdup(text);
                    p++;
                    break;
                } else if (toupper((unsigned char) *(p + 1)) == 'C') {
                    p++;
                    p++;
                    break;
                } else if ((toupper((unsigned char) *(p + 1)) == 'I')
                           || (toupper((unsigned char) *(p + 1)) == 'B')
                           || (toupper((unsigned char) *(p + 1)) == 'N')) {
                    p++;
                    break;
                } else if (eol(*(p + 1))) {
                    if (!fgets(line1, LINE_LEN, fd))
                        return NULL;
                    trail_space(line1);
                    p = line1;
                    if (line1[0] == '\0') {
                        /* empty continuation line: make the loop's p++
                         * land on a terminator, not on stale data */
                        line1[1] = '\0';
                        break;
                    }
                }
                /* fallthrough: copy the current character */
            default:
                if (!comment && q < text_end) {
                    *q = *p;
                    q++;
                }
            }
        }
        *q = '\0';
        /* store the last line only if there is still a free slot */
        if (current->lines < SUB_MAX_TEXT)
            current->text[current->lines++] = strdup(text);
    }
    return current;
}
/*
 * Guess the subtitle format of fd from its first lines.
 *
 * Up to 100 lines are tested against each known format, in a fixed order.
 * On success sub_uses_time is set (1 = time based, 0 = frame based) and the
 * matching SUB_* constant is returned.  SUB_INVALID is returned when the
 * file ends or no line matched.  The file position is left after the last
 * line read.
 */
int sub_autodetect (FILE *fd) {
    char line[LINE_LEN+1];
    int i,j=0;
    char p;

    while (j < 100) {
        j++;
        if (!fgets (line, LINE_LEN, fd))
            return SUB_INVALID;

        if (sscanf (line, "{%d}{%d}", &i, &i)==2)
                {sub_uses_time=0;return SUB_MICRODVD;}
        if (sscanf (line, "{%d}{}", &i)==1)
                {sub_uses_time=0;return SUB_MICRODVD;}
        if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8)
                {sub_uses_time=1;return SUB_SUBRIP;}
        if (sscanf (line, "%d:%d:%d,%d --> %d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8)
                {sub_uses_time=1;return SUB_SUBVIEWER;}
        /* require all four fields: sscanf() also returns non-zero for
         * partial matches and EOF (-1) for a truncated "{T" line */
        if (sscanf (line, "{T %d:%d:%d:%d",&i, &i, &i, &i)==4)
                {sub_uses_time=1;return SUB_SUBVIEWER2;}
        if (strstr (line, "<SAMI>"))
                {sub_uses_time=1; return SUB_SAMI;}
        if (sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8)
                {sub_uses_time = 1; return SUB_JACOSUB;}
        if (sscanf (line, "%d:%d:%d:", &i, &i, &i )==3)
                {sub_uses_time=1;return SUB_VPLAYER;}
        if (sscanf (line, "%d:%d:%d ", &i, &i, &i )==3)
                {sub_uses_time=1;return SUB_VPLAYER;}
        //TODO: just checking if first line of sub starts with "<" is WAY
        // too weak test for RT
        // Please someone who knows the format of RT... FIX IT!!!
        // It may conflict with other sub formats in the future (actually it doesn't)
        if ( *line == '<' )
                {sub_uses_time=1;return SUB_RT;}

        /* strncmp() stops at the end of a short line, memcmp() would not */
        if (!strncmp(line, "Dialogue: Marked", 16))
                {sub_uses_time=1; return SUB_SSA;}
        if (sscanf (line, "%d,%d,\"%c", &i, &i, &p) == 3)
                {sub_uses_time=0;return SUB_DUNNOWHAT;}
        if (sscanf (line, "FORMAT=%d", &i) == 1)
                {sub_uses_time=0; return SUB_MPSUB;}
        if (sscanf (line, "FORMAT=TIM%c", &p)==1 && p=='E')
                {sub_uses_time=1; return SUB_MPSUB;}
        if (strstr (line, "-->>"))
                {sub_uses_time=0; return SUB_AQTITLE;}
        if (sscanf (line, "[%d:%d:%d]", &i, &i, &i)==3)
                {sub_uses_time=1;return SUB_SUBRIP09;}
    }

    return SUB_INVALID;  // too many bad lines
}
/* sub_utf8 is defined here only for the stand-alone DUMPSUBS test program;
 * otherwise the definition lives outside this file.  sub_filename() sets it
 * to 1 for a ".utf"/".UTF" file and subcp_open() sets it to 2 when iconv
 * recoding is active. */
#ifdef DUMPSUBS
int sub_utf8=0;
#else
extern int sub_utf8;
#endif
/* Defined outside this file.  The dump_*() functions subtract sub_delay
 * (scaled to their own time unit) from every timestamp; sub_fps is the
 * frame rate assumed for the subtitles, with 0 treated as "not set". */
extern float sub_delay;
extern float sub_fps;
#ifdef USE_ICONV
/* iconv descriptor used by subcp_recode(); (iconv_t)(-1) while closed. */
static iconv_t icdsc;

/*
 * Open the iconv descriptor that converts from sub_cp to UTF-8.
 * Does nothing but mark the descriptor as closed when sub_cp is NULL.
 * On success sub_utf8 is set to 2.
 */
void subcp_open (void)
{
    char *target = "UTF-8";

    icdsc = (iconv_t)(-1);
    if (!sub_cp)
        return;

    icdsc = iconv_open (target, sub_cp);
    if (icdsc == (iconv_t)(-1)) {
        mp_msg(MSGT_SUBREADER,MSGL_ERR,"SUB: error opening iconv descriptor.\n");
        return;
    }

    mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: opened iconv descriptor.\n");
    sub_utf8 = 2;
}
/* Close the iconv descriptor opened by subcp_open(), if there is one. */
void subcp_close (void)
{
    if (icdsc == (iconv_t)(-1))
        return;

    (void) iconv_close (icdsc);
    mp_msg(MSGT_SUBREADER,MSGL_V,"SUB: closed iconv descriptor.\n");
}
/* Size of the scratch buffer that receives one recoded line. */
#define ICBUFFSIZE 512
static char icbuffer[ICBUFFSIZE];

/*
 * Recode every text line of sub through the icdsc iconv descriptor.
 *
 * Lines are processed from the last to the first; each one is replaced by a
 * newly allocated converted copy.  When a conversion or an allocation
 * fails, all text lines of the subtitle are freed and ERR is returned.
 * Returns sub on success.
 */
subtitle* subcp_recode (subtitle *sub)
{
    int idx;
    int failed = 0;

    for (idx = sub->lines - 1; idx >= 0 && !failed; --idx) {
        char *in = sub->text[idx];
        char *out = icbuffer;
        size_t in_left = strlen(in);
        size_t out_left = ICBUFFSIZE - 1;   /* keep one byte for the NUL */
        char *copy;

        if (iconv(icdsc, &in, &in_left,
                  &out, &out_left) == (size_t)(-1)) {
            mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error recoding line.\n");
            failed = 1;
        } else if (!(copy = (char *)malloc(out - icbuffer + 1))) {
            mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: error allocating mem.\n");
            failed = 1;
        } else {
            *out = '\0';
            strcpy (copy, icbuffer);
            free (sub->text[idx]);
            sub->text[idx] = copy;
        }
    }

    if (failed) {
        for (idx = sub->lines; idx > 0;)
            free (sub->text[--idx]);
        return ERR;
    }
    return sub;
}
#endif
/*
 * Fix up the timing of the sub_num subtitles starting at sub.
 *
 * block == 0: a subtitle whose end is not after its start gets a duration
 *             of subtime seconds (converted with fps for frame-based
 *             formats).
 * block != 0: a subtitle that runs into the start of its successor is cut
 *             to end one unit before it (and to at most subtime seconds);
 *             in addition every subtitle of a time-based file is rescaled
 *             by sub_fps/fps when sub_fps is set.
 *
 * The number of adjusted subtitles is reported through mp_msg().
 */
static void adjust_subs_time(subtitle* sub, float subtime, float fps, int block){
    int n = 0, m = 0;
    subtitle* nextsub;
    int i = sub_num;
    unsigned long subfms = (sub_uses_time ? 100 : fps) * subtime;
    /* Theory:
     * Movies are often converted from FILM (24 fps)
     * to PAL (25) by simply speeding it up, so we
     * have to multiply the original timestamps by
     * (Movie's FPS / Subtitle's (guessed) FPS)
     * so eg. for 23.98 fps movie and PAL time based
     * subtitles we say -subfps 25 and we're fine!
     */
    /* timed sub fps correction ::atmos */
    const int rescale = block && sub_uses_time && sub_fps;

    if (i) for (;;){
        if (!block && sub->end <= sub->start){
            sub->end = sub->start + subfms;
            m++;
            n++;
        }
        if (!--i) {
            /* the last subtitle has no successor to be compared with, but
             * it needs the fps correction like all the others */
            if (rescale) {
                sub->start *= sub_fps/fps;
                sub->end   *= sub_fps/fps;
            }
            break;
        }
        nextsub = sub + 1;
        if (block){
            if (sub->end >= nextsub->start){
                sub->end = nextsub->start - 1;
                if (sub->end - sub->start > subfms)
                    sub->end = sub->start + subfms;
                if (!m)
                    n++;
            }

            /* rescale only after the comparison above, which needs both
             * subtitles on the same (unscaled) time base */
            if (rescale) {
                sub->start *= sub_fps/fps;
                sub->end   *= sub_fps/fps;
            }
        }

        sub = nextsub;
        m = 0;
    }
    if (n) mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Adjusted %d subtitle(s).\n", n);
}
subtitle* sub_read_file (char *filename, float fps) {
FILE *fd;
int n_max, n_first, i, j, sub_first, sub_orig;
subtitle *first, *second;
char *fmtname[] = { "microdvd", "subrip", "subviewer", "sami", "vplayer",
"rt", "ssa", "dunnowhat", "mpsub", "aqt", "subviewer 2.0", "subrip 0.9", "jacosub" };
subtitle * (*func[])(FILE *fd,subtitle *dest)=
{
sub_read_line_microdvd,
sub_read_line_subrip,
sub_read_line_subviewer,
sub_read_line_sami,
sub_read_line_vplayer,
sub_read_line_rt,
sub_read_line_ssa,
sub_read_line_dunnowhat,
sub_read_line_mpsub,
sub_read_line_aqt,
sub_read_line_subviewer2,
sub_read_line_subrip09,
sub_read_line_jacosub
};
if(filename==NULL) return NULL; //qnx segfault
fd=fopen (filename, "r"); if (!fd) return NULL;
sub_format=sub_autodetect (fd);
if (sub_format==SUB_INVALID) {mp_msg(MSGT_SUBREADER,MSGL_WARN,"SUB: Could not determine file format\n");return NULL;}
mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected subtitle file format: %s\n", fmtname[sub_format]);
rewind (fd);
#ifdef USE_ICONV
subcp_open();
#endif
sub_num=0;n_max=32;
first=(subtitle *)malloc(n_max*sizeof(subtitle));
if(!first) return NULL;
while(1){
subtitle *sub;
if(sub_num>=n_max){
n_max+=16;
first=realloc(first,n_max*sizeof(subtitle));
}
sub = &first[sub_num];
memset(sub, '\0', sizeof(subtitle));
sub=func[sub_format](fd,sub);
if(!sub) break; // EOF
#ifdef USE_ICONV
if ((sub!=ERR) && (sub_utf8 & 2)) sub=subcp_recode(sub);
#endif
if(sub==ERR) ++sub_errs; else ++sub_num; // Error vs. Valid
}
fclose(fd);
#ifdef USE_ICONV
subcp_close();
#endif
// printf ("SUB: Subtitle format %s time.\n", sub_uses_time?"uses":"doesn't use");
mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Read %i subtitles", sub_num);
if (sub_errs) mp_msg(MSGT_SUBREADER,MSGL_INFO,", %i bad line(s).\n", sub_errs);
else mp_msg(MSGT_SUBREADER,MSGL_INFO,".\n");
if(sub_num<=0){
free(first);
return NULL;
}
adjust_subs_time(first, 6.0, fps, 0); /* ~6 secs AST */
// here we manage overlapping subtitles
sub_orig = sub_num;
n_first = sub_num;
n_max = 0;
sub_num = 0;
second = NULL;
// for each subtitle in first[]
for (sub_first = 0; sub_first < n_first; ++sub_first) {
while (first[sub_first].start <= first[sub_first].end) {
unsigned long end_time = first[sub_first].end;
int lines_to_add = 0, sub_to_add, event, ls, lf;
// there is a new subtitle, so let's make second[] bigger
n_max += 1;
second = realloc(second, n_max * sizeof(subtitle));
memset(&second[n_max - 1], '\0', sizeof(subtitle));
// find the number of lines and subtitles that overlap the current subtitle
for (sub_to_add = 0;
(end_time > first[sub_first + sub_to_add + 1].start)
&& (sub_first + sub_to_add + 1 < n_first); ++sub_to_add) {
lines_to_add += first[sub_first + sub_to_add + 1].lines;
}
if ((lines_to_add > 0)
&& (first[sub_first].lines + lines_to_add <
SUB_MAX_TEXT)) {
unsigned long next;
// find next beginning-of-a-subtitle time
next = first[sub_first].end + 1;
event = sub_first;
for (j = 0; j < lines_to_add; j++) {
if ((first[sub_first + j + 1].end + 1 < next)
&& (first[sub_first + j + 1].end >=
first[sub_first].start)) {
event = sub_first + j + 1;
next = first[event].end + 1;
}
if ((first[sub_first + j + 1].start < next)
&& (first[sub_first + j + 1].start >
first[sub_first].start)) {
event = sub_first + j + 1;
next = first[event].start;
}
}
second[sub_num].start = first[sub_first].start;
second[sub_num].end = next - 1;
second[sub_num].lines = first[sub_first].lines;
for (ls = 0, lf = 0; ls < second[sub_num].lines; ls++, lf++) {
second[sub_num].text[ls] =
strdup(first[sub_first].text[lf]);
}
for (j = 0; j < sub_to_add; j++) {
if ((first[sub_first + j + 1].start <=
second[sub_num].start)
&& (first[sub_first + j + 1].end >=
second[sub_num].end)
&& (second[sub_num].lines +
first[sub_first + j + 1].lines <=
SUB_MAX_TEXT)) {
for (lf = 0; lf < first[sub_first + j + 1].lines;
lf++, ls++)
second[sub_num].text[ls] =
strdup(first[sub_first + j + 1].text[lf]);
first[sub_first + j + 1].start = next;
} else
for (lf = 0; lf < first[sub_first + j + 1].lines;
lf++, ls++)
second[sub_num].text[ls] = strdup(" ");
second[sub_num].lines +=
first[sub_first + j + 1].lines;
}
first[sub_first].start = next;
} else {
second[sub_num].start = first[sub_first].start;
second[sub_num].end = first[sub_first].end;
second[sub_num].lines = first[sub_first].lines;
for (ls = 0; ls < second[sub_num].lines; ls++)
second[sub_num].text[ls] =
strdup(first[sub_first].text[ls]);
first[sub_first].start = first[sub_first].end + 1;
}
++sub_num;
} // while
}
adjust_subs_time(second, 6.0, fps, 1); /* ~6 secs AST */
for (j = sub_orig - 1; j <= 0; --j) {
for (i = first[j].lines - 1; i <= 0; --i) {
free(first[j].text[i]);
}
free(&first[j]);
}
return second;
}
/* Compiled out.  Overwrites the first occurrence of `what` in `in` with the
 * characters of `whereof`, in place; when `what` is longer than `whereof`
 * the string is terminated right after the replacement, which drops the
 * rest of `in`.  Returns `in`, or NULL when an argument is NULL or `what`
 * does not occur. */
#if 0
char * strreplace( char * in,char * what,char * whereof )
{
int i;
char * tmp;
if ( ( in == NULL )||( what == NULL )||( whereof == NULL )||( ( tmp=strstr( in,what ) ) == NULL ) ) return NULL;
for( i=0;i<strlen( whereof );i++ ) tmp[i]=whereof[i];
if ( strlen( what ) > strlen( whereof ) ) tmp[i]=0;
return in;
}
#endif
/*
 * Look for a subtitle file that belongs to the movie fname.
 *
 * The extension of fname is replaced by each known subtitle extension in
 * turn.  The candidates are tried first in the directory `path` (which is
 * prepended as is, so it should end with a '/'), then next to the movie
 * itself.  With iconv recoding active (sub_cp set) the ".utf"/".UTF"
 * extensions are not tried.
 *
 * Returns the malloc()ed name of the first candidate that can be opened
 * (owned by the caller), or NULL when fname is NULL, has no extension, no
 * candidate exists or memory runs out.  sub_utf8 is set to 1 when a
 * ".utf"/".UTF" file was found.
 */
char * sub_filename(char* path,  char * fname )
{
    static const char *const sub_exts[] =
    {  ".utf",
       ".UTF",
       ".sub",
       ".SUB",
       ".srt",
       ".SRT",
       ".smi",
       ".SMI",
       ".rt",
       ".RT",
       ".txt",
       ".TXT",
       ".ssa",
       ".SSA",
       ".aqt",
       ".AQT",
       ".js",
       ".JS" };
    const int n_exts = (int)(sizeof(sub_exts) / sizeof(sub_exts[0]));
    char *sub_name1, *sub_name2;    /* next to the movie / inside path */
    char *aviptr1, *aviptr2;        /* where the extension starts in each */
    char *base, *ext;
    int i, j, first_ext;
    FILE *f;

    if ( fname == NULL ) return NULL;
    if ( path == NULL ) path = "";

    /* the extension must belong to the file name, not to a directory */
    base = strrchr(fname, '/');
    base = base ? base + 1 : fname;
    ext = strrchr(base, '.');
    if (!ext) return NULL;

    /* the longest extension plus the NUL fits in the 8 spare bytes */
    sub_name1 = malloc(strlen(fname) + 8);
    sub_name2 = malloc(strlen(path) + strlen(fname) + 8);
    if (!sub_name1 || !sub_name2) {
        free(sub_name2);
        free(sub_name1);
        return NULL;
    }

    strcpy(sub_name1, fname);
    sprintf(sub_name2, "%s%s", path, base);

    aviptr1 = sub_name1 + (ext - fname);
    aviptr2 = sub_name2 + strlen(path) + (ext - base);

#ifdef USE_ICONV
    first_ext = sub_cp ? 2 : 0;
#else
    first_ext = 0;
#endif

    for (j = 0; j <= 1; j++) {
        char *sub_name = j ? sub_name1 : sub_name2;
        char *other    = j ? sub_name2 : sub_name1;

        for (i = first_ext; i < n_exts; i++) {
            strcpy(j ? aviptr1 : aviptr2, sub_exts[i]);
            if ((f = fopen(sub_name, "rt"))) {
                fclose(f);
                mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Detected sub file: %s\n",sub_name );
                if (i<2) sub_utf8=1;
                /* only the returned name is handed to the caller */
                free(other);
                return sub_name;
            }
        }
    }

    free(sub_name2);
    free(sub_name1);
    return NULL;
}
/*
 * Print the sub_num subtitles in subs to stdout, one per output line, with
 * their line count and start/end values, followed by a short summary.
 */
void list_sub_file(subtitle* subs){
    int idx;

    for (idx = 0; idx < sub_num; idx++) {
        subtitle *entry = subs + idx;
        const int last = entry->lines - 1;
        int ln;

        printf ("%i line%c (%li-%li) ",
                entry->lines,
                (entry->lines == 1) ? ' ' : 's',
                entry->start,
                entry->end);
        for (ln = 0; ln <= last; ln++) {
            /* lines of one subtitle are separated, not terminated */
            printf ("%s%s", entry->text[ln], (ln == last) ? "" : " <BREAK> ");
        }
        printf ("\n");
    }

    printf ("Subtitle format %s time.\n", sub_uses_time?"uses":"doesn't use");
    printf ("Read %i subtitles, %i errors.\n", sub_num, sub_errs);
}
/*
 * Write the sub_num subtitles in subs to "dumpsub.srt" in the
 * "hh:mm:ss,mmm --> hh:mm:ss,mmm" form.
 *
 * For frame-based subtitles the frame numbers are converted with sub_fps,
 * which defaults to fps when it is not set.  sub_delay (in seconds) is
 * subtracted from every timestamp.
 */
void dump_srt(subtitle* subs, float fps){
    int idx, ln, k;
    FILE *out;

    if (!sub_uses_time && sub_fps == 0)
        sub_fps = fps;

    out = fopen("dumpsub.srt","w");
    if (!out)
    {
        perror("dump_srt: fopen");
        return;
    }

    for (idx = 0; idx < sub_num; idx++)
    {
        subtitle *entry = &subs[idx];
        unsigned long stamp[2];

        stamp[0] = entry->start;
        stamp[1] = entry->end;

        fprintf(out,"%d\n",idx+1);//line number

        /* start and end are formatted the same way */
        for (k = 0; k < 2; k++)
        {
            unsigned long temp = stamp[k];
            int h,m,s,ms;

            if (!sub_uses_time)
                temp = temp * 100 / sub_fps;
            temp -= sub_delay * 100;
            h=temp/360000;temp%=360000; //h =1*100*60*60
            m=temp/6000;  temp%=6000;   //m =1*100*60
            s=temp/100;   temp%=100;    //s =1*100
            ms=temp*10;                 //ms=1*10
            fprintf(out,"%02d:%02d:%02d,%03d",h,m,s,ms);
            fputs(k == 0 ? " --> " : "\n", out);
        }

        for (ln = 0; ln < entry->lines; ln++)
            fprintf(out,"%s\n",entry->text[ln]);

        fprintf(out,"\n");
    }
    fclose(out);
    mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.srt\'.\n");
}
/*
 * Write the sub_num subtitles in subs to "dump.mpsub" in MPsub format.
 *
 * Every subtitle is written as "<wait> <duration>" relative to the end of
 * the previous one, which is tracked in mpsub_position (started at
 * sub_delay).  Time-based subtitles are written in seconds under
 * "FORMAT=TIME", frame-based ones in frames under "FORMAT=<fps>".
 */
void dump_mpsub(subtitle* subs, float fps){
    int idx, ln;
    FILE *out;
    float wait, length;

    if (sub_uses_time)
        mpsub_position = sub_delay*100;
    else
        mpsub_position = sub_delay*fps;
    if (sub_fps==0) sub_fps=fps;

    out = fopen ("dump.mpsub", "w");
    if (!out) {
        perror ("dump_mpsub: fopen");
        return;
    }

    if (sub_uses_time) fprintf (out,"FORMAT=TIME\n\n");
    else fprintf (out, "FORMAT=%5.2f\n\n", fps);

    for (idx = 0; idx < sub_num; idx++) {
        subtitle *entry = subs + idx;

        if (!sub_uses_time) {
            fprintf (out, "%ld %ld\n", (long)((entry->start*(fps/sub_fps))-((mpsub_position*(fps/sub_fps)))),
                     (long)(((entry->end)-(entry->start))*(fps/sub_fps)));
        } else {
            wait   = ((entry->start-mpsub_position)/100.0);
            length = ((entry->end-entry->start)/100.0);

            /* whole numbers are written without decimals */
            if ( (float)((int)wait) != wait)
                fprintf (out, "%.2f",wait);
            else
                fprintf (out, "%.0f",wait);

            if ( (float)((int)length) != length)
                fprintf (out, " %.2f\n",length);
            else
                fprintf (out, " %.0f\n",length);
        }

        mpsub_position = entry->end;
        for (ln = 0; ln < entry->lines; ln++) {
            fprintf (out, "%s\n",entry->text[ln]);
        }
        fprintf (out, "\n");
    }
    fclose (out);
    mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dump.mpsub\'.\n");
}
/*
 * Write the sub_num subtitles in subs to "dumpsub.txt" in MicroDVD format:
 * "{start}{end}line|line...".
 *
 * Timestamps are converted to frames at sub_fps (which defaults to fps when
 * it is not set) and sub_delay, converted to frames, is subtracted.
 */
void dump_microdvd(subtitle* subs, float fps) {
    int idx, delay;
    FILE *out;

    if (sub_fps == 0)
        sub_fps = fps;

    out = fopen("dumpsub.txt", "w");
    if (!out) {
        perror("dumpsub.txt: fopen");
        return;
    }

    delay = sub_delay * sub_fps;

    for (idx = 0; idx < sub_num; ++idx) {
        /* 1/100 s for time-based input, movie frames otherwise */
        const float units = sub_uses_time ? 100 : fps;
        int ln;
        int start = subs[idx].start;
        int end = subs[idx].end;

        start = start * sub_fps / units;
        end = end * sub_fps / units;
        start -= delay;
        end -= delay;

        fprintf(out, "{%d}{%d}", start, end);
        for (ln = 0; ln < subs[idx].lines; ++ln) {
            if (ln)
                fprintf(out, "%s%s", "|", subs[idx].text[ln]);
            else
                fprintf(out, "%s%s", "", subs[idx].text[ln]);
        }
        fprintf(out, "\n");
    }
    fclose(out);
    mp_msg(MSGT_SUBREADER,MSGL_INFO,"SUB: Subtitles dumped in \'dumpsub.txt\'.\n");
}
/*
 * Release a subtitle array returned by sub_read_file(): the text lines of
 * all sub_num subtitles and the array itself.  sub_num and sub_errs are
 * reset to 0.  A NULL argument is ignored.
 */
void sub_free( subtitle * subs )
{
    int i, j;

    if ( !subs ) return;

    /* every subtitle owns its lines, not only the first one */
    for ( i=0;i<sub_num;i++ )
        for ( j=0;j<subs[i].lines;j++ )
            free( subs[i].text[j] );
    free( subs );

    sub_num=0;
    sub_errs=0;
}
#ifdef DUMPSUBS
/*
 * Stand-alone test driver: subreader filename.sub [codepage]
 * Reads the file and lists its subtitles on stdout.
 */
int main(int argc, char **argv) {  // for testing
    /* NOTE(review): arbitrary frame rate for the test program; it only
     * matters for frame-based formats -- confirm the value wanted */
    const float test_fps = 25.0f;
    subtitle *subs;

    if(argc<2){
        printf("\nUsage: subreader filename.sub\n\n");
        exit(1);
    }
#ifdef USE_ICONV
    /* sub_cp only exists with iconv support; argv[argc] is NULL, so this
     * stays NULL when no codepage was given */
    sub_cp = argv[2];
#endif
    subs=sub_read_file(argv[1], test_fps);
    if(!subs){
        printf("Couldn't load file.\n");
        exit(1);
    }

    list_sub_file(subs);

    return 0;
}
#endif