mirror of
https://code.videolan.org/videolan/vlc
synced 2024-09-04 09:11:33 +02:00
eb4b233b71
Signed-off-by: Jean-Baptiste Kempf <jb@videolan.org>
737 lines
35 KiB
C
737 lines
35 KiB
C
/*****************************************************************************
|
|
* algo_ivtc.h : IVTC (inverse telecine) algorithm for the VLC deinterlacer
|
|
*****************************************************************************
|
|
* Copyright (C) 2010-2011 the VideoLAN team
|
|
* $Id$
|
|
*
|
|
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
|
|
*****************************************************************************/
|
|
|
|
#ifndef VLC_DEINTERLACE_ALGO_IVTC_H
|
|
#define VLC_DEINTERLACE_ALGO_IVTC_H 1
|
|
|
|
/* Forward declarations.
 *
 * The prototypes declared below only pass these types by pointer, so the
 * incomplete struct types are sufficient here.
 */
struct filter_t;
struct picture_t;
|
|
|
|
/*****************************************************************************
|
|
* Data structures
|
|
*****************************************************************************/
|
|
|
|
/** Number of entries in ivtc_sys_t::pi_scores (one per scored field pair). */
#define IVTC_NUM_FIELD_PAIRS 7
/** Length of every per-frame detection history array in ivtc_sys_t. */
#define IVTC_DETECTION_HISTORY_SIZE 3
/** Index of the last slot of a detection history array. */
#define IVTC_LATEST (IVTC_DETECTION_HISTORY_SIZE-1)

/**
 * Algorithm-specific state for IVTC.
 *
 * NOTE(review): the visible part of this header does not include
 * <stdbool.h>; confirm that every includer provides `bool` beforehand.
 *
 * @see RenderIVTC()
 */
typedef struct
{
    int i_mode;     /**< Detecting, hard TC, or soft TC. @see ivtc_mode */
    int i_old_mode; /**< @see IVTCSoftTelecineDetect() */

    int i_cadence_pos; /**< Cadence counter, 0..4. Runs when locked on. */
    int i_tfd;         /**< TFF or BFF telecine. Detected from the video. */

    /** Raw low-level detector output.
     *
     *  @see IVTCLowLevelDetect()
     */
    int pi_scores[IVTC_NUM_FIELD_PAIRS];         /**< Interlace scores. */
    int pi_motion[IVTC_DETECTION_HISTORY_SIZE];  /**< 8x8 blocks with motion. */
    int pi_top_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard top field repeat. */
    int pi_bot_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard bot field repeat. */

    /** Interlace scores of outgoing frames, used for judging IVTC output
     *  (detecting cadence breaks).
     *
     *  @see IVTCOutputOrDropFrame()
     */
    int pi_final_scores[IVTC_DETECTION_HISTORY_SIZE];

    /** Cadence position detection history (in ivtc_cadence_pos format).
     *  Contains the detected cadence position and a corresponding
     *  reliability flag for each algorithm.
     *
     *  s = scores, interlace scores based algorithm, original to this filter.
     *  v = vektor, hard field repeat based algorithm, inspired by
     *      the TVTime/Xine IVTC filter by Billy Biggs (Vektor).
     *
     *  Each algorithm may also keep internal, opaque data.
     *
     *  @see ivtc_cadence_pos
     *  @see IVTCCadenceDetectAlgoScores()
     *  @see IVTCCadenceDetectAlgoVektor()
     */
    int  pi_s_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
    bool pb_s_reliable[IVTC_DETECTION_HISTORY_SIZE];
    int  pi_v_raw[IVTC_DETECTION_HISTORY_SIZE]; /**< "vektor" algo internal */
    int  pi_v_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
    bool pb_v_reliable[IVTC_DETECTION_HISTORY_SIZE];

    /** Final result, chosen by IVTCCadenceDetectFinalize() from the results
     *  given by the different detection algorithms.
     *
     *  @see IVTCCadenceDetectFinalize()
     */
    int pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE];

    /**
     *  Set by cadence analyzer. Whether the sequence of last
     *  IVTC_DETECTION_HISTORY_SIZE detected positions, stored in
     *  pi_cadence_pos_history, looks like a valid telecine.
     *
     *  @see IVTCCadenceAnalyze()
     */
    bool b_sequence_valid;

    /**
     *  Set by cadence analyzer. True if detected position = "dea".
     *  The three entries of this are used for detecting three progressive
     *  stencil positions in a row, i.e. five progressive frames in a row;
     *  this triggers exit from hard IVTC.
     *
     *  @see IVTCCadenceAnalyze()
     */
    bool pb_all_progressives[IVTC_DETECTION_HISTORY_SIZE];
} ivtc_sys_t;
|
|
|
|
/*****************************************************************************
|
|
* Functions
|
|
*****************************************************************************/
|
|
|
|
/**
 * Deinterlace filter. Performs inverse telecine.
 *
 * Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
 *
 * This filter attempts to reconstruct the original film frames from an
 * NTSC telecined signal. It is intended for 24fps progressive material
 * that was telecined to NTSC 60i. For example, most NTSC anime DVDs
 * are like this.
 *
 * There is no input frame parameter, because the input frames
 * are taken from the history buffer.
 *
 * This algorithm does CUSTOM_PTS timestamp mangling.
 *
 * See the file comment for a detailed description of the algorithm.
 *
 * The parameters are spelled with the struct tags that this header
 * forward-declares, so the prototype does not depend on the includer
 * having declared the filter_t / picture_t typedef names first.
 *
 * @param p_filter The filter instance. Must be non-NULL.
 * @param[out] p_dst Output frame. Must be allocated by caller.
 * @return VLC error code (int).
 * @retval VLC_SUCCESS A film frame was reconstructed to p_dst.
 * @retval VLC_EGENERIC Frame dropped as part of normal IVTC operation.
 * @see Deinterlace()
 * @see ComposeFrame()
 * @see CalculateInterlaceScore()
 * @see EstimateNumBlocksWithMotion()
 */
int RenderIVTC( struct filter_t *p_filter, struct picture_t *p_dst );
|
|
|
|
/**
 * Clears the inverse telecine subsystem state.
 *
 * Used during initialization and uninitialization
 * (called from Open() and Flush()).
 *
 * The parameter is spelled with the struct tag that this header
 * forward-declares, so the prototype does not depend on the includer
 * having declared the filter_t typedef name first.
 *
 * @param p_filter The filter instance.
 * @see RenderIVTC()
 * @see Open()
 * @see Flush()
 */
void IVTCClearState( struct filter_t *p_filter );
|
|
|
|
/*****************************************************************************
|
|
* Extra documentation
|
|
*****************************************************************************/
|
|
|
|
/**
|
|
* \file
|
|
* IVTC (inverse telecine) algorithm for the VLC deinterlacer.
|
|
* Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
|
|
*
|
|
* Summary:
|
|
*
|
|
* This is a "live IVTC" filter, which attempts to do in realtime what
|
|
* Transcode's ivtc->decimate->32detect chain does offline. Additionally,
|
|
* it removes soft telecine. It is an original design, based on some ideas
|
|
* from Transcode, some from TVTime/Xine, and some original.
|
|
*
|
|
* If the input material is pure NTSC telecined film, inverse telecine
|
|
* will (ideally) exactly recover the original progressive film frames.
|
|
* The output will run at 4/5 of the original framerate with no loss of
|
|
* information. Interlacing artifacts are removed, and motion becomes
|
|
* as smooth as it was on the original film. For soft-telecined material,
|
|
* on the other hand, the progressive frames already exist, so only the
|
|
* timings are changed such that the output becomes smooth 24fps (or would,
|
|
* if the output device had an infinite framerate).
|
|
*
|
|
* Put in simple terms, this filter is targeted for NTSC movies and
|
|
* especially anime. Virtually all 1990s and early 2000s anime is
|
|
* hard-telecined. Because the source material is like that,
|
|
* IVTC is also needed for virtually all official R1 (US) anime DVDs.
|
|
*
|
|
* Note that some anime from the turn of the century (e.g. Silent Mobius
|
|
* and Sol Bianca) is a hybrid of telecined film and true interlaced
|
|
* computer-generated effects and camera pans. In this case, applying IVTC
|
|
* will effectively attempt to reconstruct the frames based on the film
|
|
* component, but even if this is successful, the framerate reduction will
|
|
* cause the computer-generated effects to stutter. This is mathematically
|
|
* unavoidable. Instead of IVTC, a framerate doubling deinterlacer is
|
|
* recommended for such material. Try "Phosphor", "Bob", or "Linear".
|
|
*
|
|
* Fortunately, 30fps true progressive anime is on the rise (e.g. ARIA,
|
|
* Black Lagoon, Galaxy Angel, Ghost in the Shell: Solid State Society,
|
|
* Mai Otome, Last Exile, and Rocket Girls). This type requires no
|
|
* deinterlacer at all.
|
|
*
|
|
* Another recent trend is using 24fps computer-generated effects and
|
|
* telecining them along with the cels (e.g. Kiddy Grade, Str.A.In. and
|
|
* The Third: The Girl with the Blue Eye). For this group, IVTC is the
|
|
* correct way to deinterlace, and works properly.
|
|
*
|
|
* Soft telecined anime, while rare, also exists. Stellvia of the Universe
|
|
* and Angel Links are examples of this. Stellvia constantly alternates
|
|
* between soft and hard telecine - pure CGI sequences are soft-telecined,
|
|
* while sequences incorporating cel animation are hard-telecined.
|
|
* This makes it very hard for the cadence detector to lock on,
|
|
* and indeed Stellvia gives some trouble for the filter.
|
|
*
|
|
* To finish the list of different material types, Azumanga Daioh deserves
|
|
* a special mention. The OP and ED sequences are both 30fps progressive,
|
|
* while the episodes themselves are hard-telecined. This filter should
|
|
* mostly work correctly with such material, too. (The beginning of the OP
|
|
* shows some artifacts, but otherwise both the OP and ED are indeed
|
|
* rendered progressive. The technical reason is that the filter has been
|
|
* designed to aggressively reconstruct film frames, which helps in many
|
|
* cases with hard-telecined material. In very rare cases, this approach may
|
|
* go wrong, regardless of whether the input is telecined or progressive.)
|
|
*
|
|
* Finally, note also that IVTC is the only correct way to deinterlace NTSC
|
|
* telecined material. Simply applying an interpolating deinterlacing filter
|
|
* (with no framerate doubling) is harmful for two reasons. First, even if
|
|
* the filter does not damage already progressive frames, it will lose half
|
|
* of the available vertical resolution of those frames that are judged
|
|
* interlaced. Some algorithms combining data from multiple frames may be
|
|
* able to counter this to an extent, effectively performing something akin
|
|
* to the frame reconstruction part of IVTC. A more serious problem is that
|
|
* any motion will stutter, because (even in the ideal case) one out of
|
|
* every four film frames will be shown twice, while the other three will
|
|
* be shown only once. Duplicate removal and framerate reduction - which are
|
|
* part of IVTC - are also needed to properly play back telecined material
|
|
* on progressive displays at a non-doubled framerate.
|
|
*
|
|
* So, try this filter on your NTSC anime DVDs. It just might help.
|
|
*
|
|
*
|
|
* Technical details:
|
|
*
|
|
*
|
|
* First, NTSC hard telecine in a nutshell:
|
|
*
|
|
* Film is commonly captured at 24 fps. The framerate must be raised from
|
|
* 24 fps to 59.94 fields per second. This starts by pretending that the
|
|
* original framerate is 23.976 fps. When authoring, the audio can be
|
|
* slowed down by 0.1% to match. Now 59.94 = 5/4 * (2*23.976), which gives
|
|
* a nice ratio made out of small integers.
|
|
*
|
|
* Thus, each group of four film frames must become five frames in the NTSC
|
|
* video stream. One cannot simply repeat one frame of every four, because
|
|
* this would result in jerky motion. To slightly soften the jerkiness,
|
|
* the extra frame is split into two extra fields, inserted at different
|
|
* times. The content of the extra fields is (in classical telecine)
|
|
* duplicated as-is from existing fields.
|
|
*
|
|
* The field duplication technique is called "3:2 pulldown". The pattern
|
|
* is called the cadence. The output from 3:2 pulldown looks like this
|
|
* (if the telecine is TFF, top field first):
|
|
*
|
|
* a b c d e Telecined frame (actual frames stored on DVD)
|
|
* T1 T1 T2 T3 T4 *T*op field content
|
|
* B1 B2 B3 B3 B4 *B*ottom field content
|
|
*
|
|
* Numbers 1-4 denote the original film frames. E.g. T1 = top field of
|
|
* original film frame 1. The field Tb, and one of either Bc or Bd, are
|
|
* the extra fields inserted in the telecine. With exact duplication, it
|
|
* of course doesn't matter whether Bc or Bd is the extra field, but
|
|
* with "full field blended" material (see below) this will affect how to
|
|
* correctly extract film frame 3.
|
|
*
|
|
* See the following web pages for illustrations and discussion:
|
|
* http://neuron2.net/LVG/telecining1.html
|
|
* http://arbor.ee.ntu.edu.tw/~jackeikuo/dvd2avi/ivtc/
|
|
*
|
|
* Note that film frame 2 has been stored "half and half" into two telecined
|
|
* frames (b and c). Note also that telecine produces a sequence of
|
|
* 3 progressive frames (d, e and a) followed by 2 interlaced frames
|
|
* (b and c).
|
|
*
|
|
* The output may also look like this (BFF telecine, bottom field first):
|
|
*
|
|
* a' b' c' d' e'
|
|
* T1 T2 T3 T3 T4
|
|
* B1 B1 B2 B3 B4
|
|
*
|
|
* Now field Bb', and one of either Tc' or Td', are the extra fields.
|
|
* Again, film frame 2 is stored "half and half" (into b' and c').
|
|
*
|
|
* Whether the pattern is like abcde or a'b'c'd'e', depends on the telecine
|
|
* field dominance (TFF or BFF). This must match the video field dominance,
|
|
* but is conceptually different. Importantly, there is no temporal
|
|
* difference between those fields that came from the same film frame.
|
|
* Also, see the section on soft telecine below.
|
|
*
|
|
* In a hard telecine, the TFD and VFD must match for field renderers
|
|
* (e.g. traditional DVD player + CRT TV) to work correctly; this should be
|
|
* fairly obvious by considering the above telecine patterns and how a
|
|
* field renderer displays the material (one field at a time, dominant
|
|
* field first).
|
|
*
|
|
* The VFD may, *correctly*, flip mid-stream, if soft field repeats
|
|
* (repeat_pict) have been used. They are commonly used in soft telecine
|
|
* (see below), but also occasional lone field repeats exist in some streams,
|
|
* e.g., Sol Bianca.
|
|
*
|
|
* See e.g.
|
|
* http://www.cambridgeimaging.co.uk/downloads/Telecine%20field%20dominance.pdf
|
|
* for discussion. The document discusses mostly PAL, but includes some notes
|
|
* on NTSC, too.
|
|
*
|
|
* The reason for the words "classical telecine" above, when field
|
|
* duplication was first mentioned, is that there exists a
|
|
* "full field blended" version, where the added fields are not exact
|
|
* duplicates, but are blends of the original film frames. This is rare
|
|
* in NTSC, but some material like this reportedly exists. See
|
|
* http://www.animemusicvideos.org/guides/avtech/videogetb2a.html
|
|
* In these cases, the additional fields are a (probably 50%) blend of the
|
|
* frames between which they have been inserted. Which one of the two
|
|
* possibilities is the extra field then becomes important.
|
|
* This filter does NOT support "full field blended" material.
|
|
*
|
|
* To summarize, the 3:2 pulldown sequence produces a group of ten fields
|
|
* out of every four film frames. Only eight of these fields are unique.
|
|
* To remove the telecine, the duplicate fields must be removed, and the
|
|
* original progressive frames restored. Additionally, the presentation
|
|
* timestamps (PTS) must be adjusted, and one frame out of five (containing
|
|
* no new information) dropped. The duration of each frame in the output
|
|
* becomes 5/4 of that in the input, i.e. 25% longer.
|
|
*
|
|
* Theoretically, this whole mess could be avoided by soft telecining, if the
|
|
* original material is pure 24fps progressive. By using the stream flags
|
|
* correctly, the original progressive frames can be stored on the DVD.
|
|
* In such cases, the DVD player will apply "soft" 3:2 pulldown. See the
|
|
* following section.
|
|
*
|
|
* Also, the mess with cadence detection for hard telecine (see below) could
|
|
* be avoided by using the progressive frame flag and a five-frame future
|
|
* buffer, but no one ever sets the flag correctly for hard-telecined
|
|
* streams. All frames are marked as interlaced, regardless of their cadence
|
|
* position. This is evil, but sort-of-understandable, given that video
|
|
* editors often come with "progressive" and "interlaced" editing modes,
|
|
* but no separate "telecined" mode that could correctly handle this
|
|
* information.
|
|
*
|
|
* In practice, most material with its origins in Asia (including virtually
|
|
* all official US (R1) anime DVDs) is hard-telecined. Combined with the
|
|
* turn-of-the-century practice of rendering true interlaced effects
|
|
* on top of the hard-telecined stream, we have what can only be described
|
|
* as a monstrosity. Fortunately, recent material is much more consistent,
|
|
* even though still almost always hard-telecined.
|
|
*
|
|
* Finally, note that telecined video is often edited directly in interlaced
|
|
* form, disregarding safe cut positions as pertains to the telecine sequence
|
|
* (there are only two: between "d" and "e", or between "e" and the
|
|
* next "a"). Thus, the telecine sequence will in practice jump erratically
|
|
* at cuts [**]. An aggressive detection strategy is needed to cope with
|
|
* this.
|
|
*
|
|
* [**] http://users.softlab.ece.ntua.gr/~ttsiod/ivtc.html
|
|
*
|
|
*
|
|
* Note about chroma formats: 4:2:0 is very common at least on anime DVDs.
|
|
* In the interlaced frames in a hard telecine, the chroma alternates
|
|
* every chroma line, even if the chroma format is 4:2:0! This means that
|
|
* if the interlaced picture is viewed as-is, the luma alternates every line,
|
|
* while the chroma alternates only every two lines of the picture.
|
|
*
|
|
* That is, an interlaced frame in a 4:2:0 telecine looks like this
|
|
* (numbers indicate which film frame the data comes from):
|
|
*
|
|
* luma stored 4:2:0 chroma displayed chroma
|
|
* 1111 1111 1111
|
|
* 2222 1111
|
|
* 1111 2222 2222
|
|
* 2222 2222
|
|
* ... ... ...
|
|
*
|
|
* The deinterlace filter sees the stored 4:2:0 chroma. The "displayed chroma"
|
|
* is only generated later in the filter chain (probably when YUV is converted
|
|
* to the display format, if the display does not accept YUV 4:2:0 directly).
|
|
*
|
|
*
|
|
* Next, how NTSC soft telecine works:
|
|
*
|
|
* a b c d Frame index (actual frames stored on DVD)
|
|
* T1 T2 T3 T4 *T*op field content
|
|
* B1 B2 B3 B4 *B*ottom field content
|
|
*
|
|
* Here the progressive frames are stored as-is. The catch is in the stream
|
|
* flags. For hard telecine, which was explained above, we have
|
|
* VFD = constant and nb_fields = 2, just like in a true progressive or
|
|
* true interlaced stream. Soft telecine, on the other hand, looks like this:
|
|
*
|
|
* a b c d
|
|
* 3 2 3 2 nb_fields
|
|
* T B B T *Video* field dominance (for TFF telecine)
|
|
* B T T B *Video* field dominance (for BFF telecine)
|
|
*
|
|
* Now the video field dominance flipflops every two frames!
|
|
*
|
|
* Note that nb_fields = 3 means the frame duration will be 1.5x that of a
|
|
* normal frame. Often, soft-telecined frames are correctly flagged as
|
|
* progressive.
|
|
*
|
|
* Here the telecining is expected to be done by the player, utilizing the
|
|
* soft field repeat (repeat_pict) feature. This is indeed what a field
|
|
* renderer (traditional interlaced equipment, or a framerate doubler)
|
|
* should do with such a stream.
|
|
*
|
|
* In the IVTC filter, our job is to even out the frame durations, but
|
|
* disregard video field dominance and just pass the progressive pictures
|
|
* through as-is.
|
|
*
|
|
* Fortunately, for soft telecine to work at all, the stream flags must be
|
|
* set correctly. Thus this type can be detected reliably by reading
|
|
* nb_fields from three consecutive frames:
|
|
*
|
|
* Let P = previous, C = current, N = next. If the frame to be rendered is C,
|
|
* there are only three relevant nb_fields flag patterns for the three-frame
|
|
* stencil concerning soft telecine:
|
|
*
|
|
* P C N What is happening:
|
|
* 2 3 2 Entering soft telecine at frame C, or running inside it already.
|
|
* 3 2 3 Running inside soft telecine.
|
|
* 3 2 2 Exiting soft telecine at frame C. C is the last frame that should
|
|
* be handled as soft-telecined. (If we do timing adjustments to the
|
|
* "3"s only, we can already exit soft telecine mode when we see
|
|
* this pattern.)
|
|
*
|
|
* Note that the same stream may alternate between soft and hard telecine,
|
|
* but these cannot occur at the same time. The start and end of the
|
|
* soft-telecined parts can be read off the stream flags, and the rest of
|
|
* the stream can be handed to the hard IVTC part of the filter for analysis.
|
|
*
|
|
* Finally, note also that a stream may also request a lone field repeat
|
|
* (a sudden "3" surrounded by "2"s). Fortunately, these can be handled as
|
|
* a two-frame soft telecine, as they match the first and third
|
|
* flag patterns above.
|
|
*
|
|
* Combinations with several "3"s in a row are not valid for soft or hard
|
|
* telecine, so if they occur, the frames can be passed through as-is.
|
|
*
|
|
*
|
|
* Cadence detection for hard telecine:
|
|
*
|
|
* Consider viewing the TFF and BFF hard telecine sequences through a
|
|
* three-frame stencil. Again, let P = previous, C = current, N = next.
|
|
* A brief analysis leads to the following cadence tables.
|
|
*
|
|
* PCN = stencil position (Previous Current Next),
|
|
* Dups. = duplicate fields,
|
|
* Best field pairs... = combinations of fields which correctly reproduce
|
|
* the original progressive frames,
|
|
* * = see timestamp considerations below for why
|
|
* this particular arrangement.
|
|
*
|
|
* For TFF:
|
|
*
|
|
* PCN Dups. Best field pairs for progressive (correct, theoretical)
|
|
* abc TP = TC TPBP = frame 1, TCBP = frame 1, TNBC = frame 2
|
|
* bcd BC = BN TCBP = frame 2, TNBC = frame 3, TNBN = frame 3
|
|
* cde BP = BC TCBP = frame 3, TCBC = frame 3, TNBN = frame 4
|
|
* dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
|
|
* eab TC = TN TPBP = frame 4, TCBC = frame 1, TNBC = frame 1
|
|
*
|
|
* (table cont'd)
|
|
* PCN Progressive output*
|
|
* abc frame 2 = TNBC (compose TN+BC)
|
|
* bcd frame 3 = TNBN (copy N)
|
|
* cde frame 4 = TNBN (copy N)
|
|
* dea (drop)
|
|
* eab frame 1 = TCBC (copy C), or TNBC (compose TN+BC)
|
|
*
|
|
* On the rows "dea" and "eab", frame 1 refers to a frame from the next
|
|
* group of 4. "Compose TN+BC" means to construct a frame using the
|
|
* top field of N, and the bottom field of C. See ComposeFrame().
|
|
*
|
|
* For BFF, swap all B and T, and rearrange the symbol pairs to again
|
|
* read "TxBx". We have:
|
|
*
|
|
* PCN Dups. Best field pairs for progressive (correct, theoretical)
|
|
* abc BP = BC TPBP = frame 1, TPBC = frame 1, TCBN = frame 2
|
|
* bcd TC = TN TPBC = frame 2, TCBN = frame 3, TNBN = frame 3
|
|
* cde TP = TC TPBC = frame 3, TCBC = frame 3, TNBN = frame 4
|
|
* dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
|
|
* eab BC = BN TPBP = frame 4, TCBC = frame 1, TCBN = frame 1
|
|
*
|
|
* (table cont'd)
|
|
* PCN Progressive output*
|
|
* abc frame 2 = TCBN (compose TC+BN)
|
|
* bcd frame 3 = TNBN (copy N)
|
|
* cde frame 4 = TNBN (copy N)
|
|
* dea (drop)
|
|
* eab frame 1 = TCBC (copy C), or TCBN (compose TC+BN)
|
|
*
|
|
* From these cadence tables we can extract two strategies for
|
|
* cadence detection. We use both.
|
|
*
|
|
* Strategy 1: duplicated fields ("vektor").
|
|
*
|
|
* Consider that each stencil position has a unique duplicate field
|
|
* condition. In one unique position, "dea", there is no match; in all
|
|
* other positions, exactly one. By conservatively filtering the
|
|
* possibilities based on detected hard field repeats (identical fields
|
|
* in successive input frames), it is possible to gradually lock on
|
|
* to the cadence. This kind of strategy is used by the classic IVTC filter
|
|
* in TVTime/Xine by Billy Biggs (Vektor), hence the name.
|
|
*
|
|
* "Conservative" here means that we do not rule anything out, but start at
|
|
* each stencil position by suggesting the position "dea", and then only add
|
|
* to the list of possibilities based on field repeats that are detected at
|
|
* the present stencil position. This estimate is then filtered by ANDing
|
|
* against a shifted (time-advanced) version of the estimate from the
|
|
* previous stencil position. Once the detected position becomes unique,
|
|
* the filter locks on. If the new detection is inconsistent with the
|
|
* previous one, the detector resets itself and starts from scratch.
|
|
*
|
|
* The strategy is very reliable, as it only requires running (fuzzy)
|
|
* duplicate field detection against the input. It is very good at staying
|
|
* locked on once it acquires the cadence, and it does so correctly very
|
|
* often. These are indeed characteristics that can be observed in the
|
|
* behaviour of the TVTime/Xine filter.
|
|
*
|
|
* Note especially that 8fps/12fps animation, common in anime, will cause
|
|
* spurious hard-repeated fields. The conservative nature of the method
|
|
* makes it very good at dealing with this - any spurious repeats will only
|
|
* slow down the lock-on, not completely confuse it. It should also be good
|
|
* at detecting the presence of a telecine, as neither true interlaced nor
|
|
* true progressive material should contain any hard field repeats.
|
|
* (This, however, has not been tested yet.)
|
|
*
|
|
* The disadvantages are that at times the method may lock on slowly,
|
|
* because the detection must be filtered against the history until
|
|
* a unique solution is found. Resets, if they happen, will also
|
|
* slow down the lock-on.
|
|
*
|
|
* The hard duplicate detection required by this strategy can be made
|
|
* data-adaptive in several ways. TVTime uses a running average of motion
|
|
* scores for its history buffer. We utilize a different, original approach.
|
|
* It is rare, if not nonexistent, that only one field changes between
|
|
* two valid frames. Thus, if one field changes "much more" than the other
|
|
* in fieldwise motion detection, the less changed one is probably a
|
|
* duplicate. Importantly, this works with telecined input, too - the field
|
|
* that changes "much" may be part of another film frame, while the "less"
|
|
* changed one is actually a duplicate from the previous film frame.
|
|
* If both fields change "about as much", then no hard field repeat
|
|
* is detected.
|
|
*
|
|
*
|
|
* Strategy 2: progressive/interlaced field combinations ("scores").
|
|
*
|
|
* We can also form a second strategy, which is not as reliable in practice,
|
|
* but which locks on faster when it does. This is original to this filter.
|
|
*
|
|
* Consider all possible field pairs from two successive frames: TCBC, TCBN,
|
|
* TNBC, TNBN. After one frame, these become TPBP, TPBC, TCBP, TCBC.
|
|
* These eight pairs (seven unique, disregarding the duplicate TCBC)
|
|
* are the exhaustive list of possible field pairs from two successive
|
|
* frames in the three-frame PCN stencil.
|
|
*
|
|
* The above tables list triplets of field pair combinations for each cadence
|
|
* position, which should produce progressive frames. All the given triplets
|
|
* are unique in each table alone, although the one at "dea" is
|
|
* indistinguishable from the case of pure progressive material. It is also
|
|
* the only one which is not unique across both tables.
|
|
*
|
|
* Thus, all sequences of two neighboring triplets are unique across both
|
|
* tables. (For "neighboring", each table is considered to wrap around from
|
|
* "eab" back to "abc", i.e. from the last row back to the first row.)
|
|
* Furthermore, each sequence of three neighboring triplets is redundantly
|
|
* unique (i.e. is unique, and reduces the chance of false positives).
|
|
* (In practice, though, we already know which table to consider, from the fact
|
|
* that TFD and VFD must match. Checking only the relevant table makes the
|
|
* strategy slightly more robust.)
|
|
*
|
|
* The important idea is: *all other* field pair combinations should produce
|
|
* frames that look interlaced. This includes those combinations present in
|
|
* the "wrong" (i.e. not current position) rows of the table (insofar as
|
|
* those combinations are not also present in the "correct" row; by the
|
|
* uniqueness property, *every* "wrong" row will always contain at least one
|
|
* combination that differs from those in the "correct" row).
|
|
*
|
|
* We generate the artificial frames TCBC, TCBN, TNBC and TNBN (virtually;
|
|
* no data is actually moved). Two of these are just the frames C and N,
|
|
* which already exist; the two others correspond to composing the given
|
|
* field pairs. We then compute the interlace score for each of these frames.
|
|
* The interlace scores of what are now TPBP, TPBC and TCBP, also needed,
|
|
* were computed by this same mechanism during the previous input frame.
|
|
* These can be slid in history and reused.
|
|
*
|
|
* We then check, using the computed interlace scores, and taking into
|
|
* account the video field dominance information, which field combination
|
|
* triplet given in the appropriate table produces the smallest sum of
|
|
* interlace scores. Unless we are at PCN = "dea" (which could also be pure
|
|
* progressive!), this immediately gives us the most likely current cadence
|
|
* position. Combined with a two-step history, the sequence of three most
|
|
* likely positions found this way always allows us to make a more or less
|
|
* reliable detection. (That is, when a reliable detection is possible; if the
|
|
* video has no motion at all, every detection will report the position "dea".
|
|
* In anime, still shots are common. Thus we must augment this with a
|
|
* full-frame motion detection that switches the detector off if no motion
|
|
* was detected.)
|
|
*
|
|
* The detection seems to need four full-frame interlace analyses per frame.
|
|
* Actually, three are enough, because the previous N is the new C, so we can
|
|
* slide the already computed result. Also during initialization, we only
|
|
* need to compute TNBN on the first frame; this has become TPBP when the
|
|
* third frame is reached. Similarly, we compute TNBN, TNBC and TCBN during
|
|
* the second frame (just before the filter starts), and these get slid
|
|
* into TCBC, TCBP and TPBC when the third frame is reached. At that point,
|
|
* initialization is complete.
|
|
*
|
|
* Because we only compare interlace scores against each other, no threshold
|
|
* is needed in the cadence detector. Thus it, trivially, adapts to the
|
|
* material automatically.
|
|
*
|
|
* The weakness of this approach is that any comb metric detects incorrectly
|
|
* every now and then. Especially slow vertical camera pans often get treated
|
|
* wrong, because the messed-up field combination looks less interlaced
|
|
* according to the comb metric (especially in anime) than the correct one
|
|
* (which contains, correctly, one-pixel thick cartoon outlines, parts of
|
|
* which are often perfectly horizontal).
|
|
*
|
|
* The advantage is that this strategy catches horizontal camera pans
|
|
* immediately and reliably, while the other strategy may still be trying
|
|
* to lock on.
|
|
*
|
|
*
|
|
* Frame reconstruction:
|
|
*
|
|
* We utilize a hybrid approach. If a valid cadence is locked on, we use the
|
|
* operation table to decide what to do. This handles those cases correctly,
|
|
* which would be difficult for the interlace detector alone (e.g. vertical
|
|
* camera pans). Note that the operations that must be performed for IVTC
|
|
* include timestamp mangling and frame dropping, which can only be done
|
|
* reliably on a valid cadence.
|
|
*
|
|
* When the cadence fails (we detect this from a sudden upward jump in the
|
|
* interlace scores of the constructed frames), we reset the "vektor"
|
|
* detector strategy and fall back to an emergency frame composer, where we
|
|
* use ideas from Transcode's IVTC.
|
|
*
|
|
* In this emergency mode, we simply output the least interlaced frame out of
|
|
* the combinations TNBN, TNBC and TCBN (where only one of the last two is
|
|
* tested, based on the stream TFF/BFF information). In this mode, we do not
|
|
* touch the timestamps, and just pass all five frames from each group right
|
|
* through. This introduces some stutter, but in practice it is often not
|
|
* noticeable. This is because the kind of material that is likely to trip up
|
|
* the cadence detector usually includes irregular 8fps/12fps motion. With
|
|
* true 24fps motion, the cadence quickly locks on, and stays locked on.
|
|
*
|
|
* Once the cadence locks on again, we resume normal operation based on
|
|
* the operation table.
|
|
*
|
|
*
|
|
* Timestamp mangling:
|
|
*
|
|
* To make five into four we need to extend frame durations by 25%.
|
|
* Consider the following diagram (times given in 90kHz ticks, rounded to
|
|
* integers; this is just for illustration, and for comparison with the
|
|
* "scratch paper" comments in pulldown.c of TVTime/Xine):
|
|
*
|
|
* NTSC input (29.97 fps)
|
|
* a b c d e a (from next group) ...
|
|
* 0 3003 6006 9009 12012 15015
|
|
* 0 3754 7508 11261 15015
|
|
* 1 2 3 4 1 (from next group) ...
|
|
* Film output (23.976 fps)
|
|
*
|
|
* Three of the film frames have length 3754, and one has 3753
|
|
* (it is 1/90000 sec shorter). This rounding was chosen so that the lengths
|
|
* of the group of four sum to the original 15015.
|
|
*
|
|
* From the diagram we get these deltas for presentation timestamp adjustment
|
|
* (in 90 kHz ticks, for illustration):
|
|
* (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
|
|
* 0 +751 +1502 +2252 (skip) 0 ...
|
|
*
|
|
* In fractions of (p_next->date - p_cur->date), regardless of actual
|
|
* time unit, the deltas are:
|
|
* (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
|
|
* 0 +0.25 +0.50 +0.75 (skip) 0 ...
|
|
*
|
|
* This is what we actually use. In our implementation, the values are stored
|
|
* multiplied by 4, as integers.
|
|
*
|
|
* The "current" frame should be displayed at [original time + delta].
|
|
* E.g., when "current" = b (i.e. PCN = abc), start displaying film frame 2
|
|
* at time [original time of b + 751 ticks]. So, when we catch the cadence,
|
|
* we will start mangling the timestamps according to the cadence position
|
|
* of the "current" frame, using the deltas given above. This will cause
|
|
* a one-time jerk, most noticeable if the cadence happens to catch at
|
|
* position "d". (Alternatively, upon lock-on, we could wait until we are
|
|
* at "a" before switching on IVTC, but this makes the maximal delay
|
|
* [max. detection + max. wait] = 3 + 4 = 7 input frames, which comes to
|
|
* 7/30 ~ 0.23 seconds instead of the 3/30 = 0.10 seconds from purely
|
|
* the detection. The one-time jerk is simpler to implement and gives the
|
|
* faster lock-on.)
|
|
*
|
|
* It is clear that "e" is a safe choice for the dropped frame. This can be
|
|
* seen from the timings and the cadence tables. First, consider the timings.
|
|
* If we have only one future frame, "e" is the only one whose PTS, compared
|
|
* to the film frames, allows dropping it safely. To see this, consider which
|
|
* film frame needs to be rendered as each new input frame arrives. Secondly,
|
|
* consider the cadence tables. It is ok to drop "e", because the same
|
|
* film frame "1" is available also at the next PCN position "eab".
|
|
* (As a side note, it is interesting that Vektor's filter drops "b".
|
|
* See the TVTime sources.)
|
|
*
|
|
* When the filter falls out of film mode, the timestamps of the incoming
|
|
* frames are left untouched. Thus, the output from this filter has a
|
|
* variable framerate: 4/5 of the input framerate when IVTC is active
|
|
* (whether hard or soft), and the same framerate as input when it is not
|
|
* (or when in emergency mode).
|
|
*
|
|
*
|
|
* For other open-source IVTC codes, which may be a useful source for ideas,
|
|
* see the following:
|
|
*
|
|
* The classic filter by Billy Biggs (Vektor). Written in 2001-2003 for
|
|
* TVTime, and adapted into Xine later. In xine-lib 1.1.19, it is at
|
|
* src/post/deinterlace/pulldown.*. Also needed are tvtime.*, and speedy.*.
|
|
*
|
|
* Transcode's ivtc->decimate->32detect chain by Thanassis Tsiodras.
|
|
* Written in 2002, added in Transcode 0.6.12. This probably has something
|
|
* to do with the same chain in MPlayer, considering that MPlayer acquired
|
|
* an IVTC filter around the same time. In Transcode 1.1.5, the IVTC part is
|
|
* at filter/filter_ivtc.c. Transcode 1.1.5 sources can be downloaded from
|
|
* http://developer.berlios.de/project/showfiles.php?group_id=10094
|
|
*/
|
|
|
|
#endif
|