1
mirror of https://code.videolan.org/videolan/vlc synced 2024-09-04 09:11:33 +02:00
vlc/modules/video_filter/deinterlace/algo_ivtc.h
Juha Jeronen eb4b233b71 deinterlace: algo_ivtc.h: typo in comment
Signed-off-by: Jean-Baptiste Kempf <jb@videolan.org>
2011-05-02 17:18:58 +02:00

737 lines
35 KiB
C

/*****************************************************************************
* algo_ivtc.h : IVTC (inverse telecine) algorithm for the VLC deinterlacer
*****************************************************************************
* Copyright (C) 2010-2011 the VideoLAN team
* $Id$
*
* Author: Juha Jeronen <juha.jeronen@jyu.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
*****************************************************************************/
#ifndef VLC_DEINTERLACE_ALGO_IVTC_H
#define VLC_DEINTERLACE_ALGO_IVTC_H 1
/* Standard headers. <stdbool.h> provides the bool type used by the members
 * of ivtc_sys_t, so that this header does not rely on the includer having
 * pulled it in beforehand. */
#include <stdbool.h>

/* Forward declarations. Only pointers to these types appear in this header,
 * so the incomplete struct types are sufficient. */
struct filter_t;
struct picture_t;
/*****************************************************************************
* Data structures
*****************************************************************************/
/** Number of entries in ivtc_sys_t::pi_scores, i.e. the number of field
 *  pairs for which an interlace score is kept. */
#define IVTC_NUM_FIELD_PAIRS 7

/** Number of entries in each detection history array of ivtc_sys_t. */
#define IVTC_DETECTION_HISTORY_SIZE 3

/** Index of the last slot of a detection history array.
 *  NOTE(review): presumably the slot holding the most recent detection -
 *  confirm against the implementation. */
#define IVTC_LATEST ( IVTC_DETECTION_HISTORY_SIZE - 1 )
/**
 * Algorithm-specific state for IVTC.
 *
 * Groups the operating mode, the cadence counter, the raw detector output
 * and the detection histories of the cadence detection algorithms.
 * pi_scores holds IVTC_NUM_FIELD_PAIRS entries; every other array holds
 * IVTC_DETECTION_HISTORY_SIZE entries.
 *
 * @see RenderIVTC()
 */
typedef struct
{
int i_mode; /**< Detecting, hard TC, or soft TC. @see ivtc_mode */
int i_old_mode; /**< NOTE(review): presumably the previous value of i_mode - confirm. @see IVTCSoftTelecineDetect() */
int i_cadence_pos; /**< Cadence counter, 0..4. Runs when locked on. */
int i_tfd; /**< Telecine field dominance: TFF or BFF telecine. Detected from the video. */
/** Raw low-level detector output.
 *
 * Applies to pi_scores, pi_motion, pi_top_rep and pi_bot_rep.
 *
 * @see IVTCLowLevelDetect()
 */
int pi_scores[IVTC_NUM_FIELD_PAIRS]; /**< Interlace scores, one per field pair. */
int pi_motion[IVTC_DETECTION_HISTORY_SIZE]; /**< 8x8 blocks with motion. */
int pi_top_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard top field repeat. */
int pi_bot_rep[IVTC_DETECTION_HISTORY_SIZE]; /**< Hard bottom field repeat. */
/** Interlace scores of outgoing frames, used for judging IVTC output
 * (detecting cadence breaks).
 *
 * @see IVTCOutputOrDropFrame()
 */
int pi_final_scores[IVTC_DETECTION_HISTORY_SIZE];
/** Cadence position detection history (in ivtc_cadence_pos format).
 * Contains the detected cadence position and a corresponding
 * reliability flag for each algorithm.
 *
 * s = scores, interlace scores based algorithm, original to this filter.
 * v = vektor, hard field repeat based algorithm, inspired by
 *     the TVTime/Xine IVTC filter by Billy Biggs (Vektor).
 *
 * Members:
 *  - pi_s_cadence_pos, pb_s_reliable: detected position and reliability
 *    flag of the "scores" algorithm.
 *  - pi_v_cadence_pos, pb_v_reliable: detected position and reliability
 *    flag of the "vektor" algorithm.
 *  - pi_v_raw: internal, opaque data of the "vektor" algorithm.
 *
 * Each algorithm may also keep internal, opaque data.
 *
 * @see ivtc_cadence_pos
 * @see IVTCCadenceDetectAlgoScores()
 * @see IVTCCadenceDetectAlgoVektor()
 */
int pi_s_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
bool pb_s_reliable[IVTC_DETECTION_HISTORY_SIZE];
int pi_v_raw[IVTC_DETECTION_HISTORY_SIZE]; /**< "vektor" algorithm internal data. */
int pi_v_cadence_pos[IVTC_DETECTION_HISTORY_SIZE];
bool pb_v_reliable[IVTC_DETECTION_HISTORY_SIZE];
/** Final result, chosen by IVTCCadenceDetectFinalize() from the results
 * given by the different detection algorithms.
 *
 * @see IVTCCadenceDetectFinalize()
 */
int pi_cadence_pos_history[IVTC_DETECTION_HISTORY_SIZE];
/**
 * Set by cadence analyzer. Whether the sequence of the last
 * IVTC_DETECTION_HISTORY_SIZE detected positions, stored in
 * pi_cadence_pos_history, looks like a valid telecine.
 *
 * @see IVTCCadenceAnalyze()
 */
bool b_sequence_valid;
/**
 * Set by cadence analyzer. True if detected position = "dea".
 * The three entries of this are used for detecting three progressive
 * stencil positions in a row, i.e. five progressive frames in a row;
 * this triggers exit from hard IVTC.
 *
 * @see IVTCCadenceAnalyze()
 */
bool pb_all_progressives[IVTC_DETECTION_HISTORY_SIZE];
} ivtc_sys_t;
/*****************************************************************************
* Functions
*****************************************************************************/
/**
 * Deinterlace filter. Performs inverse telecine.
 *
 * Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
 *
 * This filter attempts to reconstruct the original film frames from an
 * NTSC telecined signal. It is intended for 24fps progressive material
 * that was telecined to NTSC 60i. For example, most NTSC anime DVDs
 * are like this.
 *
 * There is no input frame parameter, because the input frames
 * are taken from the history buffer.
 *
 * This algorithm does CUSTOM_PTS timestamp mangling.
 *
 * See the file comment for a detailed description of the algorithm.
 *
 * The parameter types are written with their struct tags, so that the
 * declaration depends only on the forward declarations made in this header
 * and not on typedef names provided by an earlier include.
 * NOTE(review): assumes the project-wide filter_t / picture_t typedefs name
 * exactly these struct tags - confirm.
 *
 * @param p_filter The filter instance. Must be non-NULL.
 * @param[out] p_dst Output frame. Must be allocated by caller.
 * @return VLC error code (int).
 * @retval VLC_SUCCESS A film frame was reconstructed to p_dst.
 * @retval VLC_EGENERIC Frame dropped as part of normal IVTC operation.
 * @see Deinterlace()
 * @see ComposeFrame()
 * @see CalculateInterlaceScore()
 * @see EstimateNumBlocksWithMotion()
 */
int RenderIVTC( struct filter_t *p_filter, struct picture_t *p_dst );
/**
 * Clears the inverse telecine subsystem state.
 *
 * Used during initialization and uninitialization
 * (called from Open() and Flush()).
 *
 * The parameter type is written with its struct tag, so that the
 * declaration depends only on the forward declaration made in this header
 * and not on a typedef name provided by an earlier include.
 * NOTE(review): assumes the project-wide filter_t typedef names exactly
 * this struct tag - confirm.
 *
 * @param p_filter The filter instance.
 * @see RenderIVTC()
 * @see Open()
 * @see Flush()
 */
void IVTCClearState( struct filter_t *p_filter );
/*****************************************************************************
* Extra documentation
*****************************************************************************/
/**
* \file
* IVTC (inverse telecine) algorithm for the VLC deinterlacer.
* Also known as "film mode" or "3:2 reverse pulldown" in some equipment.
*
* Summary:
*
* This is a "live IVTC" filter, which attempts to do in realtime what
* Transcode's ivtc->decimate->32detect chain does offline. Additionally,
* it removes soft telecine. It is an original design, based on some ideas
* from Transcode, some from TVTime/Xine, and some original.
*
* If the input material is pure NTSC telecined film, inverse telecine
* will (ideally) exactly recover the original progressive film frames.
* The output will run at 4/5 of the original framerate with no loss of
* information. Interlacing artifacts are removed, and motion becomes
* as smooth as it was on the original film. For soft-telecined material,
* on the other hand, the progressive frames already exist, so only the
* timings are changed such that the output becomes smooth 24fps (or would,
* if the output device had an infinite framerate).
*
* Put in simple terms, this filter is targeted for NTSC movies and
* especially anime. Virtually all 1990s and early 2000s anime is
* hard-telecined. Because the source material is like that,
* IVTC is also needed for virtually all official R1 (US) anime DVDs.
*
* Note that some anime from the turn of the century (e.g. Silent Mobius
* and Sol Bianca) is a hybrid of telecined film and true interlaced
* computer-generated effects and camera pans. In this case, applying IVTC
* will effectively attempt to reconstruct the frames based on the film
* component, but even if this is successful, the framerate reduction will
* cause the computer-generated effects to stutter. This is mathematically
* unavoidable. Instead of IVTC, a framerate doubling deinterlacer is
* recommended for such material. Try "Phosphor", "Bob", or "Linear".
*
* Fortunately, 30fps true progressive anime is on the rise (e.g. ARIA,
* Black Lagoon, Galaxy Angel, Ghost in the Shell: Solid State Society,
* Mai Otome, Last Exile, and Rocket Girls). This type requires no
* deinterlacer at all.
*
* Another recent trend is using 24fps computer-generated effects and
* telecining them along with the cels (e.g. Kiddy Grade, Str.A.In. and
* The Third: The Girl with the Blue Eye). For this group, IVTC is the
* correct way to deinterlace, and works properly.
*
* Soft telecined anime, while rare, also exists. Stellvia of the Universe
* and Angel Links are examples of this. Stellvia constantly alternates
* between soft and hard telecine - pure CGI sequences are soft-telecined,
* while sequences incorporating cel animation are hard-telecined.
* This makes it very hard for the cadence detector to lock on,
* and indeed Stellvia gives some trouble for the filter.
*
* To finish the list of different material types, Azumanga Daioh deserves
* a special mention. The OP and ED sequences are both 30fps progressive,
* while the episodes themselves are hard-telecined. This filter should
* mostly work correctly with such material, too. (The beginning of the OP
* shows some artifacts, but otherwise both the OP and ED are indeed
* rendered progressive. The technical reason is that the filter has been
* designed to aggressively reconstruct film frames, which helps in many
* cases with hard-telecined material. In very rare cases, this approach may
* go wrong, regardless of whether the input is telecined or progressive.)
*
* Finally, note also that IVTC is the only correct way to deinterlace NTSC
* telecined material. Simply applying an interpolating deinterlacing filter
* (with no framerate doubling) is harmful for two reasons. First, even if
* the filter does not damage already progressive frames, it will lose half
* of the available vertical resolution of those frames that are judged
* interlaced. Some algorithms combining data from multiple frames may be
* able to counter this to an extent, effectively performing something akin
* to the frame reconstruction part of IVTC. A more serious problem is that
* any motion will stutter, because (even in the ideal case) one out of
* every four film frames will be shown twice, while the other three will
* be shown only once. Duplicate removal and framerate reduction - which are
* part of IVTC - are also needed to properly play back telecined material
* on progressive displays at a non-doubled framerate.
*
* So, try this filter on your NTSC anime DVDs. It just might help.
*
*
* Technical details:
*
*
* First, NTSC hard telecine in a nutshell:
*
* Film is commonly captured at 24 fps. The framerate must be raised from
* 24 fps to 59.94 fields per second. This starts by pretending that the
* original framerate is 23.976 fps. When authoring, the audio can be
* slowed down by 0.1% to match. Now 59.94 = 5/4 * (2*23.976), which gives
* a nice ratio made out of small integers.
*
* Thus, each group of four film frames must become five frames in the NTSC
* video stream. One cannot simply repeat one frame of every four, because
* this would result in jerky motion. To slightly soften the jerkiness,
* the extra frame is split into two extra fields, inserted at different
* times. The content of the extra fields is (in classical telecine)
* duplicated as-is from existing fields.
*
* The field duplication technique is called "3:2 pulldown". The pattern
* is called the cadence. The output from 3:2 pulldown looks like this
* (if the telecine is TFF, top field first):
*
* a  b  c  d  e     Telecined frame (actual frames stored on DVD)
* T1 T1 T2 T3 T4    *T*op field content
* B1 B2 B3 B3 B4    *B*ottom field content
*
* Numbers 1-4 denote the original film frames. E.g. T1 = top field of
* original film frame 1. The field Tb, and one of either Bc or Bd, are
* the extra fields inserted in the telecine. With exact duplication, it
* of course doesn't matter whether Bc or Bd is the extra field, but
* with "full field blended" material (see below) this will affect how to
* correctly extract film frame 3.
*
* See the following web pages for illustrations and discussion:
* http://neuron2.net/LVG/telecining1.html
* http://arbor.ee.ntu.edu.tw/~jackeikuo/dvd2avi/ivtc/
*
* Note that film frame 2 has been stored "half and half" into two telecined
* frames (b and c). Note also that telecine produces a sequence of
* 3 progressive frames (d, e and a) followed by 2 interlaced frames
* (b and c).
*
* The output may also look like this (BFF telecine, bottom field first):
*
* a' b' c' d' e'
* T1 T2 T3 T3 T4
* B1 B1 B2 B3 B4
*
* Now field Bb', and one of either Tc' or Td', are the extra fields.
* Again, film frame 2 is stored "half and half" (into b' and c').
*
* Whether the pattern is like abcde or a'b'c'd'e', depends on the telecine
* field dominance (TFF or BFF). This must match the video field dominance,
* but is conceptually different. Importantly, there is no temporal
* difference between those fields that came from the same film frame.
* Also, see the section on soft telecine below.
*
* In a hard telecine, the TFD and VFD must match for field renderers
* (e.g. traditional DVD player + CRT TV) to work correctly; this should be
* fairly obvious by considering the above telecine patterns and how a
* field renderer displays the material (one field at a time, dominant
* field first).
*
* The VFD may, *correctly*, flip mid-stream, if soft field repeats
* (repeat_pict) have been used. They are commonly used in soft telecine
* (see below), but also occasional lone field repeats exist in some streams,
* e.g., Sol Bianca.
*
* See e.g.
* http://www.cambridgeimaging.co.uk/downloads/Telecine%20field%20dominance.pdf
* for discussion. The document discusses mostly PAL, but includes some notes
* on NTSC, too.
*
* The reason for the words "classical telecine" above, when field
* duplication was first mentioned, is that there exists a
* "full field blended" version, where the added fields are not exact
* duplicates, but are blends of the original film frames. This is rare
* in NTSC, but some material like this reportedly exists. See
* http://www.animemusicvideos.org/guides/avtech/videogetb2a.html
* In these cases, the additional fields are a (probably 50%) blend of the
* frames between which they have been inserted. Which one of the two
* possibilities is the extra field then becomes important.
* This filter does NOT support "full field blended" material.
*
* To summarize, the 3:2 pulldown sequence produces a group of ten fields
* out of every four film frames. Only eight of these fields are unique.
* To remove the telecine, the duplicate fields must be removed, and the
* original progressive frames restored. Additionally, the presentation
* timestamps (PTS) must be adjusted, and one frame out of five (containing
* no new information) dropped. The duration of each frame in the output
* becomes 5/4 of that in the input, i.e. 25% longer.
*
* Theoretically, this whole mess could be avoided by soft telecining, if the
* original material is pure 24fps progressive. By using the stream flags
* correctly, the original progressive frames can be stored on the DVD.
* In such cases, the DVD player will apply "soft" 3:2 pulldown. See the
* following section.
*
* Also, the mess with cadence detection for hard telecine (see below) could
* be avoided by using the progressive frame flag and a five-frame future
* buffer, but no one ever sets the flag correctly for hard-telecined
* streams. All frames are marked as interlaced, regardless of their cadence
* position. This is evil, but sort-of-understandable, given that video
* editors often come with "progressive" and "interlaced" editing modes,
* but no separate "telecined" mode that could correctly handle this
* information.
*
* In practice, most material with its origins in Asia (including virtually
* all official US (R1) anime DVDs) is hard-telecined. Combined with the
* turn-of-the-century practice of rendering true interlaced effects
* on top of the hard-telecined stream, we have what can only be described
* as a monstrosity. Fortunately, recent material is much more consistent,
* even though still almost always hard-telecined.
*
* Finally, note that telecined video is often edited directly in interlaced
* form, disregarding safe cut positions as pertains to the telecine sequence
* (there are only two: between "d" and "e", or between "e" and the
* next "a"). Thus, the telecine sequence will in practice jump erratically
* at cuts [**]. An aggressive detection strategy is needed to cope with
* this.
*
* [**] http://users.softlab.ece.ntua.gr/~ttsiod/ivtc.html
*
*
* Note about chroma formats: 4:2:0 is very common at least on anime DVDs.
* In the interlaced frames in a hard telecine, the chroma alternates
* every chroma line, even if the chroma format is 4:2:0! This means that
* if the interlaced picture is viewed as-is, the luma alternates every line,
* while the chroma alternates only every two lines of the picture.
*
* That is, an interlaced frame in a 4:2:0 telecine looks like this
* (numbers indicate which film frame the data comes from):
*
* luma    stored 4:2:0 chroma    displayed chroma
* 1111    1111                   1111
* 2222                           1111
* 1111    2222                   2222
* 2222                           2222
* ...     ...                    ...
*
* The deinterlace filter sees the stored 4:2:0 chroma. The "displayed chroma"
* is only generated later in the filter chain (probably when YUV is converted
* to the display format, if the display does not accept YUV 4:2:0 directly).
*
*
* Next, how NTSC soft telecine works:
*
* a  b  c  d     Frame index (actual frames stored on DVD)
* T1 T2 T3 T4    *T*op field content
* B1 B2 B3 B4    *B*ottom field content
*
* Here the progressive frames are stored as-is. The catch is in the stream
* flags. For hard telecine, which was explained above, we have
* VFD = constant and nb_fields = 2, just like in a true progressive or
* true interlaced stream. Soft telecine, on the other hand, looks like this:
*
* a b c d
* 3 2 3 2 nb_fields
* T B B T *Video* field dominance (for TFF telecine)
* B T T B *Video* field dominance (for BFF telecine)
*
* Now the video field dominance flipflops every two frames!
*
* Note that nb_fields = 3 means the frame duration will be 1.5x that of a
* normal frame. Often, soft-telecined frames are correctly flagged as
* progressive.
*
* Here the telecining is expected to be done by the player, utilizing the
* soft field repeat (repeat_pict) feature. This is indeed what a field
* renderer (traditional interlaced equipment, or a framerate doubler)
* should do with such a stream.
*
* In the IVTC filter, our job is to even out the frame durations, but
* disregard video field dominance and just pass the progressive pictures
* through as-is.
*
* Fortunately, for soft telecine to work at all, the stream flags must be
* set correctly. Thus this type can be detected reliably by reading
* nb_fields from three consecutive frames:
*
* Let P = previous, C = current, N = next. If the frame to be rendered is C,
* there are only three relevant nb_fields flag patterns for the three-frame
* stencil concerning soft telecine:
*
* P C N What is happening:
* 2 3 2 Entering soft telecine at frame C, or running inside it already.
* 3 2 3 Running inside soft telecine.
* 3 2 2 Exiting soft telecine at frame C. C is the last frame that should
* be handled as soft-telecined. (If we do timing adjustments to the
* "3"s only, we can already exit soft telecine mode when we see
* this pattern.)
*
* Note that the same stream may alternate between soft and hard telecine,
* but these cannot occur at the same time. The start and end of the
* soft-telecined parts can be read off the stream flags, and the rest of
* the stream can be handed to the hard IVTC part of the filter for analysis.
*
* Finally, note also that a stream may also request a lone field repeat
* (a sudden "3" surrounded by "2"s). Fortunately, these can be handled as
* a two-frame soft telecine, as they match the first and third
* flag patterns above.
*
* Combinations with several "3"s in a row are not valid for soft or hard
* telecine, so if they occur, the frames can be passed through as-is.
*
*
* Cadence detection for hard telecine:
*
* Consider viewing the TFF and BFF hard telecine sequences through a
* three-frame stencil. Again, let P = previous, C = current, N = next.
* A brief analysis leads to the following cadence tables.
*
* PCN = stencil position (Previous Current Next),
* Dups. = duplicate fields,
* Best field pairs... = combinations of fields which correctly reproduce
* the original progressive frames,
* * = see timestamp considerations below for why
* this particular arrangement.
*
* For TFF:
*
* PCN Dups. Best field pairs for progressive (correct, theoretical)
* abc TP = TC TPBP = frame 1, TCBP = frame 1, TNBC = frame 2
* bcd BC = BN TCBP = frame 2, TNBC = frame 3, TNBN = frame 3
* cde BP = BC TCBP = frame 3, TCBC = frame 3, TNBN = frame 4
* dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
* eab TC = TN TPBP = frame 4, TCBC = frame 1, TNBC = frame 1
*
* (table cont'd)
* PCN Progressive output*
* abc frame 2 = TNBC (compose TN+BC)
* bcd frame 3 = TNBN (copy N)
* cde frame 4 = TNBN (copy N)
* dea (drop)
* eab frame 1 = TCBC (copy C), or TNBC (compose TN+BC)
*
* On the rows "dea" and "eab", frame 1 refers to a frame from the next
* group of 4. "Compose TN+BC" means to construct a frame using the
* top field of N, and the bottom field of C. See ComposeFrame().
*
* For BFF, swap all B and T, and rearrange the symbol pairs to again
* read "TxBx". We have:
*
* PCN Dups. Best field pairs for progressive (correct, theoretical)
* abc BP = BC TPBP = frame 1, TPBC = frame 1, TCBN = frame 2
* bcd TC = TN TPBC = frame 2, TCBN = frame 3, TNBN = frame 3
* cde TP = TC TPBC = frame 3, TCBC = frame 3, TNBN = frame 4
* dea none TPBP = frame 3, TCBC = frame 4, TNBN = frame 1
* eab BC = BN TPBP = frame 4, TCBC = frame 1, TCBN = frame 1
*
* (table cont'd)
* PCN Progressive output*
* abc frame 2 = TCBN (compose TC+BN)
* bcd frame 3 = TNBN (copy N)
* cde frame 4 = TNBN (copy N)
* dea (drop)
* eab frame 1 = TCBC (copy C), or TCBN (compose TC+BN)
*
* From these cadence tables we can extract two strategies for
* cadence detection. We use both.
*
* Strategy 1: duplicated fields ("vektor").
*
* Consider that each stencil position has a unique duplicate field
* condition. In one unique position, "dea", there is no match; in all
* other positions, exactly one. By conservatively filtering the
* possibilities based on detected hard field repeats (identical fields
* in successive input frames), it is possible to gradually lock on
* to the cadence. This kind of strategy is used by the classic IVTC filter
* in TVTime/Xine by Billy Biggs (Vektor), hence the name.
*
* "Conservative" here means that we do not rule anything out, but start at
* each stencil position by suggesting the position "dea", and then only add
* to the list of possibilities based on field repeats that are detected at
* the present stencil position. This estimate is then filtered by ANDing
* against a shifted (time-advanced) version of the estimate from the
* previous stencil position. Once the detected position becomes unique,
* the filter locks on. If the new detection is inconsistent with the
* previous one, the detector resets itself and starts from scratch.
*
* The strategy is very reliable, as it only requires running (fuzzy)
* duplicate field detection against the input. It is very good at staying
* locked on once it acquires the cadence, and it does so correctly very
* often. These are indeed characteristics that can be observed in the
* behaviour of the TVTime/Xine filter.
*
* Note especially that 8fps/12fps animation, common in anime, will cause
* spurious hard-repeated fields. The conservative nature of the method
* makes it very good at dealing with this - any spurious repeats will only
* slow down the lock-on, not completely confuse it. It should also be good
* at detecting the presence of a telecine, as neither true interlaced nor
* true progressive material should contain any hard field repeats.
* (This, however, has not been tested yet.)
*
* The disadvantages are that at times the method may lock on slowly,
* because the detection must be filtered against the history until
* a unique solution is found. Resets, if they happen, will also
* slow down the lock-on.
*
* The hard duplicate detection required by this strategy can be made
* data-adaptive in several ways. TVTime uses a running average of motion
* scores for its history buffer. We utilize a different, original approach.
* It is rare, if not nonexistent, that only one field changes between
* two valid frames. Thus, if one field changes "much more" than the other
* in fieldwise motion detection, the less changed one is probably a
* duplicate. Importantly, this works with telecined input, too - the field
* that changes "much" may be part of another film frame, while the "less"
* changed one is actually a duplicate from the previous film frame.
* If both fields change "about as much", then no hard field repeat
* is detected.
*
*
* Strategy 2: progressive/interlaced field combinations ("scores").
*
* We can also form a second strategy, which is not as reliable in practice,
* but which locks on faster when it does. This is original to this filter.
*
* Consider all possible field pairs from two successive frames: TCBC, TCBN,
* TNBC, TNBN. After one frame, these become TPBP, TPBC, TCBP, TCBC.
* These eight pairs (seven unique, disregarding the duplicate TCBC)
* are the exhaustive list of possible field pairs from two successive
* frames in the three-frame PCN stencil.
*
* The above tables list triplets of field pair combinations for each cadence
* position, which should produce progressive frames. All the given triplets
* are unique in each table alone, although the one at "dea" is
* indistinguishable from the case of pure progressive material. It is also
* the only one which is not unique across both tables.
*
* Thus, all sequences of two neighboring triplets are unique across both
* tables. (For "neighboring", each table is considered to wrap around from
* "eab" back to "abc", i.e. from the last row back to the first row.)
* Furthermore, each sequence of three neighboring triplets is redundantly
* unique (i.e. is unique, and reduces the chance of false positives).
* (In practice, though, we already know which table to consider, from the fact
* that TFD and VFD must match. Checking only the relevant table makes the
* strategy slightly more robust.)
*
* The important idea is: *all other* field pair combinations should produce
* frames that look interlaced. This includes those combinations present in
* the "wrong" (i.e. not current position) rows of the table (insofar as
* those combinations are not also present in the "correct" row; by the
* uniqueness property, *every* "wrong" row will always contain at least one
* combination that differs from those in the "correct" row).
*
* We generate the artificial frames TCBC, TCBN, TNBC and TNBN (virtually;
* no data is actually moved). Two of these are just the frames C and N,
* which already exist; the two others correspond to composing the given
* field pairs. We then compute the interlace score for each of these frames.
* The interlace scores of what are now TPBP, TPBC and TCBP, also needed,
* were computed by this same mechanism during the previous input frame.
* These can be slid along in the history and reused.
*
* We then check, using the computed interlace scores, and taking into
* account the video field dominance information, which field combination
* triplet given in the appropriate table produces the smallest sum of
* interlace scores. Unless we are at PCN = "dea" (which could also be pure
* progressive!), this immediately gives us the most likely current cadence
* position. Combined with a two-step history, the sequence of three most
* likely positions found this way always allows us to make a more or less
* reliable detection. (That is, when a reliable detection is possible; if the
* video has no motion at all, every detection will report the position "dea".
* In anime, still shots are common. Thus we must augment this with a
* full-frame motion detection that switches the detector off if no motion
* was detected.)
*
* The detection seems to need four full-frame interlace analyses per frame.
* Actually, three are enough, because the previous N is the new C, so we can
* slide the already computed result. Also during initialization, we only
* need to compute TNBN on the first frame; this has become TPBP when the
* third frame is reached. Similarly, we compute TNBN, TNBC and TCBN during
* the second frame (just before the filter starts), and these get slid
* into TCBC, TCBP and TPBC when the third frame is reached. At that point,
* initialization is complete.
*
* Because we only compare interlace scores against each other, no threshold
* is needed in the cadence detector. Thus it, trivially, adapts to the
* material automatically.
*
* The weakness of this approach is that any comb metric detects incorrectly
* every now and then. Especially slow vertical camera pans often get treated
* wrong, because the messed-up field combination looks less interlaced
* according to the comb metric (especially in anime) than the correct one
* (which contains, correctly, one-pixel thick cartoon outlines, parts of
* which are often perfectly horizontal).
*
* The advantage is that this strategy catches horizontal camera pans
* immediately and reliably, while the other strategy may still be trying
* to lock on.
*
*
* Frame reconstruction:
*
* We utilize a hybrid approach. If a valid cadence is locked on, we use the
* operation table to decide what to do. This handles those cases correctly,
* which would be difficult for the interlace detector alone (e.g. vertical
* camera pans). Note that the operations that must be performed for IVTC
* include timestamp mangling and frame dropping, which can only be done
* reliably on a valid cadence.
*
* When the cadence fails (we detect this from a sudden upward jump in the
* interlace scores of the constructed frames), we reset the "vektor"
* detector strategy and fall back to an emergency frame composer, where we
* use ideas from Transcode's IVTC.
*
* In this emergency mode, we simply output the least interlaced frame out of
* the combinations TNBN, TNBC and TCBN (where only one of the last two is
* tested, based on the stream TFF/BFF information). In this mode, we do not
* touch the timestamps, and just pass all five frames from each group right
* through. This introduces some stutter, but in practice it is often not
* noticeable. This is because the kind of material that is likely to trip up
* the cadence detector usually includes irregular 8fps/12fps motion. With
* true 24fps motion, the cadence quickly locks on, and stays locked on.
*
* Once the cadence locks on again, we resume normal operation based on
* the operation table.
*
*
* Timestamp mangling:
*
* To make five into four we need to extend frame durations by 25%.
* Consider the following diagram (times given in 90kHz ticks, rounded to
* integers; this is just for illustration, and for comparison with the
* "scratch paper" comments in pulldown.c of TVTime/Xine):
*
* NTSC input (29.97 fps)
* a         b         c         d         e         a (from next group) ...
* 0         3003      6006      9009      12012     15015
* 0           3754         7508        11261        15015
* 1           2            3           4            1 (from next group) ...
* Film output (23.976 fps)
*
* Three of the film frames have length 3754, and one has 3753
* (it is 1/90000 sec shorter). This rounding was chosen so that the lengths
* of the group of four sum to the original 15015.
*
* From the diagram we get these deltas for presentation timestamp adjustment
* (in 90 kHz ticks, for illustration):
* (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
* 0 +751 +1502 +2252 (skip) 0 ...
*
* In fractions of (p_next->date - p_cur->date), regardless of actual
* time unit, the deltas are:
* (1-a) (2-b) (3-c) (4-d) (skip) (1-a) ...
* 0 +0.25 +0.50 +0.75 (skip) 0 ...
*
* This is what we actually use. In our implementation, the values are stored
* multiplied by 4, as integers.
*
* The "current" frame should be displayed at [original time + delta].
* E.g., when "current" = b (i.e. PCN = abc), start displaying film frame 2
* at time [original time of b + 751 ticks]. So, when we catch the cadence,
* we will start mangling the timestamps according to the cadence position
* of the "current" frame, using the deltas given above. This will cause
* a one-time jerk, most noticeable if the cadence happens to catch at
* position "d". (Alternatively, upon lock-on, we could wait until we are
* at "a" before switching on IVTC, but this makes the maximal delay
* [max. detection + max. wait] = 3 + 4 = 7 input frames, which comes to
* 7/30 ~ 0.23 seconds instead of the 3/30 = 0.10 seconds from purely
* the detection. The one-time jerk is simpler to implement and gives the
* faster lock-on.)
*
* It is clear that "e" is a safe choice for the dropped frame. This can be
* seen from the timings and the cadence tables. First, consider the timings.
* If we have only one future frame, "e" is the only one whose PTS, comparing
* to the film frames, allows dropping it safely. To see this, consider which
* film frame needs to be rendered as each new input frame arrives. Secondly,
* consider the cadence tables. It is ok to drop "e", because the same
* film frame "1" is available also at the next PCN position "eab".
* (As a side note, it is interesting that Vektor's filter drops "b".
* See the TVTime sources.)
*
* When the filter falls out of film mode, the timestamps of the incoming
* frames are left untouched. Thus, the output from this filter has a
* variable framerate: 4/5 of the input framerate when IVTC is active
* (whether hard or soft), and the same framerate as input when it is not
* (or when in emergency mode).
*
*
* For other open-source IVTC codes, which may be a useful source for ideas,
* see the following:
*
* The classic filter by Billy Biggs (Vektor). Written in 2001-2003 for
* TVTime, and adapted into Xine later. In xine-lib 1.1.19, it is at
* src/post/deinterlace/pulldown.*. Also needed are tvtime.*, and speedy.*.
*
* Transcode's ivtc->decimate->32detect chain by Thanassis Tsiodras.
* Written in 2002, added in Transcode 0.6.12. This probably has something
* to do with the same chain in MPlayer, considering that MPlayer acquired
* an IVTC filter around the same time. In Transcode 1.1.5, the IVTC part is
* at filter/filter_ivtc.c. Transcode 1.1.5 sources can be downloaded from
* http://developer.berlios.de/project/showfiles.php?group_id=10094
*/
#endif