From 2727dbb05345204883f43f6474a5867f5a63300f Mon Sep 17 00:00:00 2001
From: Georg Martius <martius@mis.mpg.de>
Date: Tue, 8 Oct 2013 23:57:17 +0200
Subject: [PATCH] lavfi/vidstabdetect,vidstabtransform: update to vid.stab 0.98

In particular:
* set default value for accuracy to 15 (max)
* add zoomspeed for dynamic zoom (optzoom=2)
* make camera path optimization algorithm configurable
* update optzoom documentation in code to use new localmotions calculation function commandline arguments
* add debug option in vidstabtransform

Signed-off-by: Georg Martius <martius@mis.mpg.de>
Signed-off-by: Stefano Sabatini <stefasab@gmail.com>
---
 doc/filters.texi                  | 32 ++++++++++++++++++++++++-----
 libavfilter/vf_vidstabdetect.c    |  3 ++-
 libavfilter/vf_vidstabtransform.c | 34 +++++++++++++++++++++++++------
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 0f9036535d..64dd084a56 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -8064,7 +8064,7 @@ value of 10 means strong shakiness. Default value is 5.
 @item accuracy
 Set the accuracy of the detection process. It must be a value in the
 range 1-15. A value of 1 means low accuracy, a value of 15 means high
-accuracy. Default value is 9.
+accuracy. Default value is 15.
 
 @item stepsize
 Set stepsize of the search process. The region around minimum is
@@ -8144,11 +8144,21 @@ This filter accepts the following options:
 path to the file used to read the transforms (default: @file{transforms.trf})
 
 @item smoothing
-number of frames (value*2 + 1) used for lowpass filtering the camera movements
+Set the number of frames (value*2 + 1) used for lowpass filtering the camera movements
 (default: 10). For example a number of 10 means that 21 frames are used
 (10 in the past and 10 in the future) to smoothen the motion in the
 video. A larger values leads to a smoother video, but limits the
 acceleration of the camera (pan/tilt movements).
+0 is a special case where a static camera is simulated.
+
+@item optalgo
+Set the camera path optimization algorithm:
+@table @samp
+@item gauss
+gaussian kernel low-pass filter on camera motion (default)
+@item avg
+averaging on transformations
+@end table
 
 @item maxshift
 maximal number of pixels to translate frames (default: -1 no limit)
@@ -8186,7 +8196,7 @@ relative to previous frame (default)
 @end table
 
 @item zoom
-percentage to zoom (default: 0)
+Set percentage to zoom (default: 0)
 @table @samp
 @item >0
 zoom in
@@ -8195,18 +8205,21 @@ zoom out
 @end table
 
 @item optzoom
-set optimal zooming to avoid borders
+Set optimal zooming to avoid borders
 @table @samp
 @item 0
 disabled
 @item 1
 optimal static zoom value is determined (only very strong movements will lead to visible borders) (default)
 @item 2
-optimal adaptive zoom value is determined (no borders will be visible)
+optimal adaptive zoom value is determined (no borders will be visible), see @option{zoomspeed}
 @end table
 Note that the value given at zoom is added to the one calculated
 here.
 
+@item zoomspeed
+Set percent to zoom maximally each frame (for @option{optzoom=2}). Range is from 0 to 5, default value is 0.25.
+
 @item interpol
 type of interpolation
 
@@ -8233,6 +8246,15 @@ off (default)
 virtual tripod mode: equivalent to @code{relative=0:smoothing=0}
 @end table
 
+@item debug
+Increase log verbosity if set to 1. Also the detected global motions are written to the temporary file @file{global_motions.trf}.
+@table @samp
+@item 0
+disabled (default)
+@item 1
+enabled
+@end table
+
 @end table
 
 @subsection Examples
diff --git a/libavfilter/vf_vidstabdetect.c b/libavfilter/vf_vidstabdetect.c
index b2977ff730..008c993368 100644
--- a/libavfilter/vf_vidstabdetect.c
+++ b/libavfilter/vf_vidstabdetect.c
@@ -49,7 +49,7 @@ static const AVOption vidstabdetect_options[] = {
     {"result",      "path to the file used to write the transforms",                 OFFSET(result),             AV_OPT_TYPE_STRING, {.str = DEFAULT_RESULT_NAME}, .flags = FLAGS},
     {"shakiness",   "how shaky is the video and how quick is the camera?"
                     " 1: little (fast) 10: very strong/quick (slow)",                OFFSETC(shakiness),         AV_OPT_TYPE_INT,    {.i64 = 5},      1,  10, FLAGS},
-    {"accuracy",    "(>=shakiness) 1: low 15: high (slow)",                          OFFSETC(accuracy),          AV_OPT_TYPE_INT,    {.i64 = 9},      1,  15, FLAGS},
+    {"accuracy",    "(>=shakiness) 1: low 15: high (slow)",                          OFFSETC(accuracy),          AV_OPT_TYPE_INT,    {.i64 = 15},     1,  15, FLAGS},
     {"stepsize",    "region around minimum is scanned with 1 pixel resolution",      OFFSETC(stepSize),          AV_OPT_TYPE_INT,    {.i64 = 6},      1,  32, FLAGS},
     {"mincontrast", "below this contrast a field is discarded (0-1)",                OFFSETC(contrastThreshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.25}, 0.0, 1.0, FLAGS},
     {"show",        "0: draw nothing; 1,2: show fields and transforms",              OFFSETC(show),              AV_OPT_TYPE_INT,    {.i64 = 0},      0,   2, FLAGS},
@@ -135,6 +135,7 @@ static int config_input(AVFilterLink *inlink)
     av_log(ctx, AV_LOG_INFO, "      accuracy = %d\n", sd->conf.accuracy);
     av_log(ctx, AV_LOG_INFO, "      stepsize = %d\n", sd->conf.stepSize);
     av_log(ctx, AV_LOG_INFO, "   mincontrast = %f\n", sd->conf.contrastThreshold);
+    av_log(ctx, AV_LOG_INFO, "        tripod = %d\n", sd->conf.virtualTripod);
     av_log(ctx, AV_LOG_INFO, "          show = %d\n", sd->conf.show);
     av_log(ctx, AV_LOG_INFO, "        result = %s\n", sd->result);
 
diff --git a/libavfilter/vf_vidstabtransform.c b/libavfilter/vf_vidstabtransform.c
index 3c0a5ee40d..b4ef1c225f 100644
--- a/libavfilter/vf_vidstabtransform.c
+++ b/libavfilter/vf_vidstabtransform.c
@@ -39,6 +39,7 @@ typedef struct {
     VSTransformations trans;    // transformations
     char *input;                // name of transform file
     int tripod;
+    int debug;
 } TransformContext;
 
 #define OFFSET(x) offsetof(TransformContext, x)
@@ -49,7 +50,15 @@ static const AVOption vidstabtransform_options[] = {
     {"input",     "path to the file storing the transforms",                        OFFSET(input),
                    AV_OPT_TYPE_STRING, {.str = DEFAULT_INPUT_NAME}, .flags = FLAGS },
     {"smoothing", "number of frames*2 + 1 used for lowpass filtering",              OFFSETC(smoothing),
-                   AV_OPT_TYPE_INT,    {.i64 = 10},       1, 1000, FLAGS},
+                   AV_OPT_TYPE_INT,    {.i64 = 15},       0, 1000, FLAGS},
+    {"optalgo",   "camera path optimization algo",                                  OFFSETC(camPathAlgo),
+                   AV_OPT_TYPE_INT,    {.i64 = VSOptimalL1}, VSOptimalL1, VSAvg, FLAGS, "optalgo"},
+    {  "opt",     "global optimization",                                            0, // from version 1.0 on
+                   AV_OPT_TYPE_CONST,  {.i64 = VSOptimalL1 }, 0, 0, FLAGS, "optalgo"},
+    {  "gauss",   "gaussian kernel",                                                0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VSGaussian }, 0, 0, FLAGS,  "optalgo"},
+    {  "avg",     "simple averaging on motion",                                     0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VSAvg },      0, 0, FLAGS,  "optalgo"},
     {"maxshift",  "maximal number of pixels to translate image",                    OFFSETC(maxShift),
                    AV_OPT_TYPE_INT,    {.i64 = -1},      -1, 500,  FLAGS},
     {"maxangle",  "maximal angle in rad to rotate image",                           OFFSETC(maxAngle),
@@ -66,8 +75,10 @@ static const AVOption vidstabtransform_options[] = {
                    AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,    FLAGS},
     {"zoom",      "percentage to zoom >0: zoom in, <0 zoom out",                    OFFSETC(zoom),
                    AV_OPT_TYPE_DOUBLE, {.dbl = 0},     -100, 100,  FLAGS},
-    {"optzoom",   "0: nothing, 1: determine optimal zoom (added to 'zoom')",        OFFSETC(optZoom),
+    {"optzoom",   "0: nothing, 1: optimal static zoom, 2: optimal dynamic zoom",    OFFSETC(optZoom),
                    AV_OPT_TYPE_INT,    {.i64 = 1},        0, 2,    FLAGS},
+    {"zoomspeed", "for adative zoom: percent to zoom maximally each frame",         OFFSETC(zoomSpeed),
+                   AV_OPT_TYPE_DOUBLE, {.dbl = 0.25},     0, 5,    FLAGS},
     {"interpol",  "type of interpolation",                                          OFFSETC(interpolType),
                    AV_OPT_TYPE_INT,    {.i64 = 2},        0, 3,    FLAGS, "interpol"},
     {  "no",      "no interpolation",                                               0,
@@ -80,6 +91,8 @@ static const AVOption vidstabtransform_options[] = {
                    AV_OPT_TYPE_CONST,  {.i64 = VS_BiCubic },0, 0,  FLAGS, "interpol"},
     {"tripod",    "if 1: virtual tripod mode (equiv. to relative=0:smoothing=0)",   OFFSET(tripod),
                    AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,    FLAGS},
+    {"debug",     "if 1: more output printed and global motions are stored to file",OFFSET(debug),
+                   AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,    FLAGS},
     {NULL}
 };
 
@@ -153,12 +166,15 @@ static int config_input(AVFilterLink *inlink)
 
     // set values that are not initializes by the options
     tc->conf.modName = "vidstabtransform";
-    tc->conf.verbose =1;
+    tc->conf.verbose = 1 + tc->debug;
     if (tc->tripod) {
-        av_log(ctx, AV_LOG_INFO, "Virtual tripod mode: relative=0, smoothing=0");
+        av_log(ctx, AV_LOG_INFO, "Virtual tripod mode: relative=0, smoothing=0\n");
         tc->conf.relative  = 0;
         tc->conf.smoothing = 0;
     }
+    tc->conf.simpleMotionCalculation = 0;
+    tc->conf.storeTransforms         = tc->debug;
+    tc->conf.smoothZoom              = 0;
 
     if (vsTransformDataInit(td, &tc->conf, &fi_src, &fi_dest) != VS_OK) {
         av_log(ctx, AV_LOG_ERROR, "initialization of vid.stab transform failed, please report a BUG\n");
@@ -169,13 +185,19 @@ static int config_input(AVFilterLink *inlink)
     av_log(ctx, AV_LOG_INFO, "Video transformation/stabilization settings (pass 2/2):\n");
     av_log(ctx, AV_LOG_INFO, "    input     = %s\n", tc->input);
     av_log(ctx, AV_LOG_INFO, "    smoothing = %d\n", tc->conf.smoothing);
+    av_log(ctx, AV_LOG_INFO, "    optalgo   = %s\n",
+           tc->conf.camPathAlgo == VSOptimalL1 ? "opt" :
+           (tc->conf.camPathAlgo == VSGaussian ? "gauss" : "avg" ));
     av_log(ctx, AV_LOG_INFO, "    maxshift  = %d\n", tc->conf.maxShift);
     av_log(ctx, AV_LOG_INFO, "    maxangle  = %f\n", tc->conf.maxAngle);
     av_log(ctx, AV_LOG_INFO, "    crop      = %s\n", tc->conf.crop ? "Black" : "Keep");
     av_log(ctx, AV_LOG_INFO, "    relative  = %s\n", tc->conf.relative ? "True": "False");
     av_log(ctx, AV_LOG_INFO, "    invert    = %s\n", tc->conf.invert ? "True" : "False");
     av_log(ctx, AV_LOG_INFO, "    zoom      = %f\n", tc->conf.zoom);
-    av_log(ctx, AV_LOG_INFO, "    optzoom   = %s\n", tc->conf.optZoom ? "On" : "Off");
+    av_log(ctx, AV_LOG_INFO, "    optzoom   = %s\n",
+           tc->conf.optZoom == 1 ? "Static (1)" : (tc->conf.optZoom == 2 ? "Dynamic (2)" : "Off (0)" ));
+    if (tc->conf.optZoom == 2)
+        av_log(ctx, AV_LOG_INFO, "    zoomspeed = %g\n", tc->conf.zoomSpeed );
     av_log(ctx, AV_LOG_INFO, "    interpol  = %s\n", getInterpolationTypeName(tc->conf.interpolType));
 
     f = fopen(tc->input, "r");
@@ -186,7 +208,7 @@ static int config_input(AVFilterLink *inlink)
         VSManyLocalMotions mlms;
         if (vsReadLocalMotionsFile(f, &mlms) == VS_OK) {
             // calculate the actual transforms from the local motions
-            if (vsLocalmotions2TransformsSimple(td, &mlms, &tc->trans) != VS_OK) {
+            if (vsLocalmotions2Transforms(td, &mlms, &tc->trans) != VS_OK) {
                 av_log(ctx, AV_LOG_ERROR, "calculating transformations failed\n");
                 return AVERROR(EINVAL);
             }