work in progress
This commit is contained in:
commit
d809a085cb
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2020 nihui
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,139 @@
|
|||
# RIFE ncnn Vulkan
|
||||
|
||||
:exclamation: :exclamation: :exclamation: This software is in the early development stage, it may bite your cat
|
||||
|
||||
ncnn implementation of RIFE, Real-Time Intermediate Flow Estimation for Video Frame Interpolation.
|
||||
|
||||
rife-ncnn-vulkan uses [ncnn project](https://github.com/Tencent/ncnn) as the universal neural network inference framework.
|
||||
|
||||
## [Download]
|
||||
|
||||
Download Windows/Linux/MacOS Executable for Intel/AMD/Nvidia GPU
|
||||
|
||||
**https://github.com/nihui/rife-ncnn-vulkan/actions**
|
||||
|
||||
This package includes all the binaries and models required. It is portable, so no CUDA or PyTorch runtime environment is needed :)
|
||||
|
||||
## About RIFE
|
||||
|
||||
RIFE (Real-Time Intermediate Flow Estimation for Video Frame Interpolation)
|
||||
|
||||
https://github.com/hzwer/arXiv2020-RIFE
|
||||
|
||||
Huang, Zhewei and Zhang, Tianyuan and Heng, Wen and Shi, Boxin and Zhou, Shuchang
|
||||
|
||||
https://rife-vfi.github.io
|
||||
|
||||
https://arxiv.org/abs/2011.06294
|
||||
|
||||
## Usages
|
||||
|
||||
Input two frame images, output one interpolated frame image.
|
||||
|
||||
### Example Command
|
||||
|
||||
```shell
|
||||
./rife-ncnn-vulkan -0 0.jpg -1 1.jpg -o 01.jpg
|
||||
./rife-ncnn-vulkan -i input_frames/ -o output_frames/
|
||||
```
|
||||
|
||||
### Video Interpolation with FFmpeg
|
||||
|
||||
```shell
|
||||
mkdir input_frames
|
||||
mkdir output_frames
|
||||
|
||||
# find the source fps and format with ffprobe, for example 24fps, AAC
|
||||
ffprobe input.mp4
|
||||
|
||||
# extract audio
|
||||
ffmpeg -i input.mp4 -vn -acodec copy audio.m4a
|
||||
|
||||
# decode all frames
|
||||
ffmpeg -i input.mp4 input_frames/frame_%06d.png
|
||||
|
||||
# interpolate 2x frame count
|
||||
./rife-ncnn-vulkan -i input_frames -o output_frames
|
||||
|
||||
# encode interpolated frames in 48fps with audio
|
||||
ffmpeg -framerate 48 -i output_frames/%06d.png -i audio.m4a -c:a copy -crf 20 -c:v libx264 -pix_fmt yuv420p output.mp4
|
||||
```
|
||||
|
||||
### Full Usages
|
||||
|
||||
```console
|
||||
Usage: rife-ncnn-vulkan -0 infile -1 infile1 -o outfile [options]...
|
||||
rife-ncnn-vulkan -i indir -o outdir [options]...
|
||||
|
||||
-h show this help
|
||||
-v verbose output
|
||||
-0 input0-path input image0 path (jpg/png/webp)
|
||||
-1 input1-path input image1 path (jpg/png/webp)
|
||||
-i input-path input image directory (jpg/png/webp)
|
||||
-o output-path output image path (jpg/png/webp) or directory
|
||||
-t tile-size tile size (>=128, default=512) can be 256,256,128 for multi-gpu
|
||||
-g gpu-id gpu device to use (default=auto) can be 0,1,2 for multi-gpu
|
||||
-j load:proc:save thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu
|
||||
-f format output image format (jpg/png/webp, default=ext/png)
|
||||
```
|
||||
|
||||
- `input0-path`, `input1-path` and `output-path` accept file path
|
||||
- `input-path` and `output-path` accept file directory
|
||||
- `tile-size` = tile size, use smaller value to reduce GPU memory usage, must be multiple of 32, default 512
|
||||
- `load:proc:save` = thread count for the three stages (image decoding + rife interpolation + image encoding), using larger values may increase GPU usage and consume more GPU memory. You can tune this configuration with "4:4:4" for many small-size images, and "2:2:2" for large-size images. The default setting usually works fine for most situations. If you find that your GPU is hungry, try increasing thread count to achieve faster processing.
|
||||
- `format` = the format of the image to be output, png is better supported, however webp generally yields smaller file sizes, both are losslessly encoded
|
||||
|
||||
If you encounter a crash or error, try upgrading your GPU driver:
|
||||
|
||||
- Intel: https://downloadcenter.intel.com/product/80939/Graphics-Drivers
|
||||
- AMD: https://www.amd.com/en/support
|
||||
- NVIDIA: https://www.nvidia.com/Download/index.aspx
|
||||
|
||||
## Build from Source
|
||||
|
||||
1. Download and setup the Vulkan SDK from https://vulkan.lunarg.com/
|
||||
- For Linux distributions, you can alternatively get the essential build requirements from the package manager
|
||||
```shell
|
||||
dnf install vulkan-headers vulkan-loader-devel
|
||||
```
|
||||
```shell
|
||||
apt-get install libvulkan-dev
|
||||
```
|
||||
```shell
|
||||
pacman -S vulkan-headers vulkan-icd-loader
|
||||
```
|
||||
|
||||
2. Clone this project with all submodules
|
||||
|
||||
```shell
|
||||
git clone https://github.com/nihui/rife-ncnn-vulkan.git
|
||||
cd rife-ncnn-vulkan
|
||||
git submodule update --init --recursive
|
||||
```
|
||||
|
||||
3. Build with CMake
|
||||
- You can pass -DUSE_STATIC_MOLTENVK=ON option to avoid linking the vulkan loader library on MacOS
|
||||
|
||||
```shell
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ../src
|
||||
cmake --build . -j 4
|
||||
```
|
||||
|
||||
### TODO
|
||||
|
||||
* implement postproc properly
|
||||
* test-time spatial augmentation aka TTA-s
|
||||
* test-time temporal augmentation aka TTA-t
|
||||
|
||||
## Original RIFE Project
|
||||
|
||||
- https://github.com/hzwer/arXiv2020-RIFE
|
||||
|
||||
## Other Open-Source Code Used
|
||||
|
||||
- https://github.com/Tencent/ncnn for fast neural network inference on ALL PLATFORMS
|
||||
- https://github.com/webmproject/libwebp for encoding and decoding Webp images on ALL PLATFORMS
|
||||
- https://github.com/nothings/stb for decoding and encoding image on Linux / MacOS
|
||||
- https://github.com/tronkko/dirent for listing files in directory on Windows
|
Binary file not shown.
|
@ -0,0 +1,66 @@
|
|||
7767517
|
||||
64 78
|
||||
Input input.1 0 1 input.1
|
||||
Split splitncnn_input0 1 2 input.1 input.1_splitncnn_0 input.1_splitncnn_1
|
||||
Input flow.1 0 1 flow.1
|
||||
UnaryOp flow.0 1 1 flow.1 flow.0 0=1
|
||||
Split splitncnn_input1 1 2 flow.0 flow.1_splitncnn_0 flow.1_splitncnn_1
|
||||
Convolution Conv_0 1 1 input.1_splitncnn_1 42 0=16 1=3 3=2 4=1 6=432
|
||||
Convolution Conv_1 1 1 input.1_splitncnn_0 43 0=16 1=3 3=2 4=1 5=1 6=432
|
||||
PReLU PRelu_2 1 1 43 45 0=16
|
||||
Convolution Conv_3 1 1 45 46 0=16 1=3 4=1 5=1 6=2304
|
||||
Split splitncnn_0 1 2 46 46_splitncnn_0 46_splitncnn_1
|
||||
Pooling ReduceMean_5 1 1 46_splitncnn_1 48 0=1 4=1
|
||||
InnerProduct Conv_6 1 1 48 51 0=16 2=256 9=2 -23310=1,1.170065e+00
|
||||
InnerProduct Conv_8 1 1 51 53 0=16 2=256 9=4
|
||||
BinaryOp Mul_10 2 1 46_splitncnn_0 53 54 0=2
|
||||
BinaryOp Add_11 2 1 54 42 55
|
||||
PReLU PRelu_12 1 1 55 57 0=16
|
||||
Split splitncnn_1 1 3 57 57_splitncnn_0 57_splitncnn_1 57_splitncnn_2
|
||||
rife.Warp Warp_18 2 1 57_splitncnn_2 flow.1_splitncnn_1 63
|
||||
Convolution Conv_19 1 1 57_splitncnn_1 64 0=32 1=3 3=2 4=1 6=4608
|
||||
Convolution Conv_20 1 1 57_splitncnn_0 65 0=32 1=3 3=2 4=1 5=1 6=4608
|
||||
PReLU PRelu_21 1 1 65 67 0=32
|
||||
Convolution Conv_22 1 1 67 68 0=32 1=3 4=1 5=1 6=9216
|
||||
Split splitncnn_2 1 2 68 68_splitncnn_0 68_splitncnn_1
|
||||
Pooling ReduceMean_24 1 1 68_splitncnn_1 70 0=1 4=1
|
||||
InnerProduct Conv_25 1 1 70 73 0=16 2=512 9=2 -23310=1,5.760695e-01
|
||||
InnerProduct Conv_27 1 1 73 75 0=32 2=512 9=4
|
||||
BinaryOp Mul_29 2 1 68_splitncnn_0 75 76 0=2
|
||||
BinaryOp Add_30 2 1 76 64 77
|
||||
PReLU PRelu_31 1 1 77 79 0=32
|
||||
Split splitncnn_3 1 3 79 79_splitncnn_0 79_splitncnn_1 79_splitncnn_2
|
||||
Interp Resize_33 1 1 flow.1_splitncnn_0 89 0=2 1=5.000000e-01 2=5.000000e-01
|
||||
BinaryOp Mul_35 1 1 89 91 0=2 1=1 2=5.000000e-01
|
||||
Split splitncnn_4 1 2 91 91_splitncnn_0 91_splitncnn_1
|
||||
rife.Warp Warp_41 2 1 79_splitncnn_2 91_splitncnn_1 97
|
||||
Convolution Conv_42 1 1 79_splitncnn_1 98 0=64 1=3 3=2 4=1 6=18432
|
||||
Convolution Conv_43 1 1 79_splitncnn_0 99 0=64 1=3 3=2 4=1 5=1 6=18432
|
||||
PReLU PRelu_44 1 1 99 101 0=64
|
||||
Convolution Conv_45 1 1 101 102 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_5 1 2 102 102_splitncnn_0 102_splitncnn_1
|
||||
Pooling ReduceMean_47 1 1 102_splitncnn_1 104 0=1 4=1
|
||||
InnerProduct Conv_48 1 1 104 107 0=16 2=1024 9=2 -23310=1,3.197831e-01
|
||||
InnerProduct Conv_50 1 1 107 109 0=64 2=1024 9=4
|
||||
BinaryOp Mul_52 2 1 102_splitncnn_0 109 110 0=2
|
||||
BinaryOp Add_53 2 1 110 98 111
|
||||
PReLU PRelu_54 1 1 111 113 0=64
|
||||
Split splitncnn_6 1 3 113 113_splitncnn_0 113_splitncnn_1 113_splitncnn_2
|
||||
Interp Resize_56 1 1 91_splitncnn_0 123 0=2 1=5.000000e-01 2=5.000000e-01
|
||||
BinaryOp Mul_58 1 1 123 125 0=2 1=1 2=5.000000e-01
|
||||
Split splitncnn_7 1 2 125 125_splitncnn_0 125_splitncnn_1
|
||||
rife.Warp Warp_64 2 1 113_splitncnn_2 125_splitncnn_1 131
|
||||
Convolution Conv_65 1 1 113_splitncnn_1 132 0=128 1=3 3=2 4=1 6=73728
|
||||
Convolution Conv_66 1 1 113_splitncnn_0 133 0=128 1=3 3=2 4=1 5=1 6=73728
|
||||
PReLU PRelu_67 1 1 133 135 0=128
|
||||
Convolution Conv_68 1 1 135 136 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_8 1 2 136 136_splitncnn_0 136_splitncnn_1
|
||||
Pooling ReduceMean_70 1 1 136_splitncnn_1 138 0=1 4=1
|
||||
InnerProduct Conv_71 1 1 138 141 0=16 2=2048 9=2 -23310=1,3.248079e-01
|
||||
InnerProduct Conv_73 1 1 141 143 0=128 2=2048 9=4
|
||||
BinaryOp Mul_75 2 1 136_splitncnn_0 143 144 0=2
|
||||
BinaryOp Add_76 2 1 144 132 145
|
||||
PReLU PRelu_77 1 1 145 147 0=128
|
||||
Interp Resize_79 1 1 125_splitncnn_0 157 0=2 1=5.000000e-01 2=5.000000e-01
|
||||
BinaryOp Mul_81 1 1 157 159 0=2 1=1 2=5.000000e-01
|
||||
rife.Warp Warp_87 2 1 147 159 165
|
Binary file not shown.
|
@ -0,0 +1,239 @@
|
|||
7767517
|
||||
237 284
|
||||
Input input0 0 1 input0
|
||||
Input input1 0 1 input1
|
||||
Concat input.1 2 1 input0 input1 input.1
|
||||
Interp Resize_1 1 1 input.1 343 0=2 1=5.000000e-01 2=5.000000e-01
|
||||
Split splitncnn_0 1 5 343 343_splitncnn_0 343_splitncnn_1 343_splitncnn_2 343_splitncnn_3 343_splitncnn_4
|
||||
Interp Resize_3 1 1 343_splitncnn_4 353 0=2 1=2.500000e-01 2=2.500000e-01
|
||||
Convolution Conv_4 1 1 353 355 0=192 1=3 3=2 4=1 5=1 6=10368
|
||||
PReLU PRelu_6 1 1 355 357 0=192
|
||||
Split splitncnn_1 1 2 357 357_splitncnn_0 357_splitncnn_1
|
||||
Convolution Conv_7 1 1 357_splitncnn_1 359 0=192 1=3 4=1 5=1 6=331776
|
||||
PReLU PRelu_9 1 1 359 361 0=192
|
||||
Convolution Conv_10 1 1 361 363 0=192 1=3 4=1 5=1 6=331776
|
||||
Split splitncnn_2 1 2 363 363_splitncnn_0 363_splitncnn_1
|
||||
Pooling ReduceMean_13 1 1 363_splitncnn_1 365 0=1 4=1
|
||||
InnerProduct Conv_14 1 1 365 368 0=16 2=3072 9=2 -23310=1,-9.818018e-04
|
||||
InnerProduct Conv_16 1 1 368 370 0=192 2=3072 9=4
|
||||
BinaryOp Mul_18 2 1 363_splitncnn_0 370 371 0=2
|
||||
BinaryOp Add_19 2 1 371 357_splitncnn_0 372
|
||||
PReLU PRelu_20 1 1 372 374 0=192
|
||||
Split splitncnn_3 1 2 374 374_splitncnn_0 374_splitncnn_1
|
||||
Convolution Conv_21 1 1 374_splitncnn_1 376 0=192 1=3 4=1 5=1 6=331776
|
||||
PReLU PRelu_23 1 1 376 378 0=192
|
||||
Convolution Conv_24 1 1 378 380 0=192 1=3 4=1 5=1 6=331776
|
||||
Split splitncnn_4 1 2 380 380_splitncnn_0 380_splitncnn_1
|
||||
Pooling ReduceMean_27 1 1 380_splitncnn_1 382 0=1 4=1
|
||||
InnerProduct Conv_28 1 1 382 385 0=16 2=3072 9=2 -23310=1,1.656540e-01
|
||||
InnerProduct Conv_30 1 1 385 387 0=192 2=3072 9=4
|
||||
BinaryOp Mul_32 2 1 380_splitncnn_0 387 388 0=2
|
||||
BinaryOp Add_33 2 1 388 374_splitncnn_0 389
|
||||
PReLU PRelu_34 1 1 389 391 0=192
|
||||
Split splitncnn_5 1 2 391 391_splitncnn_0 391_splitncnn_1
|
||||
Convolution Conv_35 1 1 391_splitncnn_1 393 0=192 1=3 4=1 5=1 6=331776
|
||||
PReLU PRelu_37 1 1 393 395 0=192
|
||||
Convolution Conv_38 1 1 395 397 0=192 1=3 4=1 5=1 6=331776
|
||||
Split splitncnn_6 1 2 397 397_splitncnn_0 397_splitncnn_1
|
||||
Pooling ReduceMean_41 1 1 397_splitncnn_1 399 0=1 4=1
|
||||
InnerProduct Conv_42 1 1 399 402 0=16 2=3072 9=2 -23310=1,3.486148e-01
|
||||
InnerProduct Conv_44 1 1 402 404 0=192 2=3072 9=4
|
||||
BinaryOp Mul_46 2 1 397_splitncnn_0 404 405 0=2
|
||||
BinaryOp Add_47 2 1 405 391_splitncnn_0 406
|
||||
PReLU PRelu_48 1 1 406 408 0=192
|
||||
Split splitncnn_7 1 2 408 408_splitncnn_0 408_splitncnn_1
|
||||
Convolution Conv_49 1 1 408_splitncnn_1 410 0=192 1=3 4=1 5=1 6=331776
|
||||
PReLU PRelu_51 1 1 410 412 0=192
|
||||
Convolution Conv_52 1 1 412 414 0=192 1=3 4=1 5=1 6=331776
|
||||
Split splitncnn_8 1 2 414 414_splitncnn_0 414_splitncnn_1
|
||||
Pooling ReduceMean_55 1 1 414_splitncnn_1 416 0=1 4=1
|
||||
InnerProduct Conv_56 1 1 416 419 0=16 2=3072 9=2 -23310=1,4.422835e-02
|
||||
InnerProduct Conv_58 1 1 419 421 0=192 2=3072 9=4
|
||||
BinaryOp Mul_60 2 1 414_splitncnn_0 421 422 0=2
|
||||
BinaryOp Add_61 2 1 422 408_splitncnn_0 423
|
||||
PReLU PRelu_62 1 1 423 425 0=192
|
||||
Split splitncnn_9 1 2 425 425_splitncnn_0 425_splitncnn_1
|
||||
Convolution Conv_63 1 1 425_splitncnn_1 427 0=192 1=3 4=1 5=1 6=331776
|
||||
PReLU PRelu_65 1 1 427 429 0=192
|
||||
Convolution Conv_66 1 1 429 431 0=192 1=3 4=1 5=1 6=331776
|
||||
Split splitncnn_10 1 2 431 431_splitncnn_0 431_splitncnn_1
|
||||
Pooling ReduceMean_69 1 1 431_splitncnn_1 433 0=1 4=1
|
||||
InnerProduct Conv_70 1 1 433 436 0=16 2=3072 9=2 -23310=1,1.658438e-01
|
||||
InnerProduct Conv_72 1 1 436 438 0=192 2=3072 9=4
|
||||
BinaryOp Mul_74 2 1 431_splitncnn_0 438 439 0=2
|
||||
BinaryOp Add_75 2 1 439 425_splitncnn_0 440
|
||||
PReLU PRelu_76 1 1 440 442 0=192
|
||||
Split splitncnn_11 1 2 442 442_splitncnn_0 442_splitncnn_1
|
||||
Convolution Conv_77 1 1 442_splitncnn_1 444 0=192 1=3 4=1 5=1 6=331776
|
||||
PReLU PRelu_79 1 1 444 446 0=192
|
||||
Convolution Conv_80 1 1 446 448 0=192 1=3 4=1 5=1 6=331776
|
||||
Split splitncnn_12 1 2 448 448_splitncnn_0 448_splitncnn_1
|
||||
Pooling ReduceMean_83 1 1 448_splitncnn_1 450 0=1 4=1
|
||||
InnerProduct Conv_84 1 1 450 453 0=16 2=3072 9=2 -23310=1,2.899390e-01
|
||||
InnerProduct Conv_86 1 1 453 455 0=192 2=3072 9=4
|
||||
BinaryOp Mul_88 2 1 448_splitncnn_0 455 456 0=2
|
||||
BinaryOp Add_89 2 1 456 442_splitncnn_0 457
|
||||
PReLU PRelu_90 1 1 457 459 0=192
|
||||
Convolution Conv_91 1 1 459 460 0=8 1=3 4=1 5=1 6=13824
|
||||
PixelShuffle DepthToSpace_92 1 1 460 461 0=2
|
||||
Interp Resize_94 1 1 461 471 0=2 1=4.000000e+00 2=4.000000e+00
|
||||
Split splitncnn_13 1 5 471 471_splitncnn_0 471_splitncnn_1 471_splitncnn_2 471_splitncnn_3 471_splitncnn_4
|
||||
Crop Slice_99 1 1 343_splitncnn_3 476 -23309=1,0 -23310=1,3 -23311=1,0
|
||||
rife.Warp Warp_105 2 1 476 471_splitncnn_4 482
|
||||
Crop Slice_110 1 1 343_splitncnn_2 487 -23309=1,3 -23310=1,-1 -23311=1,0
|
||||
UnaryOp Neg_111 1 1 471_splitncnn_3 488 0=1
|
||||
rife.Warp Warp_117 2 1 487 488 494
|
||||
Concat Concat_118 3 1 482 494 471_splitncnn_2 495
|
||||
Interp Resize_120 1 1 495 505 0=2 1=5.000000e-01 2=5.000000e-01
|
||||
Convolution Conv_121 1 1 505 507 0=128 1=3 3=2 4=1 5=1 6=9216
|
||||
PReLU PRelu_123 1 1 507 509 0=128
|
||||
Split splitncnn_14 1 2 509 509_splitncnn_0 509_splitncnn_1
|
||||
Convolution Conv_124 1 1 509_splitncnn_1 511 0=128 1=3 4=1 5=1 6=147456
|
||||
PReLU PRelu_126 1 1 511 513 0=128
|
||||
Convolution Conv_127 1 1 513 515 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_15 1 2 515 515_splitncnn_0 515_splitncnn_1
|
||||
Pooling ReduceMean_130 1 1 515_splitncnn_1 517 0=1 4=1
|
||||
InnerProduct Conv_131 1 1 517 520 0=16 2=2048 9=2 -23310=1,3.783025e-02
|
||||
InnerProduct Conv_133 1 1 520 522 0=128 2=2048 9=4
|
||||
BinaryOp Mul_135 2 1 515_splitncnn_0 522 523 0=2
|
||||
BinaryOp Add_136 2 1 523 509_splitncnn_0 524
|
||||
PReLU PRelu_137 1 1 524 526 0=128
|
||||
Split splitncnn_16 1 2 526 526_splitncnn_0 526_splitncnn_1
|
||||
Convolution Conv_138 1 1 526_splitncnn_1 528 0=128 1=3 4=1 5=1 6=147456
|
||||
PReLU PRelu_140 1 1 528 530 0=128
|
||||
Convolution Conv_141 1 1 530 532 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_17 1 2 532 532_splitncnn_0 532_splitncnn_1
|
||||
Pooling ReduceMean_144 1 1 532_splitncnn_1 534 0=1 4=1
|
||||
InnerProduct Conv_145 1 1 534 537 0=16 2=2048 9=2 -23310=1,2.474383e-01
|
||||
InnerProduct Conv_147 1 1 537 539 0=128 2=2048 9=4
|
||||
BinaryOp Mul_149 2 1 532_splitncnn_0 539 540 0=2
|
||||
BinaryOp Add_150 2 1 540 526_splitncnn_0 541
|
||||
PReLU PRelu_151 1 1 541 543 0=128
|
||||
Split splitncnn_18 1 2 543 543_splitncnn_0 543_splitncnn_1
|
||||
Convolution Conv_152 1 1 543_splitncnn_1 545 0=128 1=3 4=1 5=1 6=147456
|
||||
PReLU PRelu_154 1 1 545 547 0=128
|
||||
Convolution Conv_155 1 1 547 549 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_19 1 2 549 549_splitncnn_0 549_splitncnn_1
|
||||
Pooling ReduceMean_158 1 1 549_splitncnn_1 551 0=1 4=1
|
||||
InnerProduct Conv_159 1 1 551 554 0=16 2=2048 9=2 -23310=1,2.253069e-01
|
||||
InnerProduct Conv_161 1 1 554 556 0=128 2=2048 9=4
|
||||
BinaryOp Mul_163 2 1 549_splitncnn_0 556 557 0=2
|
||||
BinaryOp Add_164 2 1 557 543_splitncnn_0 558
|
||||
PReLU PRelu_165 1 1 558 560 0=128
|
||||
Split splitncnn_20 1 2 560 560_splitncnn_0 560_splitncnn_1
|
||||
Convolution Conv_166 1 1 560_splitncnn_1 562 0=128 1=3 4=1 5=1 6=147456
|
||||
PReLU PRelu_168 1 1 562 564 0=128
|
||||
Convolution Conv_169 1 1 564 566 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_21 1 2 566 566_splitncnn_0 566_splitncnn_1
|
||||
Pooling ReduceMean_172 1 1 566_splitncnn_1 568 0=1 4=1
|
||||
InnerProduct Conv_173 1 1 568 571 0=16 2=2048 9=2 -23310=1,2.221507e-01
|
||||
InnerProduct Conv_175 1 1 571 573 0=128 2=2048 9=4
|
||||
BinaryOp Mul_177 2 1 566_splitncnn_0 573 574 0=2
|
||||
BinaryOp Add_178 2 1 574 560_splitncnn_0 575
|
||||
PReLU PRelu_179 1 1 575 577 0=128
|
||||
Split splitncnn_22 1 2 577 577_splitncnn_0 577_splitncnn_1
|
||||
Convolution Conv_180 1 1 577_splitncnn_1 579 0=128 1=3 4=1 5=1 6=147456
|
||||
PReLU PRelu_182 1 1 579 581 0=128
|
||||
Convolution Conv_183 1 1 581 583 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_23 1 2 583 583_splitncnn_0 583_splitncnn_1
|
||||
Pooling ReduceMean_186 1 1 583_splitncnn_1 585 0=1 4=1
|
||||
InnerProduct Conv_187 1 1 585 588 0=16 2=2048 9=2 -23310=1,1.850068e-01
|
||||
InnerProduct Conv_189 1 1 588 590 0=128 2=2048 9=4
|
||||
BinaryOp Mul_191 2 1 583_splitncnn_0 590 591 0=2
|
||||
BinaryOp Add_192 2 1 591 577_splitncnn_0 592
|
||||
PReLU PRelu_193 1 1 592 594 0=128
|
||||
Split splitncnn_24 1 2 594 594_splitncnn_0 594_splitncnn_1
|
||||
Convolution Conv_194 1 1 594_splitncnn_1 596 0=128 1=3 4=1 5=1 6=147456
|
||||
PReLU PRelu_196 1 1 596 598 0=128
|
||||
Convolution Conv_197 1 1 598 600 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_25 1 2 600 600_splitncnn_0 600_splitncnn_1
|
||||
Pooling ReduceMean_200 1 1 600_splitncnn_1 602 0=1 4=1
|
||||
InnerProduct Conv_201 1 1 602 605 0=16 2=2048 9=2 -23310=1,1.788969e-01
|
||||
InnerProduct Conv_203 1 1 605 607 0=128 2=2048 9=4
|
||||
BinaryOp Mul_205 2 1 600_splitncnn_0 607 608 0=2
|
||||
BinaryOp Add_206 2 1 608 594_splitncnn_0 609
|
||||
PReLU PRelu_207 1 1 609 611 0=128
|
||||
Convolution Conv_208 1 1 611 612 0=8 1=3 4=1 5=1 6=9216
|
||||
PixelShuffle DepthToSpace_209 1 1 612 613 0=2
|
||||
Interp Resize_211 1 1 613 623 0=2 1=2.000000e+00 2=2.000000e+00
|
||||
Split splitncnn_26 1 2 623 623_splitncnn_0 623_splitncnn_1
|
||||
BinaryOp Add_212 2 1 471_splitncnn_1 623_splitncnn_1 624
|
||||
Split splitncnn_27 1 3 624 624_splitncnn_0 624_splitncnn_1 624_splitncnn_2
|
||||
Crop Slice_217 1 1 343_splitncnn_1 629 -23309=1,0 -23310=1,3 -23311=1,0
|
||||
rife.Warp Warp_223 2 1 629 624_splitncnn_2 635
|
||||
Crop Slice_228 1 1 343_splitncnn_0 640 -23309=1,3 -23310=1,-1 -23311=1,0
|
||||
UnaryOp Neg_229 1 1 624_splitncnn_1 641 0=1
|
||||
rife.Warp Warp_235 2 1 640 641 647
|
||||
Concat Concat_236 3 1 635 647 624_splitncnn_0 648
|
||||
Convolution Conv_237 1 1 648 650 0=64 1=3 3=2 4=1 5=1 6=4608
|
||||
PReLU PRelu_239 1 1 650 652 0=64
|
||||
Split splitncnn_28 1 2 652 652_splitncnn_0 652_splitncnn_1
|
||||
Convolution Conv_240 1 1 652_splitncnn_1 654 0=64 1=3 4=1 5=1 6=36864
|
||||
PReLU PRelu_242 1 1 654 656 0=64
|
||||
Convolution Conv_243 1 1 656 658 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_29 1 2 658 658_splitncnn_0 658_splitncnn_1
|
||||
Pooling ReduceMean_246 1 1 658_splitncnn_1 660 0=1 4=1
|
||||
InnerProduct Conv_247 1 1 660 663 0=16 2=1024 9=2 -23310=1,1.606955e-01
|
||||
InnerProduct Conv_249 1 1 663 665 0=64 2=1024 9=4
|
||||
BinaryOp Mul_251 2 1 658_splitncnn_0 665 666 0=2
|
||||
BinaryOp Add_252 2 1 666 652_splitncnn_0 667
|
||||
PReLU PRelu_253 1 1 667 669 0=64
|
||||
Split splitncnn_30 1 2 669 669_splitncnn_0 669_splitncnn_1
|
||||
Convolution Conv_254 1 1 669_splitncnn_1 671 0=64 1=3 4=1 5=1 6=36864
|
||||
PReLU PRelu_256 1 1 671 673 0=64
|
||||
Convolution Conv_257 1 1 673 675 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_31 1 2 675 675_splitncnn_0 675_splitncnn_1
|
||||
Pooling ReduceMean_260 1 1 675_splitncnn_1 677 0=1 4=1
|
||||
InnerProduct Conv_261 1 1 677 680 0=16 2=1024 9=2 -23310=1,3.747779e-01
|
||||
InnerProduct Conv_263 1 1 680 682 0=64 2=1024 9=4
|
||||
BinaryOp Mul_265 2 1 675_splitncnn_0 682 683 0=2
|
||||
BinaryOp Add_266 2 1 683 669_splitncnn_0 684
|
||||
PReLU PRelu_267 1 1 684 686 0=64
|
||||
Split splitncnn_32 1 2 686 686_splitncnn_0 686_splitncnn_1
|
||||
Convolution Conv_268 1 1 686_splitncnn_1 688 0=64 1=3 4=1 5=1 6=36864
|
||||
PReLU PRelu_270 1 1 688 690 0=64
|
||||
Convolution Conv_271 1 1 690 692 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_33 1 2 692 692_splitncnn_0 692_splitncnn_1
|
||||
Pooling ReduceMean_274 1 1 692_splitncnn_1 694 0=1 4=1
|
||||
InnerProduct Conv_275 1 1 694 697 0=16 2=1024 9=2 -23310=1,5.849604e-01
|
||||
InnerProduct Conv_277 1 1 697 699 0=64 2=1024 9=4
|
||||
BinaryOp Mul_279 2 1 692_splitncnn_0 699 700 0=2
|
||||
BinaryOp Add_280 2 1 700 686_splitncnn_0 701
|
||||
PReLU PRelu_281 1 1 701 703 0=64
|
||||
Split splitncnn_34 1 2 703 703_splitncnn_0 703_splitncnn_1
|
||||
Convolution Conv_282 1 1 703_splitncnn_1 705 0=64 1=3 4=1 5=1 6=36864
|
||||
PReLU PRelu_284 1 1 705 707 0=64
|
||||
Convolution Conv_285 1 1 707 709 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_35 1 2 709 709_splitncnn_0 709_splitncnn_1
|
||||
Pooling ReduceMean_288 1 1 709_splitncnn_1 711 0=1 4=1
|
||||
InnerProduct Conv_289 1 1 711 714 0=16 2=1024 9=2 -23310=1,2.789340e-01
|
||||
InnerProduct Conv_291 1 1 714 716 0=64 2=1024 9=4
|
||||
BinaryOp Mul_293 2 1 709_splitncnn_0 716 717 0=2
|
||||
BinaryOp Add_294 2 1 717 703_splitncnn_0 718
|
||||
PReLU PRelu_295 1 1 718 720 0=64
|
||||
Split splitncnn_36 1 2 720 720_splitncnn_0 720_splitncnn_1
|
||||
Convolution Conv_296 1 1 720_splitncnn_1 722 0=64 1=3 4=1 5=1 6=36864
|
||||
PReLU PRelu_298 1 1 722 724 0=64
|
||||
Convolution Conv_299 1 1 724 726 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_37 1 2 726 726_splitncnn_0 726_splitncnn_1
|
||||
Pooling ReduceMean_302 1 1 726_splitncnn_1 728 0=1 4=1
|
||||
InnerProduct Conv_303 1 1 728 731 0=16 2=1024 9=2 -23310=1,4.707932e-01
|
||||
InnerProduct Conv_305 1 1 731 733 0=64 2=1024 9=4
|
||||
BinaryOp Mul_307 2 1 726_splitncnn_0 733 734 0=2
|
||||
BinaryOp Add_308 2 1 734 720_splitncnn_0 735
|
||||
PReLU PRelu_309 1 1 735 737 0=64
|
||||
Split splitncnn_38 1 2 737 737_splitncnn_0 737_splitncnn_1
|
||||
Convolution Conv_310 1 1 737_splitncnn_1 739 0=64 1=3 4=1 5=1 6=36864
|
||||
PReLU PRelu_312 1 1 739 741 0=64
|
||||
Convolution Conv_313 1 1 741 743 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_39 1 2 743 743_splitncnn_0 743_splitncnn_1
|
||||
Pooling ReduceMean_316 1 1 743_splitncnn_1 745 0=1 4=1
|
||||
InnerProduct Conv_317 1 1 745 748 0=16 2=1024 9=2 -23310=1,7.429121e-01
|
||||
InnerProduct Conv_319 1 1 748 750 0=64 2=1024 9=4
|
||||
BinaryOp Mul_321 2 1 743_splitncnn_0 750 751 0=2
|
||||
BinaryOp Add_322 2 1 751 737_splitncnn_0 752
|
||||
PReLU PRelu_323 1 1 752 754 0=64
|
||||
Convolution Conv_324 1 1 754 755 0=8 1=3 4=1 5=1 6=4608
|
||||
PixelShuffle DepthToSpace_325 1 1 755 756 0=2
|
||||
BinaryOp Add_326 2 1 471_splitncnn_0 623_splitncnn_0 757
|
||||
BinaryOp Add_327 2 1 757 756 758
|
Binary file not shown.
|
@ -0,0 +1,85 @@
|
|||
7767517
|
||||
83 96
|
||||
Input img0 0 1 img0
|
||||
Input img1 0 1 img1
|
||||
Input flow 0 1 flow
|
||||
Split splitncnn_input2 1 3 flow flow_splitncnn_0 flow_splitncnn_1 flow_splitncnn_2
|
||||
Input 3 0 1 3
|
||||
Input 4 0 1 4
|
||||
Input 5 0 1 5
|
||||
Input 6 0 1 6
|
||||
Input 7 0 1 7
|
||||
Input 8 0 1 8
|
||||
Input 9 0 1 9
|
||||
Input 10 0 1 10
|
||||
rife.Warp Warp_5 2 1 img0 flow_splitncnn_2 70
|
||||
UnaryOp Neg_6 1 1 flow_splitncnn_1 71 0=1
|
||||
rife.Warp Warp_12 2 1 img1 71 77
|
||||
Concat Concat_13 3 1 70 77 flow_splitncnn_0 78
|
||||
Split splitncnn_0 1 2 78 78_splitncnn_0 78_splitncnn_1
|
||||
Convolution Conv_14 1 1 78_splitncnn_1 79 0=32 1=3 3=2 4=1 6=2304
|
||||
Convolution Conv_15 1 1 78_splitncnn_0 80 0=32 1=3 3=2 4=1 5=1 6=2304
|
||||
PReLU PRelu_16 1 1 80 82 0=32
|
||||
Convolution Conv_17 1 1 82 83 0=32 1=3 4=1 5=1 6=9216
|
||||
Split splitncnn_1 1 2 83 83_splitncnn_0 83_splitncnn_1
|
||||
Pooling ReduceMean_19 1 1 83_splitncnn_1 85 0=1 4=1
|
||||
InnerProduct Conv_20 1 1 85 88 0=16 2=512 9=2 -23310=1,8.258036e-02
|
||||
InnerProduct Conv_22 1 1 88 90 0=32 2=512 9=4
|
||||
BinaryOp Mul_24 2 1 83_splitncnn_0 90 91 0=2
|
||||
BinaryOp Add_25 2 1 91 79 92
|
||||
PReLU PRelu_26 1 1 92 94 0=32
|
||||
Split splitncnn_2 1 2 94 94_splitncnn_0 94_splitncnn_1
|
||||
Concat Concat_27 3 1 94_splitncnn_1 3 7 95
|
||||
Split splitncnn_3 1 2 95 95_splitncnn_0 95_splitncnn_1
|
||||
Convolution Conv_28 1 1 95_splitncnn_1 96 0=64 1=3 3=2 4=1 6=36864
|
||||
Convolution Conv_29 1 1 95_splitncnn_0 97 0=64 1=3 3=2 4=1 5=1 6=36864
|
||||
PReLU PRelu_30 1 1 97 99 0=64
|
||||
Convolution Conv_31 1 1 99 100 0=64 1=3 4=1 5=1 6=36864
|
||||
Split splitncnn_4 1 2 100 100_splitncnn_0 100_splitncnn_1
|
||||
Pooling ReduceMean_33 1 1 100_splitncnn_1 102 0=1 4=1
|
||||
InnerProduct Conv_34 1 1 102 105 0=16 2=1024 9=2 -23310=1,1.095001e-01
|
||||
InnerProduct Conv_36 1 1 105 107 0=64 2=1024 9=4
|
||||
BinaryOp Mul_38 2 1 100_splitncnn_0 107 108 0=2
|
||||
BinaryOp Add_39 2 1 108 96 109
|
||||
PReLU PRelu_40 1 1 109 111 0=64
|
||||
Split splitncnn_5 1 2 111 111_splitncnn_0 111_splitncnn_1
|
||||
Concat Concat_41 3 1 111_splitncnn_1 4 8 112
|
||||
Split splitncnn_6 1 2 112 112_splitncnn_0 112_splitncnn_1
|
||||
Convolution Conv_42 1 1 112_splitncnn_1 113 0=128 1=3 3=2 4=1 6=147456
|
||||
Convolution Conv_43 1 1 112_splitncnn_0 114 0=128 1=3 3=2 4=1 5=1 6=147456
|
||||
PReLU PRelu_44 1 1 114 116 0=128
|
||||
Convolution Conv_45 1 1 116 117 0=128 1=3 4=1 5=1 6=147456
|
||||
Split splitncnn_7 1 2 117 117_splitncnn_0 117_splitncnn_1
|
||||
Pooling ReduceMean_47 1 1 117_splitncnn_1 119 0=1 4=1
|
||||
InnerProduct Conv_48 1 1 119 122 0=16 2=2048 9=2 -23310=1,1.442167e-02
|
||||
InnerProduct Conv_50 1 1 122 124 0=128 2=2048 9=4
|
||||
BinaryOp Mul_52 2 1 117_splitncnn_0 124 125 0=2
|
||||
BinaryOp Add_53 2 1 125 113 126
|
||||
PReLU PRelu_54 1 1 126 128 0=128
|
||||
Split splitncnn_8 1 2 128 128_splitncnn_0 128_splitncnn_1
|
||||
Concat Concat_55 3 1 128_splitncnn_1 5 9 129
|
||||
Split splitncnn_9 1 2 129 129_splitncnn_0 129_splitncnn_1
|
||||
Convolution Conv_56 1 1 129_splitncnn_1 130 0=256 1=3 3=2 4=1 6=589824
|
||||
Convolution Conv_57 1 1 129_splitncnn_0 131 0=256 1=3 3=2 4=1 5=1 6=589824
|
||||
PReLU PRelu_58 1 1 131 133 0=256
|
||||
Convolution Conv_59 1 1 133 134 0=256 1=3 4=1 5=1 6=589824
|
||||
Split splitncnn_10 1 2 134 134_splitncnn_0 134_splitncnn_1
|
||||
Pooling ReduceMean_61 1 1 134_splitncnn_1 136 0=1 4=1
|
||||
InnerProduct Conv_62 1 1 136 139 0=16 2=4096 9=2 -23310=1,1.379933e-02
|
||||
InnerProduct Conv_64 1 1 139 141 0=256 2=4096 9=4
|
||||
BinaryOp Mul_66 2 1 134_splitncnn_0 141 142 0=2
|
||||
BinaryOp Add_67 2 1 142 130 143
|
||||
PReLU PRelu_68 1 1 143 145 0=256
|
||||
Concat Concat_69 3 1 145 6 10 146
|
||||
Deconvolution ConvTranspose_70 1 1 146 147 0=128 1=4 3=2 4=1 5=1 6=1048576
|
||||
PReLU PRelu_71 1 1 147 149 0=128
|
||||
Concat Concat_72 2 1 149 128_splitncnn_0 150
|
||||
Deconvolution ConvTranspose_73 1 1 150 151 0=64 1=4 3=2 4=1 5=1 6=262144
|
||||
PReLU PRelu_74 1 1 151 153 0=64
|
||||
Concat Concat_75 2 1 153 111_splitncnn_0 154
|
||||
Deconvolution ConvTranspose_76 1 1 154 155 0=32 1=4 3=2 4=1 5=1 6=65536
|
||||
PReLU PRelu_77 1 1 155 157 0=32
|
||||
Concat Concat_78 2 1 157 94_splitncnn_0 158
|
||||
Deconvolution ConvTranspose_79 1 1 158 159 0=16 1=4 3=2 4=1 5=1 6=16384
|
||||
PReLU PRelu_80 1 1 159 161 0=16
|
||||
Convolution Conv_81 1 1 161 162 0=4 1=3 4=1 5=1 6=576
|
|
@ -0,0 +1,256 @@
|
|||
cmake_policy(SET CMP0091 NEW)
|
||||
set(CMAKE_POLICY_DEFAULT_CMP0091 NEW)
|
||||
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
|
||||
project(rife-ncnn-vulkan)
|
||||
|
||||
cmake_minimum_required(VERSION 3.9)
|
||||
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
|
||||
option(USE_SYSTEM_NCNN "build with system libncnn" OFF)
|
||||
option(USE_SYSTEM_WEBP "build with system libwebp" OFF)
|
||||
option(USE_STATIC_MOLTENVK "link moltenvk static library" OFF)
|
||||
|
||||
find_package(Threads)
|
||||
find_package(OpenMP)
|
||||
find_package(Vulkan REQUIRED)
|
||||
|
||||
# rife_add_shader(<shader source>)
#
# Adds a build rule that runs generate_shader_comp_header.cmake on the given
# shader source and produces <name>.comp.hex.h in the current binary directory,
# then appends that header to SHADER_SPV_HEX_FILES.
# Being a macro, SHADER_SRC_NAME_WE and SHADER_COMP_HEADER are also left set in
# the caller's scope.
macro(rife_add_shader SHADER_SRC)
    get_filename_component(SHADER_SRC_NAME_WE ${SHADER_SRC} NAME_WE)
    set(SHADER_COMP_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.comp.hex.h)

    # regenerate the header whenever the shader source changes
    add_custom_command(
        OUTPUT ${SHADER_COMP_HEADER}
        COMMAND ${CMAKE_COMMAND} -DSHADER_SRC=${CMAKE_CURRENT_SOURCE_DIR}/${SHADER_SRC} -DSHADER_COMP_HEADER=${SHADER_COMP_HEADER} -P "${CMAKE_CURRENT_SOURCE_DIR}/generate_shader_comp_header.cmake"
        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${SHADER_SRC}
        COMMENT "Preprocessing shader source ${SHADER_SRC_NAME_WE}.comp"
        VERBATIM
    )
    set_source_files_properties(${SHADER_COMP_HEADER} PROPERTIES GENERATED TRUE)

    # collected list is what the generate-spirv target depends on
    list(APPEND SHADER_SPV_HEX_FILES ${SHADER_COMP_HEADER})
endmacro()
|
||||
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
if(OPENMP_FOUND)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
|
||||
endif()
|
||||
|
||||
# enable global link time optimization
# CMP0069 NEW makes INTERPROCEDURAL_OPTIMIZATION enforced for this project and,
# through the POLICY_DEFAULT variable, for the subdirectories added later.
cmake_policy(SET CMP0069 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_supported_output)
if(ipo_supported)
    # turn IPO on when the toolchain supports it (it was being set to FALSE,
    # which disabled the optimization this section is meant to enable)
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
    message(WARNING "IPO is not supported: ${ipo_supported_output}")
endif()
|
||||
|
||||
if(USE_SYSTEM_NCNN)
|
||||
set(GLSLANG_TARGET_DIR "GLSLANG-NOTFOUND" CACHE PATH "Absolute path to glslangTargets.cmake directory")
|
||||
if(NOT GLSLANG_TARGET_DIR AND NOT DEFINED ENV{GLSLANG_TARGET_DIR})
|
||||
message(WARNING "GLSLANG_TARGET_DIR must be defined! USE_SYSTEM_NCNN will be turned off.")
|
||||
set(USE_SYSTEM_NCNN OFF)
|
||||
else()
|
||||
message(STATUS "Using glslang install located at ${GLSLANG_TARGET_DIR}")
|
||||
|
||||
find_package(Threads)
|
||||
|
||||
include("${GLSLANG_TARGET_DIR}/OSDependentTargets.cmake")
|
||||
include("${GLSLANG_TARGET_DIR}/OGLCompilerTargets.cmake")
|
||||
if(EXISTS "${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
|
||||
# hlsl support can be optional
|
||||
include("${GLSLANG_TARGET_DIR}/HLSLTargets.cmake")
|
||||
endif()
|
||||
include("${GLSLANG_TARGET_DIR}/glslangTargets.cmake")
|
||||
include("${GLSLANG_TARGET_DIR}/SPIRVTargets.cmake")
|
||||
|
||||
if (NOT TARGET glslang OR NOT TARGET SPIRV)
|
||||
message(WARNING "glslang or SPIRV target not found! USE_SYSTEM_NCNN will be turned off.")
|
||||
set(USE_SYSTEM_NCNN OFF)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_SYSTEM_NCNN)
|
||||
find_package(ncnn)
|
||||
if(NOT TARGET ncnn)
|
||||
message(WARNING "ncnn target not found! USE_SYSTEM_NCNN will be turned off.")
|
||||
set(USE_SYSTEM_NCNN OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT USE_SYSTEM_NCNN)
|
||||
# build ncnn library
|
||||
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ncnn/CMakeLists.txt")
|
||||
message(FATAL_ERROR "The submodules were not downloaded! Please update submodules with \"git submodule update --init --recursive\" and try again.")
|
||||
endif()
|
||||
|
||||
option(NCNN_INSTALL_SDK "" OFF)
|
||||
option(NCNN_PIXEL_ROTATE "" OFF)
|
||||
option(NCNN_VULKAN "" ON)
|
||||
option(NCNN_VULKAN_ONLINE_SPIRV "" ON)
|
||||
option(NCNN_BUILD_BENCHMARK "" OFF)
|
||||
option(NCNN_BUILD_TESTS "" OFF)
|
||||
option(NCNN_BUILD_TOOLS "" OFF)
|
||||
option(NCNN_BUILD_EXAMPLES "" OFF)
|
||||
option(NCNN_DISABLE_RTTI "" ON)
|
||||
option(NCNN_DISABLE_EXCEPTION "" ON)
|
||||
|
||||
option(WITH_LAYER_absval "" OFF)
|
||||
option(WITH_LAYER_argmax "" OFF)
|
||||
option(WITH_LAYER_batchnorm "" OFF)
|
||||
option(WITH_LAYER_bias "" OFF)
|
||||
option(WITH_LAYER_bnll "" OFF)
|
||||
option(WITH_LAYER_concat "" ON)
|
||||
option(WITH_LAYER_convolution "" ON)
|
||||
option(WITH_LAYER_crop "" ON)
|
||||
option(WITH_LAYER_deconvolution "" ON)
|
||||
option(WITH_LAYER_dropout "" OFF)
|
||||
option(WITH_LAYER_eltwise "" OFF)
|
||||
option(WITH_LAYER_elu "" OFF)
|
||||
option(WITH_LAYER_embed "" OFF)
|
||||
option(WITH_LAYER_exp "" OFF)
|
||||
option(WITH_LAYER_flatten "" ON)
|
||||
option(WITH_LAYER_innerproduct "" ON)
|
||||
option(WITH_LAYER_input "" ON)
|
||||
option(WITH_LAYER_log "" OFF)
|
||||
option(WITH_LAYER_lrn "" OFF)
|
||||
option(WITH_LAYER_memorydata "" OFF)
|
||||
option(WITH_LAYER_mvn "" OFF)
|
||||
option(WITH_LAYER_pooling "" ON)
|
||||
option(WITH_LAYER_power "" OFF)
|
||||
# the bundled flownet model contains PReLU layers, so the layer must be built in
option(WITH_LAYER_prelu "" ON)
|
||||
option(WITH_LAYER_proposal "" OFF)
|
||||
option(WITH_LAYER_reduction "" OFF)
|
||||
option(WITH_LAYER_relu "" ON)
|
||||
option(WITH_LAYER_reshape "" OFF)
|
||||
option(WITH_LAYER_roipooling "" OFF)
|
||||
option(WITH_LAYER_scale "" OFF)
|
||||
option(WITH_LAYER_sigmoid "" OFF)
|
||||
option(WITH_LAYER_slice "" OFF)
|
||||
option(WITH_LAYER_softmax "" OFF)
|
||||
option(WITH_LAYER_split "" ON)
|
||||
option(WITH_LAYER_spp "" OFF)
|
||||
option(WITH_LAYER_tanh "" OFF)
|
||||
option(WITH_LAYER_threshold "" OFF)
|
||||
option(WITH_LAYER_tile "" OFF)
|
||||
option(WITH_LAYER_rnn "" OFF)
|
||||
option(WITH_LAYER_lstm "" OFF)
|
||||
option(WITH_LAYER_binaryop "" ON)
|
||||
option(WITH_LAYER_unaryop "" ON)
|
||||
option(WITH_LAYER_convolutiondepthwise "" OFF)
|
||||
option(WITH_LAYER_padding "" ON)
|
||||
option(WITH_LAYER_squeeze "" OFF)
|
||||
option(WITH_LAYER_expanddims "" OFF)
|
||||
option(WITH_LAYER_normalize "" OFF)
|
||||
option(WITH_LAYER_permute "" OFF)
|
||||
option(WITH_LAYER_priorbox "" OFF)
|
||||
option(WITH_LAYER_detectionoutput "" OFF)
|
||||
option(WITH_LAYER_interp "" ON)
|
||||
option(WITH_LAYER_deconvolutiondepthwise "" OFF)
|
||||
option(WITH_LAYER_shufflechannel "" OFF)
|
||||
option(WITH_LAYER_instancenorm "" OFF)
|
||||
option(WITH_LAYER_clip "" OFF)
|
||||
option(WITH_LAYER_reorg "" ON)
|
||||
option(WITH_LAYER_yolodetectionoutput "" OFF)
|
||||
option(WITH_LAYER_quantize "" OFF)
|
||||
option(WITH_LAYER_dequantize "" OFF)
|
||||
option(WITH_LAYER_yolov3detectionoutput "" OFF)
|
||||
option(WITH_LAYER_psroipooling "" OFF)
|
||||
option(WITH_LAYER_roialign "" OFF)
|
||||
option(WITH_LAYER_packing "" ON)
|
||||
option(WITH_LAYER_requantize "" OFF)
|
||||
option(WITH_LAYER_cast "" ON)
|
||||
option(WITH_LAYER_hardsigmoid "" OFF)
|
||||
option(WITH_LAYER_selu "" OFF)
|
||||
option(WITH_LAYER_hardswish "" OFF)
|
||||
option(WITH_LAYER_noop "" OFF)
|
||||
option(WITH_LAYER_pixelshuffle "" ON)
|
||||
option(WITH_LAYER_deepcopy "" OFF)
|
||||
option(WITH_LAYER_mish "" OFF)
|
||||
option(WITH_LAYER_statisticspooling "" OFF)
|
||||
option(WITH_LAYER_swish "" OFF)
|
||||
|
||||
add_subdirectory(ncnn)
|
||||
endif()
|
||||
|
||||
if(USE_SYSTEM_WEBP)
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
find_package(WebP)
|
||||
if(NOT TARGET webp)
|
||||
message(WARNING "webp target not found! USE_SYSTEM_WEBP will be turned off.")
|
||||
set(USE_SYSTEM_WEBP OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT USE_SYSTEM_WEBP)
|
||||
# build libwebp library
|
||||
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/libwebp/CMakeLists.txt")
|
||||
message(FATAL_ERROR "The submodules were not downloaded! Please update submodules with \"git submodule update --init --recursive\" and try again.")
|
||||
endif()
|
||||
|
||||
option(WEBP_ENABLE_SIMD "" ON)
|
||||
option(WEBP_BUILD_ANIM_UTILS "" OFF)
|
||||
option(WEBP_BUILD_CWEBP "" OFF)
|
||||
option(WEBP_BUILD_DWEBP "" OFF)
|
||||
option(WEBP_BUILD_GIF2WEBP "" OFF)
|
||||
option(WEBP_BUILD_IMG2WEBP "" OFF)
|
||||
option(WEBP_BUILD_VWEBP "" OFF)
|
||||
option(WEBP_BUILD_WEBPINFO "" OFF)
|
||||
option(WEBP_BUILD_WEBPMUX "" OFF)
|
||||
option(WEBP_BUILD_EXTRAS "" OFF)
|
||||
option(WEBP_BUILD_WEBP_JS "" OFF)
|
||||
option(WEBP_NEAR_LOSSLESS "" OFF)
|
||||
option(WEBP_ENABLE_SWAP_16BIT_CSP "" OFF)
|
||||
|
||||
add_subdirectory(libwebp)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/libwebp/src)
|
||||
endif()
|
||||
|
||||
rife_add_shader(rife_preproc.comp)
|
||||
rife_add_shader(rife_postproc.comp)
|
||||
rife_add_shader(warp.comp)
|
||||
rife_add_shader(warp_pack4.comp)
|
||||
|
||||
add_custom_target(generate-spirv DEPENDS ${SHADER_SPV_HEX_FILES})
|
||||
|
||||
add_executable(rife-ncnn-vulkan
|
||||
main.cpp
|
||||
rife.cpp
|
||||
warp.cpp
|
||||
)
|
||||
|
||||
add_dependencies(rife-ncnn-vulkan generate-spirv)
|
||||
|
||||
set(RIFE_LINK_LIBRARIES ncnn webp ${Vulkan_LIBRARY})
|
||||
|
||||
if(USE_STATIC_MOLTENVK)
|
||||
find_library(CoreFoundation NAMES CoreFoundation)
|
||||
find_library(Foundation NAMES Foundation)
|
||||
find_library(Metal NAMES Metal)
|
||||
find_library(QuartzCore NAMES QuartzCore)
|
||||
find_library(CoreGraphics NAMES CoreGraphics)
|
||||
find_library(Cocoa NAMES Cocoa)
|
||||
find_library(IOKit NAMES IOKit)
|
||||
find_library(IOSurface NAMES IOSurface)
|
||||
|
||||
list(APPEND RIFE_LINK_LIBRARIES
|
||||
${Metal}
|
||||
${QuartzCore}
|
||||
${CoreGraphics}
|
||||
${Cocoa}
|
||||
${IOKit}
|
||||
${IOSurface}
|
||||
${Foundation}
|
||||
${CoreFoundation}
|
||||
)
|
||||
endif()
|
||||
|
||||
target_link_libraries(rife-ncnn-vulkan ${RIFE_LINK_LIBRARIES})
|
|
@ -0,0 +1,96 @@
|
|||
# Copyright (C) 2020 Sony Interactive Entertainment Inc.
|
||||
# Copyright (C) 2012 Raphael Kubo da Costa <rakuco@webkit.org>
|
||||
# Copyright (C) 2013 Igalia S.L.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND ITS CONTRIBUTORS ``AS
|
||||
# IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ITS
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#[=======================================================================[.rst:
|
||||
FindWebP
|
||||
--------------
|
||||
|
||||
Find WebP headers and libraries.
|
||||
|
||||
Imported Targets
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
``webp``
|
||||
The WebP library, if found.
|
||||
|
||||
Result Variables
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
This will define the following variables in your project:
|
||||
|
||||
``WebP_FOUND``
|
||||
true if (the requested version of) WebP is available.
|
||||
``WebP_VERSION``
|
||||
the version of WebP.
|
||||
``WebP_LIBRARIES``
|
||||
the libraries to link against to use WebP.
|
||||
``WebP_INCLUDE_DIRS``
|
||||
where to find the WebP headers.
|
||||
``WebP_COMPILE_OPTIONS``
|
||||
this should be passed to target_compile_options(), if the
|
||||
target is not used for linking
|
||||
|
||||
#]=======================================================================]
|
||||
|
||||
# Ask pkg-config about libwebp; its answers are only used as hints and metadata.
find_package(PkgConfig QUIET)
pkg_check_modules(PC_WEBP QUIET libwebp)
set(WebP_COMPILE_OPTIONS ${PC_WEBP_CFLAGS_OTHER})
# pkg_check_modules() reports the module version in <PREFIX>_VERSION
# (there is no <PREFIX>_CFLAGS_VERSION variable, so WebP_VERSION was always empty)
set(WebP_VERSION ${PC_WEBP_VERSION})
|
||||
|
||||
find_path(WebP_INCLUDE_DIR
|
||||
NAMES webp/decode.h
|
||||
HINTS ${PC_WEBP_INCLUDEDIR} ${PC_WEBP_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
find_library(WebP_LIBRARY
|
||||
NAMES ${WebP_NAMES} webp
|
||||
HINTS ${PC_WEBP_LIBDIR} ${PC_WEBP_LIBRARY_DIRS}
|
||||
)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(WebP
|
||||
FOUND_VAR WebP_FOUND
|
||||
REQUIRED_VARS WebP_INCLUDE_DIR WebP_LIBRARY
|
||||
VERSION_VAR WebP_VERSION
|
||||
)
|
||||
|
||||
if (WebP_LIBRARY AND NOT TARGET webp)
|
||||
add_library(webp UNKNOWN IMPORTED GLOBAL)
|
||||
set_target_properties(webp PROPERTIES
|
||||
IMPORTED_LOCATION "${WebP_LIBRARY}"
|
||||
INTERFACE_COMPILE_OPTIONS "${WebP_COMPILE_OPTIONS}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${WebP_INCLUDE_DIR}"
|
||||
)
|
||||
endif ()
|
||||
|
||||
mark_as_advanced(
|
||||
WebP_INCLUDE_DIR
|
||||
WebP_LIBRARY
|
||||
)
|
||||
|
||||
if (WebP_FOUND)
|
||||
set(WebP_LIBRARIES ${WebP_LIBRARY})
|
||||
set(WebP_INCLUDE_DIRS ${WebP_INCLUDE_DIR})
|
||||
endif ()
|
|
@ -0,0 +1,159 @@
|
|||
#ifndef FILESYSTEM_UTILS_H
|
||||
#define FILESYSTEM_UTILS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#if _WIN32
|
||||
#include <windows.h>
|
||||
#include "win32dirent.h"
|
||||
#else // _WIN32
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <dirent.h>
|
||||
#endif // _WIN32
|
||||
|
||||
#if _WIN32
|
||||
typedef std::wstring path_t;
|
||||
#define PATHSTR(X) L##X
|
||||
#else
|
||||
typedef std::string path_t;
|
||||
#define PATHSTR(X) X
|
||||
#endif
|
||||
|
||||
#if _WIN32
|
||||
static bool path_is_directory(const path_t& path)
|
||||
{
|
||||
DWORD attr = GetFileAttributesW(path.c_str());
|
||||
return (attr != INVALID_FILE_ATTRIBUTES) && (attr & FILE_ATTRIBUTE_DIRECTORY);
|
||||
}
|
||||
|
||||
static int list_directory(const path_t& dirpath, std::vector<path_t>& imagepaths)
|
||||
{
|
||||
imagepaths.clear();
|
||||
|
||||
_WDIR* dir = _wopendir(dirpath.c_str());
|
||||
if (!dir)
|
||||
{
|
||||
fwprintf(stderr, L"opendir failed %ls\n", dirpath.c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct _wdirent* ent = 0;
|
||||
while ((ent = _wreaddir(dir)))
|
||||
{
|
||||
if (ent->d_type != DT_REG)
|
||||
continue;
|
||||
|
||||
imagepaths.push_back(path_t(ent->d_name));
|
||||
}
|
||||
|
||||
_wclosedir(dir);
|
||||
std::sort(imagepaths.begin(), imagepaths.end());
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else // _WIN32
|
||||
static bool path_is_directory(const path_t& path)
|
||||
{
|
||||
struct stat s;
|
||||
if (stat(path.c_str(), &s) != 0)
|
||||
return false;
|
||||
return S_ISDIR(s.st_mode);
|
||||
}
|
||||
|
||||
static int list_directory(const path_t& dirpath, std::vector<path_t>& imagepaths)
|
||||
{
|
||||
imagepaths.clear();
|
||||
|
||||
DIR* dir = opendir(dirpath.c_str());
|
||||
if (!dir)
|
||||
{
|
||||
fprintf(stderr, "opendir failed %s\n", dirpath.c_str());
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct dirent* ent = 0;
|
||||
while ((ent = readdir(dir)))
|
||||
{
|
||||
if (ent->d_type != DT_REG)
|
||||
continue;
|
||||
|
||||
imagepaths.push_back(path_t(ent->d_name));
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
std::sort(imagepaths.begin(), imagepaths.end());
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // _WIN32
|
||||
|
||||
// Returns path with everything from its last '.' onwards removed.
// A path without any '.' is returned unchanged.
static path_t get_file_name_without_extension(const path_t& path)
{
    const size_t last_dot = path.rfind(PATHSTR('.'));
    return last_dot == path_t::npos ? path : path.substr(0, last_dot);
}
|
||||
|
||||
// Returns the text after the last '.' in path (the dot itself is excluded),
// or an empty string when path contains no '.'.
static path_t get_file_extension(const path_t& path)
{
    const size_t last_dot = path.rfind(PATHSTR('.'));
    return last_dot == path_t::npos ? path_t() : path.substr(last_dot + 1);
}
|
||||
|
||||
#if _WIN32
|
||||
// Returns the directory containing the running executable, including the
// trailing backslash (Win32 implementation).
// Returns an empty path when the module path cannot be queried or contains no
// backslash, instead of dereferencing a null pointer.
static path_t get_executable_directory()
{
    wchar_t filepath[256];

    // a return value of 0 means the call failed and filepath holds nothing usable
    const DWORD length = GetModuleFileNameW(NULL, filepath, 256);
    if (length == 0)
        return path_t();

    wchar_t* backslash = wcsrchr(filepath, L'\\');
    if (!backslash)
        return path_t();

    // cut right after the last separator so only the directory remains
    backslash[1] = L'\0';

    return path_t(filepath);
}
|
||||
#else // _WIN32
|
||||
// Returns the directory containing the running executable, including the
// trailing '/', resolved through the /proc/self/exe symlink.
// Returns an empty path when the link cannot be read or holds no '/'.
static path_t get_executable_directory()
{
    char filepath[256];

    // readlink() does not append a terminating NUL and reports the number of
    // bytes written, so build the string from the returned length
    const ssize_t length = readlink("/proc/self/exe", filepath, sizeof(filepath));
    if (length <= 0)
        return path_t();

    const path_t exepath(filepath, (size_t)length);

    const size_t slash = exepath.rfind('/');
    if (slash == path_t::npos)
        return path_t();

    // keep everything up to and including the last separator
    return exepath.substr(0, slash + 1);
}
|
||||
#endif // _WIN32
|
||||
|
||||
static bool filepath_is_readable(const path_t& path)
|
||||
{
|
||||
#if _WIN32
|
||||
FILE* fp = _wfopen(path.c_str(), L"rb");
|
||||
#else // _WIN32
|
||||
FILE* fp = fopen(path.c_str(), "rb");
|
||||
#endif // _WIN32
|
||||
if (!fp)
|
||||
return false;
|
||||
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns path unchanged when it is readable as given; otherwise returns path
// prefixed with the directory of the running executable.
static path_t sanitize_filepath(const path_t& path)
{
    return filepath_is_readable(path) ? path : get_executable_directory() + path;
}
|
||||
|
||||
#endif // FILESYSTEM_UTILS_H
|
|
@ -0,0 +1,22 @@
|
|||
|
||||
# Script that embeds one shader source file into a C header as a byte array.
# must define SHADER_COMP_HEADER SHADER_SRC

file(READ ${SHADER_SRC} comp_data)

# skip leading comment: keep only the text starting at the "#version" directive
string(FIND "${comp_data}" "#version" version_start)
string(SUBSTRING "${comp_data}" ${version_start} -1 comp_data)

# remove whitespace: strip the spaces that indent each line
string(REGEX REPLACE "\n +" "\n" comp_data "${comp_data}")

get_filename_component(SHADER_SRC_NAME_WE ${SHADER_SRC} NAME_WE)

# text to hex: round-trip through a temporary file so it can be re-read as HEX
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.text2hex.txt "${comp_data}")
file(READ ${CMAKE_CURRENT_BINARY_DIR}/${SHADER_SRC_NAME_WE}.text2hex.txt comp_data_hex HEX)

# turn every byte "ab" into "0xab," and then drop the final trailing comma
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," comp_data_hex ${comp_data_hex})
string(FIND "${comp_data_hex}" "," tail_comma REVERSE)
string(SUBSTRING "${comp_data_hex}" 0 ${tail_comma} comp_data_hex)

# emit the array definition; the array is named after the shader file
file(WRITE ${SHADER_COMP_HEADER} "static const char ${SHADER_SRC_NAME_WE}_comp_data[] = {${comp_data_hex}};\n")
|
|
@ -0,0 +1,823 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
#include <clocale>
|
||||
|
||||
#if _WIN32
|
||||
// image decoder and encoder with wic
|
||||
#include "wic_image.h"
|
||||
#else // _WIN32
|
||||
// image decoder and encoder with stb
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STBI_NO_PSD
|
||||
#define STBI_NO_TGA
|
||||
#define STBI_NO_GIF
|
||||
#define STBI_NO_HDR
|
||||
#define STBI_NO_PIC
|
||||
#define STBI_NO_STDIO
|
||||
#include "stb_image.h"
|
||||
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
||||
#include "stb_image_write.h"
|
||||
#endif // _WIN32
|
||||
#include "webp_image.h"
|
||||
|
||||
#if _WIN32
|
||||
#include <wchar.h>
|
||||
// Argument of the option returned by the most recent getopt() call, or NULL
// when that option takes no argument.
static wchar_t* optarg = NULL;

// Index into argv of the next element getopt() will look at.
static int optind = 1;

// Minimal wide-character getopt replacement for Windows.
//
// optstring lists the accepted option letters; a letter followed by ':' takes
// an argument, which is read from the following argv element.
//
// Returns the option letter, L'?' for an unknown option or a missing argument,
// and (wchar_t)-1 when argv is exhausted or the next element does not start
// with '-'.
static wchar_t getopt(int argc, wchar_t* const argv[], const wchar_t* optstring)
{
    if (optind >= argc || argv[optind][0] != L'-')
        return -1;

    wchar_t opt = argv[optind][1];

    // a lone "-" has no option letter; without this check wcschr() would match
    // the terminator of optstring and p[1] below would read past its end
    if (opt == L'\0')
        return L'?';

    const wchar_t* p = wcschr(optstring, opt);
    if (p == NULL)
        return L'?';

    optarg = NULL;

    if (p[1] == L':')
    {
        // the option's value is the next argv element
        optind++;
        if (optind >= argc)
            return L'?';

        optarg = argv[optind];
    }

    optind++;

    return opt;
}
|
||||
|
||||
static std::vector<int> parse_optarg_int_array(const wchar_t* optarg)
|
||||
{
|
||||
std::vector<int> array;
|
||||
array.push_back(_wtoi(optarg));
|
||||
|
||||
const wchar_t* p = wcschr(optarg, L',');
|
||||
while (p)
|
||||
{
|
||||
p++;
|
||||
array.push_back(_wtoi(p));
|
||||
p = wcschr(p, L',');
|
||||
}
|
||||
|
||||
return array;
|
||||
}
|
||||
#else // _WIN32
|
||||
#include <unistd.h> // getopt()
|
||||
|
||||
// Parses a comma separated list such as "256,256,128" into integers.
// The result always has at least one element; each field is converted with
// atoi().
static std::vector<int> parse_optarg_int_array(const char* optarg)
{
    std::vector<int> values(1, atoi(optarg));

    // every comma starts another field right behind it
    for (const char* comma = strchr(optarg, ','); comma; comma = strchr(comma + 1, ','))
    {
        values.push_back(atoi(comma + 1));
    }

    return values;
}
|
||||
#endif // _WIN32
|
||||
|
||||
// ncnn
|
||||
#include "cpu.h"
|
||||
#include "gpu.h"
|
||||
#include "platform.h"
|
||||
#include "benchmark.h"
|
||||
|
||||
#include "rife.h"
|
||||
|
||||
#include "filesystem_utils.h"
|
||||
|
||||
// Writes the command line help text to stderr.
static void print_usage()
{
    static const char* const usage_lines[] = {
        "Usage: rife-ncnn-vulkan -0 infile -1 infile1 -o outfile [options]...\n",
        " rife-ncnn-vulkan -i indir -o outdir [options]...\n\n",
        " -h show this help\n",
        " -v verbose output\n",
        " -0 input0-path input image0 path (jpg/png/webp)\n",
        " -1 input1-path input image1 path (jpg/png/webp)\n",
        " -i input-path input image directory (jpg/png/webp)\n",
        " -o output-path output image path (jpg/png/webp) or directory\n",
        " -t tile-size tile size (>=128, default=512) can be 256,256,128 for multi-gpu\n",
        " -g gpu-id gpu device to use (default=auto) can be 0,1,2 for multi-gpu\n",
        " -j load:proc:save thread count for load/proc/save (default=1:2:2) can be 1:2,2,2:2 for multi-gpu\n",
        " -f format output image format (jpg/png/webp, default=ext/png)\n",
    };

    const size_t line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);
    for (size_t i = 0; i < line_count; i++)
    {
        fputs(usage_lines[i], stderr);
    }
}
|
||||
|
||||
static int decode_image(const path_t& imagepath, ncnn::Mat& image, int* webp)
|
||||
{
|
||||
*webp = 0;
|
||||
|
||||
unsigned char* pixeldata = 0;
|
||||
int w;
|
||||
int h;
|
||||
int c;
|
||||
|
||||
#if _WIN32
|
||||
FILE* fp = _wfopen(imagepath.c_str(), L"rb");
|
||||
#else
|
||||
FILE* fp = fopen(imagepath.c_str(), "rb");
|
||||
#endif
|
||||
if (fp)
|
||||
{
|
||||
// read whole file
|
||||
unsigned char* filedata = 0;
|
||||
int length = 0;
|
||||
{
|
||||
fseek(fp, 0, SEEK_END);
|
||||
length = ftell(fp);
|
||||
rewind(fp);
|
||||
filedata = (unsigned char*)malloc(length);
|
||||
if (filedata)
|
||||
{
|
||||
fread(filedata, 1, length, fp);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
if (filedata)
|
||||
{
|
||||
pixeldata = webp_load(filedata, length, &w, &h, &c);
|
||||
if (pixeldata)
|
||||
{
|
||||
*webp = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// not webp, try jpg png etc.
|
||||
#if _WIN32
|
||||
pixeldata = wic_decode_image(imagepath.c_str(), &w, &h, &c);
|
||||
#else // _WIN32
|
||||
pixeldata = stbi_load_from_memory(filedata, length, &w, &h, &c, 3);
|
||||
c = 3;
|
||||
#endif // _WIN32
|
||||
}
|
||||
|
||||
free(filedata);
|
||||
}
|
||||
}
|
||||
|
||||
if (!pixeldata)
|
||||
{
|
||||
#if _WIN32
|
||||
fwprintf(stderr, L"decode image %ls failed\n", imagepath.c_str());
|
||||
#else // _WIN32
|
||||
fprintf(stderr, "decode image %s failed\n", imagepath.c_str());
|
||||
#endif // _WIN32
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
image = ncnn::Mat(w, h, (void*)pixeldata, (size_t)3, 3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Writes image to imagepath, choosing the encoder from the file extension
// (webp, png, jpg/jpeg; lower or upper case).
// Returns 0 on success. Returns -1 after printing a message when the encoder
// reports failure or the extension is none of the above.
static int encode_image(const path_t& imagepath, const ncnn::Mat& image)
{
    const path_t ext = get_file_extension(imagepath);

    const bool as_webp = ext == PATHSTR("webp") || ext == PATHSTR("WEBP");
    const bool as_png = ext == PATHSTR("png") || ext == PATHSTR("PNG");
    const bool as_jpg = ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG");

    // stays 0 when no encoder matches the extension
    int success = 0;

    if (as_webp)
    {
        success = webp_save(imagepath.c_str(), image.w, image.h, image.elempack, (const unsigned char*)image.data);
    }
    else if (as_png)
    {
#if _WIN32
        success = wic_encode_image(imagepath.c_str(), image.w, image.h, image.elempack, image.data);
#else
        success = stbi_write_png(imagepath.c_str(), image.w, image.h, image.elempack, image.data, 0);
#endif
    }
    else if (as_jpg)
    {
#if _WIN32
        success = wic_encode_jpeg_image(imagepath.c_str(), image.w, image.h, image.elempack, image.data);
#else
        success = stbi_write_jpg(imagepath.c_str(), image.w, image.h, image.elempack, image.data, 100);
#endif
    }

    if (success)
        return 0;

#if _WIN32
    fwprintf(stderr, L"encode image %ls failed\n", imagepath.c_str());
#else
    fprintf(stderr, "encode image %s failed\n", imagepath.c_str());
#endif

    return -1;
}
|
||||
|
||||
class Task
|
||||
{
|
||||
public:
|
||||
int id;
|
||||
int webp0;
|
||||
int webp1;
|
||||
|
||||
path_t in0path;
|
||||
path_t in1path;
|
||||
path_t outpath;
|
||||
float timestep;
|
||||
|
||||
ncnn::Mat in0image;
|
||||
ncnn::Mat in1image;
|
||||
ncnn::Mat outimage;
|
||||
};
|
||||
|
||||
class TaskQueue
|
||||
{
|
||||
public:
|
||||
TaskQueue()
|
||||
{
|
||||
}
|
||||
|
||||
void put(const Task& v)
|
||||
{
|
||||
lock.lock();
|
||||
|
||||
while (tasks.size() >= 8) // FIXME hardcode queue length
|
||||
{
|
||||
condition.wait(lock);
|
||||
}
|
||||
|
||||
tasks.push(v);
|
||||
|
||||
lock.unlock();
|
||||
|
||||
condition.signal();
|
||||
}
|
||||
|
||||
void get(Task& v)
|
||||
{
|
||||
lock.lock();
|
||||
|
||||
while (tasks.size() == 0)
|
||||
{
|
||||
condition.wait(lock);
|
||||
}
|
||||
|
||||
v = tasks.front();
|
||||
tasks.pop();
|
||||
|
||||
lock.unlock();
|
||||
|
||||
condition.signal();
|
||||
}
|
||||
|
||||
private:
|
||||
ncnn::Mutex lock;
|
||||
ncnn::ConditionVariable condition;
|
||||
std::queue<Task> tasks;
|
||||
};
|
||||
|
||||
TaskQueue toproc;
|
||||
TaskQueue tosave;
|
||||
|
||||
class LoadThreadParams
|
||||
{
|
||||
public:
|
||||
int jobs_load;
|
||||
|
||||
// session data
|
||||
std::vector<path_t> input0_files;
|
||||
std::vector<path_t> input1_files;
|
||||
std::vector<path_t> output_files;
|
||||
std::vector<float> timesteps;
|
||||
};
|
||||
|
||||
void* load(void* args)
|
||||
{
|
||||
const LoadThreadParams* ltp = (const LoadThreadParams*)args;
|
||||
const int count = ltp->output_files.size();
|
||||
|
||||
#pragma omp parallel for schedule(static,1) num_threads(ltp->jobs_load)
|
||||
for (int i=0; i<count; i++)
|
||||
{
|
||||
const path_t& image0path = ltp->input0_files[i];
|
||||
const path_t& image1path = ltp->input1_files[i];
|
||||
|
||||
Task v;
|
||||
v.id = i;
|
||||
v.in0path = image0path;
|
||||
v.in1path = image1path;
|
||||
v.outpath = ltp->output_files[i];
|
||||
v.timestep = ltp->timesteps[i];
|
||||
|
||||
int ret0 = decode_image(image0path, v.in0image, &v.webp0);
|
||||
int ret1 = decode_image(image1path, v.in1image, &v.webp1);
|
||||
|
||||
if (ret0 != 0 || ret1 != 1)
|
||||
{
|
||||
v.outimage = ncnn::Mat(v.in0image.w, v.in0image.h, (size_t)3, 3);
|
||||
toproc.put(v);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
class ProcThreadParams
|
||||
{
|
||||
public:
|
||||
const RIFE* rife;
|
||||
};
|
||||
|
||||
void* proc(void* args)
|
||||
{
|
||||
const ProcThreadParams* ptp = (const ProcThreadParams*)args;
|
||||
const RIFE* rife = ptp->rife;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
Task v;
|
||||
|
||||
toproc.get(v);
|
||||
|
||||
if (v.id == -233)
|
||||
break;
|
||||
|
||||
rife->process(v.in0image, v.in1image, v.timestep, v.outimage);
|
||||
|
||||
tosave.put(v);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Arguments handed to save().
class SaveThreadParams
{
public:
    // non-zero makes save() print one line per successfully written image
    int verbose;
};
|
||||
|
||||
void* save(void* args)
|
||||
{
|
||||
const SaveThreadParams* stp = (const SaveThreadParams*)args;
|
||||
const int verbose = stp->verbose;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
Task v;
|
||||
|
||||
tosave.get(v);
|
||||
|
||||
if (v.id == -233)
|
||||
break;
|
||||
|
||||
int ret = encode_image(v.outpath, v.outimage);
|
||||
|
||||
// free input pixel data
|
||||
{
|
||||
unsigned char* pixeldata = (unsigned char*)v.in0image.data;
|
||||
if (v.webp0 == 1)
|
||||
{
|
||||
free(pixeldata);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if _WIN32
|
||||
free(pixeldata);
|
||||
#else
|
||||
stbi_image_free(pixeldata);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
{
|
||||
unsigned char* pixeldata = (unsigned char*)v.in1image.data;
|
||||
if (v.webp1 == 1)
|
||||
{
|
||||
free(pixeldata);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if _WIN32
|
||||
free(pixeldata);
|
||||
#else
|
||||
stbi_image_free(pixeldata);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == 0)
|
||||
{
|
||||
if (verbose)
|
||||
{
|
||||
#if _WIN32
|
||||
fwprintf(stderr, L"%ls %ls %f -> %ls done\n", v.in0path.c_str(), v.in1path.c_str(), v.timestep, v.outpath.c_str());
|
||||
#else
|
||||
fprintf(stderr, "%s %s %f -> %s done\n", v.in0path.c_str(), v.in1path.c_str(), v.timestep, v.outpath.c_str());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#if _WIN32
|
||||
int wmain(int argc, wchar_t** argv)
|
||||
#else
|
||||
int main(int argc, char** argv)
|
||||
#endif
|
||||
{
|
||||
path_t input0path;
|
||||
path_t input1path;
|
||||
path_t inputpath;
|
||||
path_t outputpath;
|
||||
std::vector<int> tilesize;
|
||||
std::vector<int> gpuid;
|
||||
int jobs_load = 1;
|
||||
std::vector<int> jobs_proc;
|
||||
int jobs_save = 2;
|
||||
int verbose = 0;
|
||||
path_t format = PATHSTR("png");
|
||||
|
||||
#if _WIN32
|
||||
setlocale(LC_ALL, "");
|
||||
wchar_t opt;
|
||||
while ((opt = getopt(argc, argv, L"0:1:i:o:t:g:j:f:vh")) != (wchar_t)-1)
|
||||
{
|
||||
switch (opt)
|
||||
{
|
||||
case L'0':
|
||||
input0path = optarg;
|
||||
break;
|
||||
case L'1':
|
||||
input1path = optarg;
|
||||
break;
|
||||
case L'i':
|
||||
inputpath = optarg;
|
||||
break;
|
||||
case L'o':
|
||||
outputpath = optarg;
|
||||
break;
|
||||
case L't':
|
||||
tilesize = parse_optarg_int_array(optarg);
|
||||
break;
|
||||
case L'g':
|
||||
gpuid = parse_optarg_int_array(optarg);
|
||||
break;
|
||||
case L'j':
|
||||
swscanf(optarg, L"%d:%*[^:]:%d", &jobs_load, &jobs_save);
|
||||
jobs_proc = parse_optarg_int_array(wcschr(optarg, L':') + 1);
|
||||
break;
|
||||
case L'f':
|
||||
format = optarg;
|
||||
break;
|
||||
case L'v':
|
||||
verbose = 1;
|
||||
break;
|
||||
case L'h':
|
||||
default:
|
||||
print_usage();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#else // _WIN32
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "0:1:i:o:t:g:j:f:vh")) != -1)
|
||||
{
|
||||
switch (opt)
|
||||
{
|
||||
case '0':
|
||||
input0path = optarg;
|
||||
break;
|
||||
case '1':
|
||||
input1path = optarg;
|
||||
break;
|
||||
case 'i':
|
||||
inputpath = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
outputpath = optarg;
|
||||
break;
|
||||
case 't':
|
||||
tilesize = parse_optarg_int_array(optarg);
|
||||
break;
|
||||
case 'g':
|
||||
gpuid = parse_optarg_int_array(optarg);
|
||||
break;
|
||||
case 'j':
|
||||
sscanf(optarg, "%d:%*[^:]:%d", &jobs_load, &jobs_save);
|
||||
jobs_proc = parse_optarg_int_array(strchr(optarg, ':') + 1);
|
||||
break;
|
||||
case 'f':
|
||||
format = optarg;
|
||||
break;
|
||||
case 'v':
|
||||
verbose = 1;
|
||||
break;
|
||||
case 'h':
|
||||
default:
|
||||
print_usage();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif // _WIN32
|
||||
|
||||
if (((input0path.empty() || input1path.empty()) && inputpath.empty()) || outputpath.empty())
|
||||
{
|
||||
print_usage();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (tilesize.size() != (gpuid.empty() ? 1 : gpuid.size()) && !tilesize.empty())
|
||||
{
|
||||
fprintf(stderr, "invalid tilesize argument\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i=0; i<(int)tilesize.size(); i++)
|
||||
{
|
||||
if (tilesize[i] < 128 || tilesize[i] % 32 != 0)
|
||||
{
|
||||
fprintf(stderr, "invalid tilesize argument, must be >= 128, must be multiple of 32\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (jobs_load < 1 || jobs_save < 1)
|
||||
{
|
||||
fprintf(stderr, "invalid thread count argument\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (jobs_proc.size() != (gpuid.empty() ? 1 : gpuid.size()) && !jobs_proc.empty())
|
||||
{
|
||||
fprintf(stderr, "invalid jobs_proc thread count argument\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (int i=0; i<(int)jobs_proc.size(); i++)
|
||||
{
|
||||
if (jobs_proc[i] < 1)
|
||||
{
|
||||
fprintf(stderr, "invalid jobs_proc thread count argument\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!path_is_directory(outputpath))
|
||||
{
|
||||
// guess format from outputpath no matter what format argument specified
|
||||
path_t ext = get_file_extension(outputpath);
|
||||
|
||||
if (ext == PATHSTR("png") || ext == PATHSTR("PNG"))
|
||||
{
|
||||
format = PATHSTR("png");
|
||||
}
|
||||
else if (ext == PATHSTR("webp") || ext == PATHSTR("WEBP"))
|
||||
{
|
||||
format = PATHSTR("webp");
|
||||
}
|
||||
else if (ext == PATHSTR("jpg") || ext == PATHSTR("JPG") || ext == PATHSTR("jpeg") || ext == PATHSTR("JPEG"))
|
||||
{
|
||||
format = PATHSTR("jpg");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "invalid outputpath extension type\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (format != PATHSTR("png") && format != PATHSTR("webp") && format != PATHSTR("jpg"))
|
||||
{
|
||||
fprintf(stderr, "invalid format argument\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// collect input and output filepath
|
||||
std::vector<path_t> input0_files;
|
||||
std::vector<path_t> input1_files;
|
||||
std::vector<path_t> output_files;
|
||||
std::vector<float> timesteps;
|
||||
{
|
||||
if (!inputpath.empty() && path_is_directory(inputpath) && path_is_directory(outputpath))
|
||||
{
|
||||
std::vector<path_t> filenames;
|
||||
int lr = list_directory(inputpath, filenames);
|
||||
if (lr != 0)
|
||||
return -1;
|
||||
|
||||
const int count = filenames.size();
|
||||
const int numframe = count * 2;
|
||||
|
||||
input0_files.resize(numframe);
|
||||
input1_files.resize(numframe);
|
||||
output_files.resize(numframe);
|
||||
timesteps.resize(numframe);
|
||||
|
||||
double scale = (double)count / numframe;
|
||||
for (int i=0; i<numframe; i++)
|
||||
{
|
||||
// TODO provide option to control timestep interpolate method
|
||||
// float fx = (float)((i + 0.5) * scale - 0.5);
|
||||
float fx = i * scale;
|
||||
int sx = static_cast<int>(floor(fx));
|
||||
fx -= sx;
|
||||
|
||||
if (sx < 0)
|
||||
{
|
||||
sx = 0;
|
||||
fx = 0.f;
|
||||
}
|
||||
if (sx >= count - 1)
|
||||
{
|
||||
sx = count - 2;
|
||||
fx = 1.f;
|
||||
}
|
||||
|
||||
// fprintf(stderr, "%d %f %d\n", i, fx, sx);
|
||||
|
||||
path_t filename0 = filenames[sx];
|
||||
path_t filename1 = filenames[sx + 1];
|
||||
|
||||
// TODO provide option to specify output filename scheme
|
||||
#if _WIN32
|
||||
wchar_t tmp[256];
|
||||
swprintf(tmp, L"%06d", i+1);
|
||||
#else
|
||||
char tmp[256];
|
||||
sprintf(tmp, "%06d", i+1); // ffmpeg start from 1
|
||||
#endif
|
||||
path_t output_filename = path_t(tmp) + PATHSTR('.') + format;
|
||||
|
||||
input0_files[i] = inputpath + PATHSTR('/') + filename0;
|
||||
input1_files[i] = inputpath + PATHSTR('/') + filename1;
|
||||
output_files[i] = outputpath + PATHSTR('/') + output_filename;
|
||||
timesteps[i] = fx;
|
||||
}
|
||||
}
|
||||
else if (inputpath.empty() && !path_is_directory(input0path) && !path_is_directory(input1path) && !path_is_directory(outputpath))
|
||||
{
|
||||
input0_files.push_back(input0path);
|
||||
input1_files.push_back(input1path);
|
||||
output_files.push_back(outputpath);
|
||||
timesteps.push_back(0.5f);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "input0path, input1path and outputpath must be file at the same time\n");
|
||||
fprintf(stderr, "inputpath and outputpath must be directory at the same time\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if _WIN32
|
||||
CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
||||
#endif
|
||||
|
||||
ncnn::create_gpu_instance();
|
||||
|
||||
if (gpuid.empty())
|
||||
{
|
||||
gpuid.push_back(ncnn::get_default_gpu_index());
|
||||
}
|
||||
|
||||
const int use_gpu_count = (int)gpuid.size();
|
||||
|
||||
if (jobs_proc.empty())
|
||||
{
|
||||
jobs_proc.resize(use_gpu_count, 2);
|
||||
}
|
||||
|
||||
if (tilesize.empty())
|
||||
{
|
||||
tilesize.resize(use_gpu_count, 512);
|
||||
}
|
||||
|
||||
int cpu_count = std::max(1, ncnn::get_cpu_count());
|
||||
jobs_load = std::min(jobs_load, cpu_count);
|
||||
jobs_save = std::min(jobs_save, cpu_count);
|
||||
|
||||
int gpu_count = ncnn::get_gpu_count();
|
||||
for (int i=0; i<use_gpu_count; i++)
|
||||
{
|
||||
if (gpuid[i] < 0 || gpuid[i] >= gpu_count)
|
||||
{
|
||||
fprintf(stderr, "invalid gpu device\n");
|
||||
|
||||
ncnn::destroy_gpu_instance();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int total_jobs_proc = 0;
|
||||
for (int i=0; i<use_gpu_count; i++)
|
||||
{
|
||||
int gpu_queue_count = ncnn::get_gpu_info(gpuid[i]).compute_queue_count;
|
||||
jobs_proc[i] = std::min(jobs_proc[i], gpu_queue_count);
|
||||
total_jobs_proc += jobs_proc[i];
|
||||
}
|
||||
|
||||
{
|
||||
std::vector<RIFE*> rife(use_gpu_count);
|
||||
|
||||
for (int i=0; i<use_gpu_count; i++)
|
||||
{
|
||||
rife[i] = new RIFE(gpuid[i]);
|
||||
|
||||
rife[i]->load();
|
||||
|
||||
rife[i]->tilesize = tilesize[i];
|
||||
}
|
||||
|
||||
// main routine
|
||||
{
|
||||
// load image
|
||||
LoadThreadParams ltp;
|
||||
ltp.jobs_load = jobs_load;
|
||||
ltp.input0_files = input0_files;
|
||||
ltp.input1_files = input1_files;
|
||||
ltp.output_files = output_files;
|
||||
ltp.timesteps = timesteps;
|
||||
|
||||
ncnn::Thread load_thread(load, (void*)<p);
|
||||
|
||||
// rife proc
|
||||
std::vector<ProcThreadParams> ptp(use_gpu_count);
|
||||
for (int i=0; i<use_gpu_count; i++)
|
||||
{
|
||||
ptp[i].rife = rife[i];
|
||||
}
|
||||
|
||||
std::vector<ncnn::Thread*> proc_threads(total_jobs_proc);
|
||||
{
|
||||
int total_jobs_proc_id = 0;
|
||||
for (int i=0; i<use_gpu_count; i++)
|
||||
{
|
||||
for (int j=0; j<jobs_proc[i]; j++)
|
||||
{
|
||||
proc_threads[total_jobs_proc_id++] = new ncnn::Thread(proc, (void*)&ptp[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// save image
|
||||
SaveThreadParams stp;
|
||||
stp.verbose = verbose;
|
||||
|
||||
std::vector<ncnn::Thread*> save_threads(jobs_save);
|
||||
for (int i=0; i<jobs_save; i++)
|
||||
{
|
||||
save_threads[i] = new ncnn::Thread(save, (void*)&stp);
|
||||
}
|
||||
|
||||
// end
|
||||
load_thread.join();
|
||||
|
||||
Task end;
|
||||
end.id = -233;
|
||||
|
||||
for (int i=0; i<total_jobs_proc; i++)
|
||||
{
|
||||
toproc.put(end);
|
||||
}
|
||||
|
||||
for (int i=0; i<total_jobs_proc; i++)
|
||||
{
|
||||
proc_threads[i]->join();
|
||||
delete proc_threads[i];
|
||||
}
|
||||
|
||||
for (int i=0; i<jobs_save; i++)
|
||||
{
|
||||
tosave.put(end);
|
||||
}
|
||||
|
||||
for (int i=0; i<jobs_save; i++)
|
||||
{
|
||||
save_threads[i]->join();
|
||||
delete save_threads[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=0; i<use_gpu_count; i++)
|
||||
{
|
||||
delete rife[i];
|
||||
}
|
||||
rife.clear();
|
||||
}
|
||||
|
||||
ncnn::destroy_gpu_instance();
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,409 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#include "rife.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "benchmark.h"
|
||||
|
||||
#include "rife_preproc.comp.hex.h"
|
||||
#include "rife_postproc.comp.hex.h"
|
||||
|
||||
#include "rife_ops.h"
|
||||
|
||||
DEFINE_LAYER_CREATOR(Warp)
|
||||
|
||||
// Bind a new interpolator to the ncnn Vulkan device with index gpuid.
// Only defaults are set here; the networks and pipelines are built by load().
RIFE::RIFE(int gpuid)
{
    // pipelines stay null until load() creates them, so the destructor's
    // delete is harmless on an instance that was never loaded
    rife_postproc = 0;
    rife_preproc = 0;

    vkdev = ncnn::get_gpu_device(gpuid);

    // default tiling parameters; both members are public and may be
    // overwritten by the caller after construction
    prepadding = 256;
    tilesize = 4096;
}
|
||||
|
||||
// Release the preprocess and postprocess pipelines owned by this instance.
RIFE::~RIFE()
{
    // both pointers are either null (constructor default) or were allocated
    // with new in load(); delete on a null pointer does nothing
    delete rife_preproc;
    delete rife_postproc;
}
|
||||
|
||||
int RIFE::load()
|
||||
{
|
||||
ncnn::Option opt;
|
||||
opt.use_vulkan_compute = true;
|
||||
opt.use_fp16_packed = true;
|
||||
opt.use_fp16_storage = true;
|
||||
opt.use_fp16_arithmetic = false;
|
||||
opt.use_int8_storage = true;
|
||||
|
||||
flownet.opt = opt;
|
||||
contextnet.opt = opt;
|
||||
fusionnet.opt = opt;
|
||||
|
||||
flownet.set_vulkan_device(vkdev);
|
||||
contextnet.set_vulkan_device(vkdev);
|
||||
fusionnet.set_vulkan_device(vkdev);
|
||||
|
||||
flownet.register_custom_layer("rife.Warp", Warp_layer_creator);
|
||||
contextnet.register_custom_layer("rife.Warp", Warp_layer_creator);
|
||||
fusionnet.register_custom_layer("rife.Warp", Warp_layer_creator);
|
||||
|
||||
flownet.load_param("flownet.param");
|
||||
flownet.load_model("flownet.bin");
|
||||
|
||||
contextnet.load_param("contextnet.param");
|
||||
contextnet.load_model("contextnet.bin");
|
||||
|
||||
fusionnet.load_param("fusionnet.param");
|
||||
fusionnet.load_model("fusionnet.bin");
|
||||
|
||||
// initialize preprocess and postprocess pipeline
|
||||
{
|
||||
std::vector<ncnn::vk_specialization_type> specializations(1);
|
||||
#if _WIN32
|
||||
specializations[0].i = 1;
|
||||
#else
|
||||
specializations[0].i = 0;
|
||||
#endif
|
||||
|
||||
{
|
||||
static std::vector<uint32_t> spirv;
|
||||
static ncnn::Mutex lock;
|
||||
{
|
||||
ncnn::MutexLockGuard guard(lock);
|
||||
if (spirv.empty())
|
||||
{
|
||||
compile_spirv_module(rife_preproc_comp_data, sizeof(rife_preproc_comp_data), opt, spirv);
|
||||
}
|
||||
}
|
||||
|
||||
rife_preproc = new ncnn::Pipeline(vkdev);
|
||||
rife_preproc->set_optimal_local_size_xyz(8, 8, 3);
|
||||
rife_preproc->create(spirv.data(), spirv.size() * 4, specializations);
|
||||
}
|
||||
|
||||
{
|
||||
static std::vector<uint32_t> spirv;
|
||||
static ncnn::Mutex lock;
|
||||
{
|
||||
ncnn::MutexLockGuard guard(lock);
|
||||
if (spirv.empty())
|
||||
{
|
||||
compile_spirv_module(rife_postproc_comp_data, sizeof(rife_postproc_comp_data), opt, spirv);
|
||||
}
|
||||
}
|
||||
|
||||
rife_postproc = new ncnn::Pipeline(vkdev);
|
||||
rife_postproc->set_optimal_local_size_xyz(8, 8, 3);
|
||||
rife_postproc->create(spirv.data(), spirv.size() * 4, specializations);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RIFE::process(const ncnn::Mat& in0image, const ncnn::Mat& in1image, float timestep, ncnn::Mat& outimage) const
|
||||
{
|
||||
if (timestep == 0.f)
|
||||
{
|
||||
outimage = in0image;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (timestep == 1.f)
|
||||
{
|
||||
outimage = in1image;
|
||||
return 0;
|
||||
}
|
||||
|
||||
const unsigned char* pixel0data = (const unsigned char*)in0image.data;
|
||||
const unsigned char* pixel1data = (const unsigned char*)in1image.data;
|
||||
const int w = in0image.w;
|
||||
const int h = in0image.h;
|
||||
const int channels = 3;//in0image.elempack;
|
||||
|
||||
const int TILE_SIZE_X = tilesize;
|
||||
const int TILE_SIZE_Y = tilesize;
|
||||
|
||||
// fprintf(stderr, "%d x %d\n", w, h);
|
||||
|
||||
ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
|
||||
ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();
|
||||
|
||||
ncnn::Option opt = flownet.opt;
|
||||
opt.blob_vkallocator = blob_vkallocator;
|
||||
opt.workspace_vkallocator = blob_vkallocator;
|
||||
opt.staging_vkallocator = staging_vkallocator;
|
||||
|
||||
// pad to 32n
|
||||
int w_padded = (w + 31) / 32 * 32;
|
||||
int h_padded = (h + 31) / 32 * 32;
|
||||
|
||||
// each tile 100x100
|
||||
const int xtiles = (w_padded + TILE_SIZE_X - 1) / TILE_SIZE_X;
|
||||
const int ytiles = (h_padded + TILE_SIZE_Y - 1) / TILE_SIZE_Y;
|
||||
|
||||
// fprintf(stderr, "tiles %d %d\n", xtiles, ytiles);
|
||||
|
||||
const size_t in_out_tile_elemsize = opt.use_fp16_storage ? 2u : 4u;
|
||||
|
||||
//#pragma omp parallel for num_threads(2)
|
||||
for (int yi = 0; yi < ytiles; yi++)
|
||||
{
|
||||
int in_tile_y0 = std::max(yi * TILE_SIZE_Y - prepadding, 0);
|
||||
int in_tile_y1 = std::min((yi + 1) * TILE_SIZE_Y + prepadding, h);
|
||||
|
||||
// fprintf(stderr, "in_tile_y0 %d %d\n", in_tile_y0, in_tile_y1);
|
||||
|
||||
ncnn::Mat in0;
|
||||
ncnn::Mat in1;
|
||||
if (opt.use_fp16_storage && opt.use_int8_storage)
|
||||
{
|
||||
in0 = ncnn::Mat(w, (in_tile_y1 - in_tile_y0), (unsigned char*)pixel0data + in_tile_y0 * w * channels, (size_t)channels, 1);
|
||||
in1 = ncnn::Mat(w, (in_tile_y1 - in_tile_y0), (unsigned char*)pixel1data + in_tile_y0 * w * channels, (size_t)channels, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if _WIN32
|
||||
in0 = ncnn::Mat::from_pixels(pixel0data + in_tile_y0 * w * channels, ncnn::Mat::PIXEL_BGR2RGB, w, (in_tile_y1 - in_tile_y0));
|
||||
in1 = ncnn::Mat::from_pixels(pixel1data + in_tile_y0 * w * channels, ncnn::Mat::PIXEL_BGR2RGB, w, (in_tile_y1 - in_tile_y0));
|
||||
#else
|
||||
in0 = ncnn::Mat::from_pixels(pixel0data + in_tile_y0 * w * channels, ncnn::Mat::PIXEL_RGB, w, (in_tile_y1 - in_tile_y0));
|
||||
in1 = ncnn::Mat::from_pixels(pixel1data + in_tile_y0 * w * channels, ncnn::Mat::PIXEL_RGB, w, (in_tile_y1 - in_tile_y0));
|
||||
#endif
|
||||
}
|
||||
|
||||
ncnn::VkCompute cmd(vkdev);
|
||||
|
||||
// upload
|
||||
ncnn::VkMat in0_gpu;
|
||||
ncnn::VkMat in1_gpu;
|
||||
{
|
||||
cmd.record_clone(in0, in0_gpu, opt);
|
||||
cmd.record_clone(in1, in1_gpu, opt);
|
||||
|
||||
if (xtiles > 1)
|
||||
{
|
||||
cmd.submit_and_wait();
|
||||
cmd.reset();
|
||||
}
|
||||
}
|
||||
|
||||
int out_tile_y0 = yi * TILE_SIZE_Y;
|
||||
int out_tile_y1 = std::min((yi + 1) * TILE_SIZE_Y, h);
|
||||
|
||||
ncnn::VkMat out_gpu;
|
||||
if (opt.use_fp16_storage && opt.use_int8_storage)
|
||||
{
|
||||
out_gpu.create(w, (out_tile_y1 - out_tile_y0), (size_t)channels, 1, blob_vkallocator);
|
||||
}
|
||||
else
|
||||
{
|
||||
out_gpu.create(w, (out_tile_y1 - out_tile_y0), channels, (size_t)4u, 1, blob_vkallocator);
|
||||
}
|
||||
|
||||
for (int xi = 0; xi < xtiles; xi++)
|
||||
{
|
||||
// preproc
|
||||
ncnn::VkMat in0_tile_gpu;
|
||||
ncnn::VkMat in1_tile_gpu;
|
||||
{
|
||||
// crop tile
|
||||
int tile_x0 = xi * TILE_SIZE_X - prepadding;
|
||||
int tile_x1 = std::min((xi + 1) * TILE_SIZE_X, w_padded) + prepadding;
|
||||
int tile_y0 = yi * TILE_SIZE_Y - prepadding;
|
||||
int tile_y1 = std::min((yi + 1) * TILE_SIZE_Y, h_padded) + prepadding;
|
||||
|
||||
in0_tile_gpu.create(tile_x1 - tile_x0, tile_y1 - tile_y0, 3, in_out_tile_elemsize, 1, blob_vkallocator);
|
||||
|
||||
std::vector<ncnn::VkMat> bindings(2);
|
||||
bindings[0] = in0_gpu;
|
||||
bindings[1] = in0_tile_gpu;
|
||||
|
||||
std::vector<ncnn::vk_constant_type> constants(9);
|
||||
constants[0].i = in0_gpu.w;
|
||||
constants[1].i = in0_gpu.h;
|
||||
constants[2].i = in0_gpu.cstep;
|
||||
constants[3].i = in0_tile_gpu.w;
|
||||
constants[4].i = in0_tile_gpu.h;
|
||||
constants[5].i = in0_tile_gpu.cstep;
|
||||
constants[6].i = prepadding;
|
||||
constants[7].i = std::max(prepadding - yi * TILE_SIZE_Y, 0);
|
||||
constants[8].i = xi * TILE_SIZE_X;
|
||||
|
||||
cmd.record_pipeline(rife_preproc, bindings, constants, in0_tile_gpu);
|
||||
}
|
||||
{
|
||||
// crop tile
|
||||
int tile_x0 = xi * TILE_SIZE_X - prepadding;
|
||||
int tile_x1 = std::min((xi + 1) * TILE_SIZE_X, w_padded) + prepadding;
|
||||
int tile_y0 = yi * TILE_SIZE_Y - prepadding;
|
||||
int tile_y1 = std::min((yi + 1) * TILE_SIZE_Y, h_padded) + prepadding;
|
||||
|
||||
in1_tile_gpu.create(tile_x1 - tile_x0, tile_y1 - tile_y0, 3, in_out_tile_elemsize, 1, blob_vkallocator);
|
||||
|
||||
std::vector<ncnn::VkMat> bindings(2);
|
||||
bindings[0] = in1_gpu;
|
||||
bindings[1] = in1_tile_gpu;
|
||||
|
||||
std::vector<ncnn::vk_constant_type> constants(9);
|
||||
constants[0].i = in1_gpu.w;
|
||||
constants[1].i = in1_gpu.h;
|
||||
constants[2].i = in1_gpu.cstep;
|
||||
constants[3].i = in1_tile_gpu.w;
|
||||
constants[4].i = in1_tile_gpu.h;
|
||||
constants[5].i = in1_tile_gpu.cstep;
|
||||
constants[6].i = prepadding;
|
||||
constants[7].i = std::max(prepadding - yi * TILE_SIZE_Y, 0);
|
||||
constants[8].i = xi * TILE_SIZE_X;
|
||||
|
||||
cmd.record_pipeline(rife_preproc, bindings, constants, in1_tile_gpu);
|
||||
}
|
||||
|
||||
// fprintf(stderr, "in0_tile_gpu %d %d\n", in0_tile_gpu.w, in0_tile_gpu.h);
|
||||
|
||||
// flownet
|
||||
ncnn::VkMat flow;
|
||||
{
|
||||
ncnn::Extractor ex = flownet.create_extractor();
|
||||
ex.set_blob_vkallocator(blob_vkallocator);
|
||||
ex.set_workspace_vkallocator(blob_vkallocator);
|
||||
ex.set_staging_vkallocator(staging_vkallocator);
|
||||
|
||||
ex.input("input0", in0_tile_gpu);
|
||||
ex.input("input1", in1_tile_gpu);
|
||||
ex.extract("758", flow, cmd);
|
||||
}
|
||||
|
||||
// contextnet
|
||||
ncnn::VkMat ctx0[4];
|
||||
ncnn::VkMat ctx1[4];
|
||||
{
|
||||
ncnn::Extractor ex = contextnet.create_extractor();
|
||||
ex.set_blob_vkallocator(blob_vkallocator);
|
||||
ex.set_workspace_vkallocator(blob_vkallocator);
|
||||
ex.set_staging_vkallocator(staging_vkallocator);
|
||||
|
||||
ex.input("input.1", in0_tile_gpu);
|
||||
ex.input("flow.0", flow);
|
||||
ex.extract("63", ctx0[0], cmd);
|
||||
ex.extract("97", ctx0[1], cmd);
|
||||
ex.extract("131", ctx0[2], cmd);
|
||||
ex.extract("165", ctx0[3], cmd);
|
||||
}
|
||||
{
|
||||
ncnn::Extractor ex = contextnet.create_extractor();
|
||||
ex.set_blob_vkallocator(blob_vkallocator);
|
||||
ex.set_workspace_vkallocator(blob_vkallocator);
|
||||
ex.set_staging_vkallocator(staging_vkallocator);
|
||||
|
||||
ex.input("input.1", in1_tile_gpu);
|
||||
ex.input("flow.1", flow);
|
||||
ex.extract("63", ctx1[0], cmd);
|
||||
ex.extract("97", ctx1[1], cmd);
|
||||
ex.extract("131", ctx1[2], cmd);
|
||||
ex.extract("165", ctx1[3], cmd);
|
||||
}
|
||||
|
||||
// fusionnet
|
||||
ncnn::VkMat warped_img0;
|
||||
ncnn::VkMat warped_img1;
|
||||
ncnn::VkMat refine_output;
|
||||
{
|
||||
ncnn::Extractor ex = fusionnet.create_extractor();
|
||||
ex.set_blob_vkallocator(blob_vkallocator);
|
||||
ex.set_workspace_vkallocator(blob_vkallocator);
|
||||
ex.set_staging_vkallocator(staging_vkallocator);
|
||||
|
||||
ex.input("img0", in0_tile_gpu);
|
||||
ex.input("img1", in1_tile_gpu);
|
||||
ex.input("flow", flow);
|
||||
ex.input("3", ctx0[0]);
|
||||
ex.input("4", ctx0[1]);
|
||||
ex.input("5", ctx0[2]);
|
||||
ex.input("6", ctx0[3]);
|
||||
ex.input("7", ctx1[0]);
|
||||
ex.input("8", ctx1[1]);
|
||||
ex.input("9", ctx1[2]);
|
||||
ex.input("10", ctx1[3]);
|
||||
ex.extract("70", warped_img0, cmd);
|
||||
ex.extract("77", warped_img1, cmd);
|
||||
ex.extract("162", refine_output, cmd);
|
||||
}
|
||||
|
||||
ncnn::VkMat out_gpu_padded;
|
||||
// TODO implement this in postproc
|
||||
// res = torch.sigmoid(refine_output[:, :3]) * 2 - 1
|
||||
// mask = torch.sigmoid(refine_output[:, 3:4])
|
||||
// merged_img = warped_img0 * mask + warped_img1 * (1 - mask)
|
||||
// pred = merged_img + res
|
||||
// pred = torch.clamp(pred, 0, 1)
|
||||
|
||||
// postproc
|
||||
{
|
||||
std::vector<ncnn::VkMat> bindings(2);
|
||||
bindings[0] = out_gpu_padded;
|
||||
bindings[1] = out_gpu;
|
||||
|
||||
std::vector<ncnn::vk_constant_type> constants(9);
|
||||
constants[0].i = out_gpu_padded.w;
|
||||
constants[1].i = out_gpu_padded.h;
|
||||
constants[2].i = out_gpu_padded.cstep;
|
||||
constants[3].i = out_gpu.w;
|
||||
constants[4].i = out_gpu.h;
|
||||
constants[5].i = out_gpu.cstep;
|
||||
constants[6].i = prepadding;
|
||||
constants[7].i = prepadding;
|
||||
constants[8].i = xi * TILE_SIZE_X;
|
||||
|
||||
ncnn::VkMat dispatcher;
|
||||
dispatcher.w = std::min((xi + 1) * TILE_SIZE_X, w) - xi * TILE_SIZE_X;
|
||||
dispatcher.h = out_gpu.h;
|
||||
dispatcher.c = 3;
|
||||
|
||||
cmd.record_pipeline(rife_postproc, bindings, constants, dispatcher);
|
||||
}
|
||||
|
||||
if (xtiles > 1)
|
||||
{
|
||||
cmd.submit_and_wait();
|
||||
cmd.reset();
|
||||
}
|
||||
|
||||
// fprintf(stderr, "%.2f%%\n", (float)(yi * xtiles + xi) / (ytiles * xtiles) * 100);
|
||||
}
|
||||
|
||||
// download
|
||||
{
|
||||
ncnn::Mat out;
|
||||
|
||||
if (opt.use_fp16_storage && opt.use_int8_storage)
|
||||
{
|
||||
out = ncnn::Mat(out_gpu.w, out_gpu.h, (unsigned char*)outimage.data + out_tile_y0 * w * channels, (size_t)channels, 1);
|
||||
}
|
||||
|
||||
cmd.record_clone(out_gpu, out, opt);
|
||||
|
||||
cmd.submit_and_wait();
|
||||
|
||||
if (!(opt.use_fp16_storage && opt.use_int8_storage))
|
||||
{
|
||||
#if _WIN32
|
||||
out.to_pixels((unsigned char*)outimage.data + out_tile_y0 * w * channels, ncnn::Mat::PIXEL_RGB2BGR);
|
||||
#else
|
||||
out.to_pixels((unsigned char*)outimage.data + out_tile_y0 * w * channels, ncnn::Mat::PIXEL_RGB);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vkdev->reclaim_blob_allocator(blob_vkallocator);
|
||||
vkdev->reclaim_staging_allocator(staging_vkallocator);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#ifndef RIFE_H
|
||||
#define RIFE_H
|
||||
|
||||
#include <string>
|
||||
|
||||
// ncnn
|
||||
#include "net.h"
|
||||
|
||||
class RIFE
|
||||
{
|
||||
public:
|
||||
RIFE(int gpuid);
|
||||
~RIFE();
|
||||
|
||||
int load();
|
||||
|
||||
int process(const ncnn::Mat& in0image, const ncnn::Mat& in1image, float timestep, ncnn::Mat& outimage) const;
|
||||
|
||||
public:
|
||||
// rife parameters
|
||||
int tilesize;
|
||||
int prepadding;
|
||||
|
||||
private:
|
||||
ncnn::VulkanDevice* vkdev;
|
||||
ncnn::Net flownet;
|
||||
ncnn::Net contextnet;
|
||||
ncnn::Net fusionnet;
|
||||
ncnn::Pipeline* rife_preproc;
|
||||
ncnn::Pipeline* rife_postproc;
|
||||
};
|
||||
|
||||
#endif // RIFE_H
|
|
@ -0,0 +1,26 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#ifndef RIFE_OPS_H
|
||||
#define RIFE_OPS_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
// ncnn
|
||||
#include "layer.h"
|
||||
#include "pipeline.h"
|
||||
|
||||
class Warp : public ncnn::Layer
|
||||
{
|
||||
public:
|
||||
Warp();
|
||||
virtual int create_pipeline(const ncnn::Option& opt);
|
||||
virtual int destroy_pipeline(const ncnn::Option& opt);
|
||||
virtual int forward(const std::vector<ncnn::Mat>& bottom_blobs, std::vector<ncnn::Mat>& top_blobs, const ncnn::Option& opt) const;
|
||||
virtual int forward(const std::vector<ncnn::VkMat>& bottom_blobs, std::vector<ncnn::VkMat>& top_blobs, ncnn::VkCompute& cmd, const ncnn::Option& opt) const;
|
||||
|
||||
private:
|
||||
ncnn::Pipeline* pipeline_warp;
|
||||
ncnn::Pipeline* pipeline_warp_pack4;
|
||||
};
|
||||
|
||||
#endif // RIFE_OPS_H
|
|
@ -0,0 +1,71 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#version 450
|
||||
|
||||
#if NCNN_fp16_storage
|
||||
#extension GL_EXT_shader_16bit_storage: require
|
||||
#endif
|
||||
|
||||
#if NCNN_int8_storage
|
||||
#extension GL_EXT_shader_8bit_storage: require
|
||||
#endif
|
||||
|
||||
layout (constant_id = 0) const int bgr = 0;
|
||||
|
||||
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
|
||||
#if NCNN_int8_storage
|
||||
layout (binding = 1) writeonly buffer top_blob { uint8_t top_blob_data[]; };
|
||||
#else
|
||||
layout (binding = 1) writeonly buffer top_blob { float top_blob_data[]; };
|
||||
#endif
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
int w;
|
||||
int h;
|
||||
int cstep;
|
||||
|
||||
int outw;
|
||||
int outh;
|
||||
int outcstep;
|
||||
|
||||
int pad_x;
|
||||
int pad_y;
|
||||
|
||||
int crop_x;
|
||||
} p;
|
||||
|
||||
// Postprocess: copy one channel value of the padded network output tile into
// the output strip, scaling it from [0, 1] to [0, 255].
// With int8 storage the value is written as an interleaved byte (RGB, or BGR
// when the bgr specialization constant is non-zero); otherwise it is written
// as a planar float.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    if (gx >= p.outw || gy >= p.outh || gz >= 3)
        return;

    // step over the padding border of the source tile
    int x = gx + p.pad_x;
    int y = gy + p.pad_y;

    float v = float(bottom_blob_data[gz * p.cstep + y * p.w + x]);

    const float denorm_val = 255.f;
    const float clip_eps = 0.5f;

    // adding 0.5 before floor() rounds to the nearest integer
    v = v * denorm_val + clip_eps;

#if NCNN_int8_storage
    // crop_x shifts the write to this tile's column range in the strip
    int v_offset = gy * p.outw + gx + p.crop_x;

    // clamp while the value is still a float: converting a negative float
    // to uint is undefined, so clamping after the conversion is too late
    uint v32 = uint(clamp(floor(v), 0.f, 255.f));

    if (bgr == 0)
        top_blob_data[v_offset * 3 + gz] = uint8_t(v32);
    else
        top_blob_data[v_offset * 3 + 2 - gz] = uint8_t(v32);
#else
    int v_offset = gz * p.outcstep + gy * p.outw + gx + p.crop_x;

    top_blob_data[v_offset] = v;
#endif
}
|
|
@ -0,0 +1,72 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#version 450
|
||||
|
||||
#if NCNN_fp16_storage
|
||||
#extension GL_EXT_shader_16bit_storage: require
|
||||
#endif
|
||||
|
||||
#if NCNN_int8_storage
|
||||
#extension GL_EXT_shader_8bit_storage: require
|
||||
#endif
|
||||
|
||||
layout (constant_id = 0) const int bgr = 0;
|
||||
|
||||
#if NCNN_int8_storage
|
||||
layout (binding = 0) readonly buffer bottom_blob { uint8_t bottom_blob_data[]; };
|
||||
#else
|
||||
layout (binding = 0) readonly buffer bottom_blob { float bottom_blob_data[]; };
|
||||
#endif
|
||||
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
int w;
|
||||
int h;
|
||||
int cstep;
|
||||
|
||||
int outw;
|
||||
int outh;
|
||||
int outcstep;
|
||||
|
||||
int pad_x;
|
||||
int pad_y;
|
||||
|
||||
int crop_x;
|
||||
} p;
|
||||
|
||||
// Preprocess: fill one element of the padded planar tile from the uploaded
// image rows, replicating the border and scaling by 1/255.
void main()
{
    const ivec3 gid = ivec3(gl_GlobalInvocationID);

    if (gid.x >= p.outw || gid.y >= p.outh || gid.z >= 3)
        return;

    // source position of this tile element, clamped so that reads outside
    // the image replicate the nearest border pixel
    const int src_x = clamp(gid.x - p.pad_x + p.crop_x, 0, p.w - 1);
    const int src_y = clamp(gid.y - p.pad_y, 0, p.h - 1);

#if NCNN_int8_storage
    // interleaved bytes; the bgr constant mirrors the channel index
    const int pixel = src_y * p.w + src_x;

    float v;

    if (bgr == 0)
        v = float(uint(bottom_blob_data[pixel * 3 + gid.z]));
    else
        v = float(uint(bottom_blob_data[pixel * 3 + 2 - gid.z]));
#else
    // planar floats
    float v = bottom_blob_data[gid.z * p.cstep + src_y * p.w + src_x];
#endif

    const float norm_val = 1 / 255.f;

    top_blob_data[gid.z * p.outcstep + gid.y * p.outw + gid.x] = sfp(v * norm_val);
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,71 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#version 450
|
||||
|
||||
#if NCNN_fp16_storage
|
||||
#extension GL_EXT_shader_16bit_storage: require
|
||||
#endif
|
||||
#if NCNN_fp16_arithmetic
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
|
||||
#endif
|
||||
|
||||
layout (binding = 0) readonly buffer image_blob { sfp image_blob_data[]; };
|
||||
layout (binding = 1) readonly buffer flow_blob { sfp flow_blob_data[]; };
|
||||
layout (binding = 2) writeonly buffer top_blob { sfp top_blob_data[]; };
|
||||
|
||||
layout (push_constant) uniform parameter
|
||||
{
|
||||
int w;
|
||||
int h;
|
||||
int c;
|
||||
int cstep;
|
||||
} p;
|
||||
|
||||
// Warp (pack1): each invocation writes one output element by sampling the
// image at (gx + flow_x, gy + flow_y) with bilinear interpolation.
// Samples whose 2x2 footprint is not fully inside the image produce 0.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    if (gx >= p.w || gy >= p.h || gz >= p.c)
        return;

    // flow channel 0 holds the x displacement, channel 1 the y displacement
    const int pixel = gy * p.w + gx;

    afp sample_x = afp(gx) + buffer_ld1(flow_blob_data, pixel);
    afp sample_y = afp(gy) + buffer_ld1(flow_blob_data, p.cstep + pixel);

    // top-left corner of the 2x2 neighbourhood
    int x0 = int(floor(sample_x));
    int y0 = int(floor(sample_y));

    afp v = afp(0.f);

    if (x0 >= 0 && y0 >= 0 && x0 < p.w - 1 && y0 < p.h - 1)
    {
        afp alpha = sample_x - afp(x0);
        afp beta = sample_y - afp(y0);

        const int corner = gz * p.cstep + y0 * p.w + x0;

        afp v0 = buffer_ld1(image_blob_data, corner);
        afp v1 = buffer_ld1(image_blob_data, corner + 1);
        afp v2 = buffer_ld1(image_blob_data, corner + p.w);
        afp v3 = buffer_ld1(image_blob_data, corner + p.w + 1);

        // blend along x on both rows, then along y
        afp upper = v0 * (afp(1.f) - alpha) + v1 * alpha;
        afp lower = v2 * (afp(1.f) - alpha) + v3 * alpha;

        v = upper * (afp(1.f) - beta) + lower * beta;
    }

    buffer_st1(top_blob_data, gz * p.cstep + pixel, v);
}
|
|
@ -0,0 +1,181 @@
|
|||
// rife implemented with ncnn library
|
||||
|
||||
#include "rife_ops.h"
|
||||
|
||||
#include "warp.comp.hex.h"
|
||||
#include "warp_pack4.comp.hex.h"
|
||||
|
||||
using namespace ncnn;
|
||||
|
||||
// Mark the layer as Vulkan-capable and start with no pipelines.
Warp::Warp()
{
    // pipelines are built later by create_pipeline()
    pipeline_warp_pack4 = 0;
    pipeline_warp = 0;

    support_vulkan = true;
}
|
||||
|
||||
int Warp::create_pipeline(const Option& opt)
|
||||
{
|
||||
std::vector<vk_specialization_type> specializations(0 + 0);
|
||||
|
||||
// pack1
|
||||
{
|
||||
static std::vector<uint32_t> spirv;
|
||||
static ncnn::Mutex lock;
|
||||
{
|
||||
ncnn::MutexLockGuard guard(lock);
|
||||
if (spirv.empty())
|
||||
{
|
||||
compile_spirv_module(warp_comp_data, sizeof(warp_comp_data), opt, spirv);
|
||||
}
|
||||
}
|
||||
|
||||
pipeline_warp = new Pipeline(vkdev);
|
||||
pipeline_warp->set_optimal_local_size_xyz();
|
||||
pipeline_warp->create(spirv.data(), spirv.size() * 4, specializations);
|
||||
}
|
||||
|
||||
// pack4
|
||||
{
|
||||
static std::vector<uint32_t> spirv;
|
||||
static ncnn::Mutex lock;
|
||||
{
|
||||
ncnn::MutexLockGuard guard(lock);
|
||||
if (spirv.empty())
|
||||
{
|
||||
compile_spirv_module(warp_pack4_comp_data, sizeof(warp_pack4_comp_data), opt, spirv);
|
||||
}
|
||||
}
|
||||
|
||||
pipeline_warp_pack4 = new Pipeline(vkdev);
|
||||
pipeline_warp_pack4->set_optimal_local_size_xyz();
|
||||
pipeline_warp_pack4->create(spirv.data(), spirv.size() * 4, specializations);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int Warp::destroy_pipeline(const Option& opt)
|
||||
{
|
||||
delete pipeline_warp;
|
||||
pipeline_warp = 0;
|
||||
|
||||
delete pipeline_warp_pack4;
|
||||
pipeline_warp_pack4 = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int Warp::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
|
||||
{
|
||||
const Mat& image_blob = bottom_blobs[0];
|
||||
const Mat& flow_blob = bottom_blobs[1];
|
||||
|
||||
int w = image_blob.w;
|
||||
int h = image_blob.h;
|
||||
int channels = image_blob.c;
|
||||
|
||||
Mat& top_blob = top_blobs[0];
|
||||
top_blob.create(w, h, channels);
|
||||
if (top_blob.empty())
|
||||
return -100;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int q = 0; q < channels; q++)
|
||||
{
|
||||
float* outptr = top_blob.channel(q);
|
||||
|
||||
const Mat image = image_blob.channel(q);
|
||||
|
||||
const float* fxptr = flow_blob.channel(0);
|
||||
const float* fyptr = flow_blob.channel(1);
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
float flow_x = fxptr[0];
|
||||
float flow_y = fyptr[0];
|
||||
|
||||
float sample_x = x + flow_x;
|
||||
float sample_y = y + flow_y;
|
||||
|
||||
// bilinear interpolate
|
||||
float v;
|
||||
{
|
||||
int x0 = floor(sample_x);
|
||||
int y0 = floor(sample_y);
|
||||
|
||||
if (x0 < 0 || y0 < 0 || x0 >= w - 1 || y0 >= h - 1)
|
||||
{
|
||||
v = 0.f;
|
||||
}
|
||||
else
|
||||
{
|
||||
float alpha = sample_x - x0;
|
||||
float beta = sample_y - y0;
|
||||
|
||||
float v0 = image.row(y0)[x0];
|
||||
float v1 = image.row(y0)[x0 + 1];
|
||||
float v2 = image.row(y0 + 1)[x0];
|
||||
float v3 = image.row(y0 + 1)[x0 + 1];
|
||||
|
||||
float v4 = v0 * (1 - alpha) + v1 * alpha;
|
||||
float v5 = v2 * (1 - alpha) + v3 * alpha;
|
||||
|
||||
v = v4 * (1 - beta) + v5 * beta;
|
||||
}
|
||||
}
|
||||
|
||||
outptr[0] = v;
|
||||
|
||||
outptr += 1;
|
||||
|
||||
fxptr += 1;
|
||||
fyptr += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int Warp::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
|
||||
{
|
||||
const VkMat& image_blob = bottom_blobs[0];
|
||||
const VkMat& flow_blob = bottom_blobs[1];
|
||||
|
||||
int w = image_blob.w;
|
||||
int h = image_blob.h;
|
||||
int channels = image_blob.c;
|
||||
size_t elemsize = image_blob.elemsize;
|
||||
int elempack = image_blob.elempack;
|
||||
|
||||
VkMat& top_blob = top_blobs[0];
|
||||
top_blob.create(w, h, channels, elemsize, elempack, opt.blob_vkallocator);
|
||||
if (top_blob.empty())
|
||||
return -100;
|
||||
|
||||
std::vector<VkMat> bindings(3);
|
||||
bindings[0] = image_blob;
|
||||
bindings[1] = flow_blob;
|
||||
bindings[2] = top_blob;
|
||||
|
||||
std::vector<vk_constant_type> constants(4);
|
||||
constants[0].i = top_blob.w;
|
||||
constants[1].i = top_blob.h;
|
||||
constants[2].i = top_blob.c;
|
||||
constants[3].i = top_blob.cstep;
|
||||
|
||||
if (elempack == 4)
|
||||
{
|
||||
cmd.record_pipeline(pipeline_warp_pack4, bindings, constants, top_blob);
|
||||
}
|
||||
else // if (elempack == 1)
|
||||
{
|
||||
cmd.record_pipeline(pipeline_warp, bindings, constants, top_blob);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
// rife implemented with ncnn library

#version 450

#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// binding 0: image to sample from, four packed values per element
layout (binding = 0) readonly buffer image_blob { sfpvec4 image_blob_data[]; };
// binding 1: flow field, scalar elements; x displacements first, y displacements p.cstep elements later
layout (binding = 1) readonly buffer flow_blob { sfp flow_blob_data[]; };
// binding 2: warped result, same element layout as the image
layout (binding = 2) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };

layout (push_constant) uniform parameter
{
    int w;
    int h;
    int c;
    int cstep;
} p;

// One invocation per output element (gx, gy, gz).
// Reads the flow at (gx, gy), adds it to the pixel coordinate and stores the
// bilinear sample of the image at that position. When the top-left neighbour
// of the sample lies outside [0, w-2] x [0, h-2] the result is zero.
// p.cstep is used as the channel stride for the image, the flow field and the
// output alike.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    if (gx >= p.w || gy >= p.h || gz >= p.c)
        return;

    // offset of this pixel inside a channel, and offset of this channel
    int pixel = gy * p.w + gx;
    int plane = gz * p.cstep;

    afp sample_x = afp(gx) + buffer_ld1(flow_blob_data, pixel);
    afp sample_y = afp(gy) + buffer_ld1(flow_blob_data, p.cstep + pixel);

    int x0 = int(floor(sample_x));
    int y0 = int(floor(sample_y));

    // stays zero unless all four neighbours are inside the image
    afpvec4 v = afpvec4(0.f);

    if (x0 >= 0 && y0 >= 0 && x0 < p.w - 1 && y0 < p.h - 1)
    {
        afp alpha = sample_x - afp(x0);
        afp beta = sample_y - afp(y0);

        int row0 = plane + y0 * p.w;
        int row1 = plane + (y0 + 1) * p.w;

        afpvec4 tl = buffer_ld4(image_blob_data, row0 + x0);
        afpvec4 tr = buffer_ld4(image_blob_data, row0 + x0 + 1);
        afpvec4 bl = buffer_ld4(image_blob_data, row1 + x0);
        afpvec4 br = buffer_ld4(image_blob_data, row1 + x0 + 1);

        // blend horizontally on both rows, then vertically
        afpvec4 upper = tl * (afp(1.f) - alpha) + tr * alpha;
        afpvec4 lower = bl * (afp(1.f) - alpha) + br * alpha;

        v = upper * (afp(1.f) - beta) + lower * beta;
    }

    buffer_st4(top_blob_data, plane + pixel, v);
}
|
|
@ -0,0 +1,106 @@
|
|||
#ifndef WEBP_IMAGE_H
|
||||
#define WEBP_IMAGE_H
|
||||
|
||||
// webp image decoder and encoder with libwebp
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "webp/decode.h"
|
||||
#include "webp/encode.h"
|
||||
|
||||
unsigned char* webp_load(const unsigned char* buffer, int len, int* w, int* h, int* c)
|
||||
{
|
||||
unsigned char* pixeldata = 0;
|
||||
|
||||
WebPDecoderConfig config;
|
||||
WebPInitDecoderConfig(&config);
|
||||
|
||||
if (WebPGetFeatures(buffer, len, &config.input) != VP8_STATUS_OK)
|
||||
return NULL;
|
||||
|
||||
int width = config.input.width;
|
||||
int height = config.input.height;
|
||||
int channels = config.input.has_alpha ? 4 : 3;
|
||||
|
||||
pixeldata = (unsigned char*)malloc(width * height * channels);
|
||||
|
||||
#if _WIN32
|
||||
config.output.colorspace = channels == 4 ? MODE_BGRA : MODE_BGR;
|
||||
#else
|
||||
config.output.colorspace = channels == 4 ? MODE_RGBA : MODE_RGB;
|
||||
#endif
|
||||
|
||||
config.output.u.RGBA.stride = width * channels;
|
||||
config.output.u.RGBA.size = width * height * channels;
|
||||
config.output.u.RGBA.rgba = pixeldata;
|
||||
config.output.is_external_memory = 1;
|
||||
|
||||
if (WebPDecode(buffer, len, &config) != VP8_STATUS_OK)
|
||||
{
|
||||
free(pixeldata);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*w = width;
|
||||
*h = height;
|
||||
*c = channels;
|
||||
|
||||
return pixeldata;
|
||||
}
|
||||
|
||||
// Encode a packed 8-bit image as lossless WebP and write it to filepath.
//
// filepath  : destination file (wide string when _WIN32 is set)
// w, h      : image size in pixels
// c         : channel count; only 3 and 4 are supported
// pixeldata : w * h * c bytes, BGR(A) when _WIN32 is set and RGB(A) otherwise
//
// Returns 1 when the whole encoded image was written and the file was closed
// without error, 0 otherwise (unsupported c, encoder failure, open/write/close
// failure).
#if _WIN32
int webp_save(const wchar_t* filepath, int w, int h, int c, const unsigned char* pixeldata)
#else
int webp_save(const char* filepath, int w, int h, int c, const unsigned char* pixeldata)
#endif
{
    int ret = 0;

    unsigned char* output = 0;
    size_t length = 0;

    FILE* fp = 0;

    if (c == 3)
    {
#if _WIN32
        length = WebPEncodeLosslessBGR(pixeldata, w, h, w * 3, &output);
#else
        length = WebPEncodeLosslessRGB(pixeldata, w, h, w * 3, &output);
#endif
    }
    else if (c == 4)
    {
#if _WIN32
        length = WebPEncodeLosslessBGRA(pixeldata, w, h, w * 4, &output);
#else
        length = WebPEncodeLosslessRGBA(pixeldata, w, h, w * 4, &output);
#endif
    }
    else
    {
        // unsupported channel type, length stays 0 and we bail out below
    }

    if (length == 0)
        goto RETURN;

#if _WIN32
    fp = _wfopen(filepath, L"wb");
#else
    fp = fopen(filepath, "wb");
#endif
    if (!fp)
        goto RETURN;

    // a short write means the file on disk is truncated
    if (fwrite(output, 1, length, fp) != length)
        goto RETURN;

    // fclose flushes buffered data, so its result decides whether the write succeeded
    {
        int close_status = fclose(fp);
        fp = 0;
        if (close_status != 0)
            goto RETURN;
    }

    ret = 1;

RETURN:
    if (output) WebPFree(output);
    if (fp) fclose(fp);

    return ret;
}
|
||||
|
||||
#endif // WEBP_IMAGE_H
|
|
@ -0,0 +1,251 @@
|
|||
#ifndef WIC_IMAGE_H
|
||||
#define WIC_IMAGE_H
|
||||
|
||||
// image decoder and encoder with WIC
|
||||
#include <wincodec.h>
|
||||
|
||||
unsigned char* wic_decode_image(const wchar_t* filepath, int* w, int* h, int* c)
|
||||
{
|
||||
IWICImagingFactory* factory = 0;
|
||||
IWICBitmapDecoder* decoder = 0;
|
||||
IWICBitmapFrameDecode* frame = 0;
|
||||
WICPixelFormatGUID pixel_format;
|
||||
IWICFormatConverter* converter = 0;
|
||||
IWICBitmap* bitmap = 0;
|
||||
IWICBitmapLock* lock = 0;
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
int channels = 0;
|
||||
WICRect rect = { 0, 0, 0, 0 };
|
||||
unsigned int datasize = 0;
|
||||
unsigned char* data = 0;
|
||||
int stride = 0;
|
||||
unsigned char* bgrdata = 0;
|
||||
|
||||
if (CoCreateInstance(CLSID_WICImagingFactory1, 0, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&factory)))
|
||||
goto RETURN;
|
||||
|
||||
if (factory->CreateDecoderFromFilename(filepath, 0, GENERIC_READ, WICDecodeMetadataCacheOnDemand, &decoder))
|
||||
goto RETURN;
|
||||
|
||||
if (decoder->GetFrame(0, &frame))
|
||||
goto RETURN;
|
||||
|
||||
if (factory->CreateFormatConverter(&converter))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->GetPixelFormat(&pixel_format))
|
||||
goto RETURN;
|
||||
|
||||
if (!IsEqualGUID(pixel_format, GUID_WICPixelFormat32bppBGRA))
|
||||
pixel_format = GUID_WICPixelFormat24bppBGR;
|
||||
|
||||
channels = IsEqualGUID(pixel_format, GUID_WICPixelFormat32bppBGRA) ? 4 : 3;
|
||||
|
||||
if (converter->Initialize(frame, pixel_format, WICBitmapDitherTypeNone, 0, 0.0, WICBitmapPaletteTypeCustom))
|
||||
goto RETURN;
|
||||
|
||||
if (factory->CreateBitmapFromSource(converter, WICBitmapCacheOnDemand, &bitmap))
|
||||
goto RETURN;
|
||||
|
||||
if (bitmap->GetSize((UINT*)&width, (UINT*)&height))
|
||||
goto RETURN;
|
||||
|
||||
rect.Width = width;
|
||||
rect.Height = height;
|
||||
if (bitmap->Lock(&rect, WICBitmapLockRead, &lock))
|
||||
goto RETURN;
|
||||
|
||||
if (lock->GetDataPointer(&datasize, &data))
|
||||
goto RETURN;
|
||||
|
||||
if (lock->GetStride((UINT*)&stride))
|
||||
goto RETURN;
|
||||
|
||||
bgrdata = (unsigned char*)malloc(width * height * channels);
|
||||
if (!bgrdata)
|
||||
goto RETURN;
|
||||
|
||||
for (int y = 0; y < height; y++)
|
||||
{
|
||||
const unsigned char* ptr = data + y * stride;
|
||||
unsigned char* bgrptr = bgrdata + y * width * channels;
|
||||
memcpy(bgrptr, ptr, width * channels);
|
||||
}
|
||||
|
||||
*w = width;
|
||||
*h = height;
|
||||
*c = channels;
|
||||
|
||||
RETURN:
|
||||
if (lock) lock->Release();
|
||||
if (bitmap) bitmap->Release();
|
||||
if (decoder) decoder->Release();
|
||||
if (frame) frame->Release();
|
||||
if (converter) converter->Release();
|
||||
if (factory) factory->Release();
|
||||
|
||||
return bgrdata;
|
||||
}
|
||||
|
||||
int wic_encode_image(const wchar_t* filepath, int w, int h, int c, void* bgrdata)
|
||||
{
|
||||
IWICImagingFactory* factory = 0;
|
||||
IWICStream* stream = 0;
|
||||
IWICBitmapEncoder* encoder = 0;
|
||||
IWICBitmapFrameEncode* frame = 0;
|
||||
WICPixelFormatGUID format = c == 4 ? GUID_WICPixelFormat32bppBGRA : GUID_WICPixelFormat24bppBGR;
|
||||
int stride = (w * c * 8 + 7) / 8;
|
||||
unsigned char* data = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (CoCreateInstance(CLSID_WICImagingFactory1, 0, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&factory)))
|
||||
goto RETURN;
|
||||
|
||||
if (factory->CreateStream(&stream))
|
||||
goto RETURN;
|
||||
|
||||
if (stream->InitializeFromFilename(filepath, GENERIC_WRITE))
|
||||
goto RETURN;
|
||||
|
||||
if (factory->CreateEncoder(GUID_ContainerFormatPng, 0, &encoder))
|
||||
goto RETURN;
|
||||
|
||||
if (encoder->Initialize(stream, WICBitmapEncoderNoCache))
|
||||
goto RETURN;
|
||||
|
||||
if (encoder->CreateNewFrame(&frame, 0))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->Initialize(0))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->SetSize((UINT)w, (UINT)h))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->SetPixelFormat(&format))
|
||||
goto RETURN;
|
||||
|
||||
if (!IsEqualGUID(format, c == 4 ? GUID_WICPixelFormat32bppBGRA : GUID_WICPixelFormat24bppBGR))
|
||||
goto RETURN;
|
||||
|
||||
data = (unsigned char*)malloc(h * stride);
|
||||
if (!data)
|
||||
goto RETURN;
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
const unsigned char* bgrptr = (const unsigned char*)bgrdata + y * w * c;
|
||||
unsigned char* ptr = data + y * stride;
|
||||
memcpy(ptr, bgrptr, w * c);
|
||||
}
|
||||
|
||||
if (frame->WritePixels(h, stride, h * stride, data))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->Commit())
|
||||
goto RETURN;
|
||||
|
||||
if (encoder->Commit())
|
||||
goto RETURN;
|
||||
|
||||
ret = 1;
|
||||
|
||||
RETURN:
|
||||
if (data) free(data);
|
||||
if (encoder) encoder->Release();
|
||||
if (frame) frame->Release();
|
||||
if (stream) stream->Release();
|
||||
if (factory) factory->Release();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int wic_encode_jpeg_image(const wchar_t* filepath, int w, int h, int c, void* bgrdata)
|
||||
{
|
||||
// assert c == 3
|
||||
|
||||
IWICImagingFactory* factory = 0;
|
||||
IWICStream* stream = 0;
|
||||
IWICBitmapEncoder* encoder = 0;
|
||||
IWICBitmapFrameEncode* frame = 0;
|
||||
IPropertyBag2* propertybag = 0;
|
||||
WICPixelFormatGUID format = GUID_WICPixelFormat24bppBGR;
|
||||
int stride = (w * c * 8 + 7) / 8;
|
||||
unsigned char* data = 0;
|
||||
int ret = 0;
|
||||
|
||||
PROPBAG2 option = { 0 };
|
||||
option.pstrName = L"ImageQuality";
|
||||
VARIANT varValue;
|
||||
VariantInit(&varValue);
|
||||
varValue.vt = VT_R4;
|
||||
varValue.fltVal = 1.0f;
|
||||
|
||||
if (CoCreateInstance(CLSID_WICImagingFactory1, 0, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&factory)))
|
||||
goto RETURN;
|
||||
|
||||
if (factory->CreateStream(&stream))
|
||||
goto RETURN;
|
||||
|
||||
if (stream->InitializeFromFilename(filepath, GENERIC_WRITE))
|
||||
goto RETURN;
|
||||
|
||||
if (factory->CreateEncoder(GUID_ContainerFormatJpeg, 0, &encoder))
|
||||
goto RETURN;
|
||||
|
||||
if (encoder->Initialize(stream, WICBitmapEncoderNoCache))
|
||||
goto RETURN;
|
||||
|
||||
if (encoder->CreateNewFrame(&frame, &propertybag))
|
||||
goto RETURN;
|
||||
|
||||
if (propertybag->Write(1, &option, &varValue))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->Initialize(propertybag))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->SetSize((UINT)w, (UINT)h))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->SetPixelFormat(&format))
|
||||
goto RETURN;
|
||||
|
||||
if (!IsEqualGUID(format, GUID_WICPixelFormat24bppBGR))
|
||||
goto RETURN;
|
||||
|
||||
data = (unsigned char*)malloc(h * stride);
|
||||
if (!data)
|
||||
goto RETURN;
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
{
|
||||
const unsigned char* bgrptr = (const unsigned char*)bgrdata + y * w * c;
|
||||
unsigned char* ptr = data + y * stride;
|
||||
memcpy(ptr, bgrptr, w * c);
|
||||
}
|
||||
|
||||
if (frame->WritePixels(h, stride, h * stride, data))
|
||||
goto RETURN;
|
||||
|
||||
if (frame->Commit())
|
||||
goto RETURN;
|
||||
|
||||
if (encoder->Commit())
|
||||
goto RETURN;
|
||||
|
||||
ret = 1;
|
||||
|
||||
RETURN:
|
||||
if (data) free(data);
|
||||
if (encoder) encoder->Release();
|
||||
if (frame) frame->Release();
|
||||
if (propertybag) propertybag->Release();
|
||||
if (stream) stream->Release();
|
||||
if (factory) factory->Release();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif // WIC_IMAGE_H
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue