1
mirror of https://code.videolan.org/videolan/x264 synced 2024-09-26 01:39:54 +02:00

NV21 input support

Eliminates an extra copy when encoding Android camera preview images.

Checkasm test by Janne Grunau.
ARM assembly with improvements from Janne Grunau.
This commit is contained in:
Yu Xiaolei 2014-06-06 16:05:27 +08:00 committed by Henrik Gramner
parent 6ee94dc898
commit 627f891c57
12 changed files with 91 additions and 13 deletions

View File

@ -1566,6 +1566,30 @@ blocki:
pop {r4-r7, pc}
endfunc
// void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
//                                 pixel *src, intptr_t i_src, int w, int h )
//
// Copies h rows from src to dst while exchanging the two bytes of every
// 16-bit pair (e.g. VU VU ... -> UV UV ...). w is counted in byte pairs.
// Register arguments: r0 = dst, r1 = i_dst, r2 = src, r3 = i_src;
// w and h are passed on the stack.
// Each inner iteration handles 32 bytes (16 pairs), so a row is processed in
// whole 16-pair chunks: up to 15 pairs beyond w may be read and written.
function x264_plane_copy_swap_neon
push {r4-r5, lr}
// Three registers (12 bytes) were pushed above, so the stack arguments now
// start at sp+12: r4 = w, r5 = h.
ldrd r4, r5, [sp, #12]
// lr = w rounded up to a multiple of 16 pairs.
add lr, r4, #15
bic lr, lr, #15
// r0/r2 advance by 2*lr bytes per row through post-increment, so turn both
// strides into "end of this row -> start of next row" deltas.
sub r1, r1, lr, lsl #1
sub r3, r3, lr, lsl #1
// Label 1 is the target of both the inner (column) and outer (row) branches.
1:
vld1.8 {q0, q1}, [r2]!
// Count down 16 pairs; the flags set here are consumed by the bgt below
// (the NEON instructions in between do not touch them).
subs lr, lr, #16
// vrev16.8 reverses the bytes inside each 16-bit halfword, i.e. swaps each pair.
vrev16.8 q0, q0
vrev16.8 q1, q1
vst1.8 {q0, q1}, [r0]!
bgt 1b
// Row finished: decrement the row counter and step both pointers to the next row.
subs r5, r5, #1
add r0, r0, r1
add r2, r2, r3
// Reload the pair counter with the unrounded w; the inner loop runs while the
// counter stays positive, so it still executes ceil(w/16) iterations.
mov lr, r4
bgt 1b
pop {r4-r5, pc}
endfunc
function x264_store_interleave_chroma_neon
push {lr}
ldr lr, [sp, #4]

View File

@ -57,6 +57,8 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@ -240,6 +242,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;

View File

@ -1142,6 +1142,7 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
[X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
[X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
[X264_CSP_NV21] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
[X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
[X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, },

View File

@ -47,6 +47,7 @@ static int x264_frame_internal_csp( int external_csp )
switch( external_csp & X264_CSP_MASK )
{
case X264_CSP_NV12:
case X264_CSP_NV21:
case X264_CSP_I420:
case X264_CSP_YV12:
return X264_CSP_NV12;
@ -435,6 +436,12 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift );
}
else if( i_csp == X264_CSP_NV21 )
{
get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift );
}
else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 )
{
int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;

View File

@ -299,6 +299,17 @@ void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
}
}
/* Copy a plane of interleaved pixel pairs from src to dst, exchanging the two
 * members of every pair (AB AB ... -> BA BA ...).
 *
 *   dst, i_dst: destination plane and its stride, in pixels
 *   src, i_src: source plane and its stride, in pixels
 *   w:          number of pixel pairs per row (2*w pixels are copied)
 *   h:          number of rows
 */
void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst,
                             pixel *src, intptr_t i_src, int w, int h )
{
    const int row_pixels = 2*w;

    for( int row = 0; row < h; row++ )
    {
        pixel *out = dst + row*i_dst;
        pixel *in  = src + row*i_src;

        for( int i = 0; i < row_pixels; i += 2 )
        {
            /* Second member of the pair first, then the first member. */
            out[i]   = in[i+1];
            out[i+1] = in[i];
        }
    }
}
void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst,
pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h )
@ -612,6 +623,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
pf->plane_copy = x264_plane_copy_c;
pf->plane_copy_swap = x264_plane_copy_swap_c;
pf->plane_copy_interleave = x264_plane_copy_interleave_c;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;

View File

@ -88,6 +88,7 @@ typedef struct
void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height );
void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
void (*plane_copy_interleave)( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu,
pixel *srcv, intptr_t i_srcv, int w, int h );
/* may write up to 15 pixels off the end of each plane */

View File

@ -492,7 +492,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
#endif
if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
{
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
return -1;
}

View File

@ -50,6 +50,7 @@ static int depth_filter_csp_is_supported( int csp )
csp_mask == X264_CSP_YV16 ||
csp_mask == X264_CSP_YV24 ||
csp_mask == X264_CSP_NV12 ||
csp_mask == X264_CSP_NV21 ||
csp_mask == X264_CSP_NV16 ||
csp_mask == X264_CSP_BGR ||
csp_mask == X264_CSP_RGB ||
@ -59,7 +60,7 @@ static int depth_filter_csp_is_supported( int csp )
static int csp_num_interleaved( int csp, int plane )
{
int csp_mask = csp & X264_CSP_MASK;
return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV21 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
csp_mask == X264_CSP_BGR || csp_mask == X264_CSP_RGB ? 3 :
csp_mask == X264_CSP_BGRA ? 4 :
1;

View File

@ -156,6 +156,7 @@ static int convert_csp_to_pix_fmt( int csp )
case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA;
/* the next csp has no equivalent 16bit depth in swscale */
case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12;
case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV21;
/* the next csp is not supported by swscale at all */
case X264_CSP_NV16:
default: return AV_PIX_FMT_NONE;

View File

@ -33,6 +33,7 @@ const x264_cli_csp_t x264_cli_csps[] = {
[X264_CSP_YV16] = { "yv16", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 },
[X264_CSP_YV24] = { "yv24", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 },
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
[X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 },
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
[X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },

View File

@ -1413,6 +1413,32 @@ static int check_mc( int cpu_ref, int cpu_new )
}
}
/* Compare the plane_copy_swap implementation under test (mc_a) with the C
 * version (mc_c). Only runs when mc_a's function pointer differs from the
 * reference set, i.e. when there is something new to test. */
if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap )
{
set_func_name( "plane_copy_swap" );
used_asm = 1;
for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
{
/* plane_copy_swap takes its width in pixel pairs, so halve the spec width (rounding up). */
int w = (plane_specs[i].w + 1) >> 1;
int h = plane_specs[i].h;
intptr_t src_stride = plane_specs[i].src_stride;
/* Multiple of 64 that leaves at least 64 pixels of slack after the 2*w
 * pixels of each row. */
intptr_t dst_stride = (2*w + 127) & ~63;
/* Both output buffers are cleared as 0x1000 pixels below; the whole
 * destination plane has to fit inside that. */
assert( dst_stride * h <= 0x1000 );
/* With a negative source stride, start at the last row in memory so that
 * stepping by src_stride stays at or above pbuf1. */
pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
memset( pbuf3, 0, 0x1000*sizeof(pixel) );
memset( pbuf4, 0, 0x1000*sizeof(pixel) );
call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h );
call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h );
/* Only the 2*w pixels of each row are compared; the padding up to
 * dst_stride is ignored (presumably because optimized versions may
 * write past the end of a row -- confirm against the asm). */
for( int y = 0; y < h; y++ )
if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) )
{
ok = 0;
fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
/* Leaves only the row loop; the remaining plane_specs entries are still tested. */
break;
}
}
}
if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave )
{
set_func_name( "plane_copy_interleave" );

23
x264.h
View File

@ -41,7 +41,7 @@
#include "x264_config.h"
#define X264_BUILD 146
#define X264_BUILD 147
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment
@ -214,16 +214,17 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
#define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */
#define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */
#define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */
#define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */
#define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */
#define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */
#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000a /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */
#define X264_CSP_RGB 0x000c /* packed rgb 24bits */
#define X264_CSP_MAX 0x000d /* end of list */
#define X264_CSP_NV21 0x0004 /* yuv 4:2:0, with one y plane and one packed v+u */
#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */
#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */
#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */
#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */
#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */
#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */
#define X264_CSP_BGR 0x000b /* packed bgr 24bits */
#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */
#define X264_CSP_RGB 0x000d /* packed rgb 24bits */
#define X264_CSP_MAX 0x000e /* end of list */
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */