mirror of
https://code.videolan.org/videolan/x264
synced 2024-09-26 01:39:54 +02:00
NV21 input support
Eliminates an extra copy when encoding Android camera preview images. Checkasm test by Janne Grunau. ARM assembly with improvements from Janne Grunau.
This commit is contained in:
parent
6ee94dc898
commit
627f891c57
@ -1566,6 +1566,30 @@ blocki:
|
||||
pop {r4-r7, pc}
|
||||
endfunc
|
||||
|
||||
// void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
//                                 pixel *src, intptr_t i_src, int w, int h )
//
// Copies h rows from src to dst while exchanging the two bytes of every
// 16-bit pair. Register use as read from the code below:
//   r0 = dst (advanced by the post-incrementing stores)
//   r1 = i_dst, r2 = src (advanced by the loads), r3 = i_src
//   r4 = w, r5 = h (loaded from the stack), lr = pairs left in the current row
// NOTE(review): r0-r3 holding the first four arguments and w/h being the first
// two stack words is presumably the standard ARM calling convention - confirm.
// Each inner iteration moves 32 bytes (16 pairs), so a row is processed up to
// the next multiple of 16 pairs: up to 15 extra pairs are read and written
// per row. Both loops are bottom-tested, so at least one 32-byte chunk is
// always processed, even for w <= 0 or h <= 0.
function x264_plane_copy_swap_neon
|
||||
push {r4-r5, lr}
|
||||
// 12 bytes were just pushed, so [sp, #12] is the first word above the saved registers.
ldrd r4, r5, [sp, #12]
|
||||
// lr = (w + 15) & ~15: w rounded up to a multiple of 16 pairs.
add lr, r4, #15
|
||||
bic lr, lr, #15
|
||||
// Turn both strides into "gap after the row": stride - 2 * rounded width,
// because the row loop itself advances r0/r2 by 2 * rounded width.
// NOTE(review): the factor 2 treats a pair as 2 bytes, i.e. 1-byte pixels - confirm.
sub r1, r1, lr, lsl #1
|
||||
sub r3, r3, lr, lsl #1
|
||||
// Shared target of both loops: one 32-byte chunk per pass.
1:
|
||||
vld1.8 {q0, q1}, [r2]!
|
||||
// Sets the flags tested by the first "bgt 1b"; the NEON ops in between leave them alone.
subs lr, lr, #16
|
||||
// vrev16.8 reverses the byte order inside every 16-bit element, i.e. swaps each pair.
vrev16.8 q0, q0
|
||||
vrev16.8 q1, q1
|
||||
vst1.8 {q0, q1}, [r0]!
|
||||
bgt 1b
|
||||
|
||||
// Row finished: count it, then step both pointers over the stride gap.
subs r5, r5, #1
|
||||
add r0, r0, r1
|
||||
add r2, r2, r3
|
||||
// Reload the unrounded w; the subs #16 / bgt loop still runs ceil(w/16) times.
mov lr, r4
|
||||
// add/mov above do not update flags, so this still tests the result of "subs r5".
bgt 1b
|
||||
|
||||
pop {r4-r5, pc}
|
||||
endfunc
|
||||
|
||||
function x264_store_interleave_chroma_neon
|
||||
push {lr}
|
||||
ldr lr, [sp, #4]
|
||||
|
@ -57,6 +57,8 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
|
||||
void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst,
|
||||
pixel *srcu, intptr_t i_srcu,
|
||||
pixel *srcv, intptr_t i_srcv, int w, int h );
|
||||
void x264_plane_copy_swap_neon( pixel *dst, intptr_t i_dst,
|
||||
pixel *src, intptr_t i_src, int w, int h );
|
||||
|
||||
void x264_store_interleave_chroma_neon( pixel *dst, intptr_t i_dst, pixel *srcu, pixel *srcv, int height );
|
||||
void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
|
||||
@ -240,6 +242,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
|
||||
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
|
||||
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
|
||||
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;
|
||||
pf->plane_copy_swap = x264_plane_copy_swap_neon;
|
||||
|
||||
pf->store_interleave_chroma = x264_store_interleave_chroma_neon;
|
||||
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
|
||||
|
@ -1142,6 +1142,7 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
|
||||
[X264_CSP_I420] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
|
||||
[X264_CSP_YV12] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256/2, 256/2 } },
|
||||
[X264_CSP_NV12] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
|
||||
[X264_CSP_NV21] = { 2, { 256*1, 256*1 }, { 256*1, 256/2 }, },
|
||||
[X264_CSP_I422] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
|
||||
[X264_CSP_YV16] = { 3, { 256*1, 256/2, 256/2 }, { 256*1, 256*1, 256*1 } },
|
||||
[X264_CSP_NV16] = { 2, { 256*1, 256*1 }, { 256*1, 256*1 }, },
|
||||
|
@ -47,6 +47,7 @@ static int x264_frame_internal_csp( int external_csp )
|
||||
switch( external_csp & X264_CSP_MASK )
|
||||
{
|
||||
case X264_CSP_NV12:
|
||||
case X264_CSP_NV21:
|
||||
case X264_CSP_I420:
|
||||
case X264_CSP_YV12:
|
||||
return X264_CSP_NV12;
|
||||
@ -435,6 +436,12 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
|
||||
h->mc.plane_copy( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
|
||||
stride[1]/sizeof(pixel), h->param.i_width, h->param.i_height>>v_shift );
|
||||
}
|
||||
else if( i_csp == X264_CSP_NV21 )
|
||||
{
|
||||
get_plane_ptr( h, src, &pix[1], &stride[1], 1, 0, v_shift );
|
||||
h->mc.plane_copy_swap( dst->plane[1], dst->i_stride[1], (pixel*)pix[1],
|
||||
stride[1]/sizeof(pixel), h->param.i_width>>1, h->param.i_height>>v_shift );
|
||||
}
|
||||
else if( i_csp == X264_CSP_I420 || i_csp == X264_CSP_I422 || i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16 )
|
||||
{
|
||||
int uv_swap = i_csp == X264_CSP_YV12 || i_csp == X264_CSP_YV16;
|
||||
|
12
common/mc.c
12
common/mc.c
@ -299,6 +299,17 @@ void x264_plane_copy_c( pixel *dst, intptr_t i_dst,
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy h rows of interleaved two-component data from src to dst, exchanging
 * the two components of every pair (the second pixel of a source pair becomes
 * the first pixel of the destination pair and vice versa).
 *   w            row width counted in pairs; 2*w pixels are written per row
 *   h            number of rows
 *   i_dst, i_src distance between the starts of consecutive rows, in pixels */
void x264_plane_copy_swap_c( pixel *dst, intptr_t i_dst,
                             pixel *src, intptr_t i_src, int w, int h )
{
    for( int rows_left = h; rows_left > 0; rows_left-- )
    {
        pixel *out = dst;
        pixel *in  = src;
        for( int pairs_left = w; pairs_left > 0; pairs_left--, out += 2, in += 2 )
        {
            /* Keep this read/write order: it matches the reference behaviour
             * exactly, including when dst and src refer to the same memory. */
            out[0] = in[1];
            out[1] = in[0];
        }
        dst += i_dst;
        src += i_src;
    }
}
|
||||
|
||||
void x264_plane_copy_interleave_c( pixel *dst, intptr_t i_dst,
|
||||
pixel *srcu, intptr_t i_srcu,
|
||||
pixel *srcv, intptr_t i_srcv, int w, int h )
|
||||
@ -612,6 +623,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent )
|
||||
pf->load_deinterleave_chroma_fdec = load_deinterleave_chroma_fdec;
|
||||
|
||||
pf->plane_copy = x264_plane_copy_c;
|
||||
pf->plane_copy_swap = x264_plane_copy_swap_c;
|
||||
pf->plane_copy_interleave = x264_plane_copy_interleave_c;
|
||||
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_c;
|
||||
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_c;
|
||||
|
@ -88,6 +88,7 @@ typedef struct
|
||||
void (*load_deinterleave_chroma_fdec)( pixel *dst, pixel *src, intptr_t i_src, int height );
|
||||
|
||||
void (*plane_copy)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
|
||||
void (*plane_copy_swap)( pixel *dst, intptr_t i_dst, pixel *src, intptr_t i_src, int w, int h );
|
||||
void (*plane_copy_interleave)( pixel *dst, intptr_t i_dst, pixel *srcu, intptr_t i_srcu,
|
||||
pixel *srcv, intptr_t i_srcv, int w, int h );
|
||||
/* may write up to 15 pixels off the end of each plane */
|
||||
|
@ -492,7 +492,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
|
||||
#endif
|
||||
if( i_csp <= X264_CSP_NONE || i_csp >= X264_CSP_MAX )
|
||||
{
|
||||
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
|
||||
x264_log( h, X264_LOG_ERROR, "invalid CSP (only I420/YV12/NV12/NV21/I422/YV16/NV16/I444/YV24/BGR/BGRA/RGB supported)\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -50,6 +50,7 @@ static int depth_filter_csp_is_supported( int csp )
|
||||
csp_mask == X264_CSP_YV16 ||
|
||||
csp_mask == X264_CSP_YV24 ||
|
||||
csp_mask == X264_CSP_NV12 ||
|
||||
csp_mask == X264_CSP_NV21 ||
|
||||
csp_mask == X264_CSP_NV16 ||
|
||||
csp_mask == X264_CSP_BGR ||
|
||||
csp_mask == X264_CSP_RGB ||
|
||||
@ -59,7 +60,7 @@ static int depth_filter_csp_is_supported( int csp )
|
||||
static int csp_num_interleaved( int csp, int plane )
|
||||
{
|
||||
int csp_mask = csp & X264_CSP_MASK;
|
||||
return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
|
||||
return (csp_mask == X264_CSP_NV12 || csp_mask == X264_CSP_NV21 || csp_mask == X264_CSP_NV16) && plane == 1 ? 2 :
|
||||
csp_mask == X264_CSP_BGR || csp_mask == X264_CSP_RGB ? 3 :
|
||||
csp_mask == X264_CSP_BGRA ? 4 :
|
||||
1;
|
||||
|
@ -156,6 +156,7 @@ static int convert_csp_to_pix_fmt( int csp )
|
||||
case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_BGRA64 : AV_PIX_FMT_BGRA;
|
||||
/* the next csp has no equivalent 16bit depth in swscale */
|
||||
case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV12;
|
||||
case X264_CSP_NV21: return csp&X264_CSP_HIGH_DEPTH ? AV_PIX_FMT_NONE : AV_PIX_FMT_NV21;
|
||||
/* the next csp is not supported by swscale at all */
|
||||
case X264_CSP_NV16:
|
||||
default: return AV_PIX_FMT_NONE;
|
||||
|
@ -33,6 +33,7 @@ const x264_cli_csp_t x264_cli_csps[] = {
|
||||
[X264_CSP_YV16] = { "yv16", 3, { 1, .5, .5 }, { 1, 1, 1 }, 2, 1 },
|
||||
[X264_CSP_YV24] = { "yv24", 3, { 1, 1, 1 }, { 1, 1, 1 }, 1, 1 },
|
||||
[X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
|
||||
[X264_CSP_NV21] = { "nv21", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
|
||||
[X264_CSP_NV16] = { "nv16", 2, { 1, 1 }, { 1, 1 }, 2, 1 },
|
||||
[X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
|
||||
[X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
|
||||
|
@ -1413,6 +1413,32 @@ static int check_mc( int cpu_ref, int cpu_new )
|
||||
}
|
||||
}
|
||||
|
||||
if( mc_a.plane_copy_swap != mc_ref.plane_copy_swap )
|
||||
{
|
||||
set_func_name( "plane_copy_swap" );
|
||||
used_asm = 1;
|
||||
for( int i = 0; i < sizeof(plane_specs)/sizeof(*plane_specs); i++ )
|
||||
{
|
||||
int w = (plane_specs[i].w + 1) >> 1;
|
||||
int h = plane_specs[i].h;
|
||||
intptr_t src_stride = plane_specs[i].src_stride;
|
||||
intptr_t dst_stride = (2*w + 127) & ~63;
|
||||
assert( dst_stride * h <= 0x1000 );
|
||||
pixel *src1 = pbuf1 + X264_MAX(0, -src_stride) * (h-1);
|
||||
memset( pbuf3, 0, 0x1000*sizeof(pixel) );
|
||||
memset( pbuf4, 0, 0x1000*sizeof(pixel) );
|
||||
call_c( mc_c.plane_copy_swap, pbuf3, dst_stride, src1, src_stride, w, h );
|
||||
call_a( mc_a.plane_copy_swap, pbuf4, dst_stride, src1, src_stride, w, h );
|
||||
for( int y = 0; y < h; y++ )
|
||||
if( memcmp( pbuf3+y*dst_stride, pbuf4+y*dst_stride, 2*w*sizeof(pixel) ) )
|
||||
{
|
||||
ok = 0;
|
||||
fprintf( stderr, "plane_copy_swap FAILED: w=%d h=%d stride=%d\n", w, h, (int)src_stride );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( mc_a.plane_copy_interleave != mc_ref.plane_copy_interleave )
|
||||
{
|
||||
set_func_name( "plane_copy_interleave" );
|
||||
|
23
x264.h
23
x264.h
@ -41,7 +41,7 @@
|
||||
|
||||
#include "x264_config.h"
|
||||
|
||||
#define X264_BUILD 146
|
||||
#define X264_BUILD 147
|
||||
|
||||
/* Application developers planning to link against a shared library version of
|
||||
* libx264 from a Microsoft Visual Studio or similar development environment
|
||||
@ -214,16 +214,17 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
|
||||
#define X264_CSP_I420 0x0001 /* yuv 4:2:0 planar */
|
||||
#define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */
|
||||
#define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */
|
||||
#define X264_CSP_I422 0x0004 /* yuv 4:2:2 planar */
|
||||
#define X264_CSP_YV16 0x0005 /* yvu 4:2:2 planar */
|
||||
#define X264_CSP_NV16 0x0006 /* yuv 4:2:2, with one y plane and one packed u+v */
|
||||
#define X264_CSP_V210 0x0007 /* 10-bit yuv 4:2:2 packed in 32 */
|
||||
#define X264_CSP_I444 0x0008 /* yuv 4:4:4 planar */
|
||||
#define X264_CSP_YV24 0x0009 /* yvu 4:4:4 planar */
|
||||
#define X264_CSP_BGR 0x000a /* packed bgr 24bits */
|
||||
#define X264_CSP_BGRA 0x000b /* packed bgr 32bits */
|
||||
#define X264_CSP_RGB 0x000c /* packed rgb 24bits */
|
||||
#define X264_CSP_MAX 0x000d /* end of list */
|
||||
#define X264_CSP_NV21 0x0004 /* yuv 4:2:0, with one y plane and one packed v+u */
|
||||
#define X264_CSP_I422 0x0005 /* yuv 4:2:2 planar */
|
||||
#define X264_CSP_YV16 0x0006 /* yvu 4:2:2 planar */
|
||||
#define X264_CSP_NV16 0x0007 /* yuv 4:2:2, with one y plane and one packed u+v */
|
||||
#define X264_CSP_V210 0x0008 /* 10-bit yuv 4:2:2 packed in 32 */
|
||||
#define X264_CSP_I444 0x0009 /* yuv 4:4:4 planar */
|
||||
#define X264_CSP_YV24 0x000a /* yvu 4:4:4 planar */
|
||||
#define X264_CSP_BGR 0x000b /* packed bgr 24bits */
|
||||
#define X264_CSP_BGRA 0x000c /* packed bgr 32bits */
|
||||
#define X264_CSP_RGB 0x000d /* packed rgb 24bits */
|
||||
#define X264_CSP_MAX 0x000e /* end of list */
|
||||
#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
|
||||
#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user