mirror of
https://code.videolan.org/videolan/x264
synced 2024-11-15 03:32:13 +01:00
Merge zero buffers
Improves cache efficiency.
This commit is contained in:
parent
d75b93b0e8
commit
b019515ef4
@ -155,7 +155,7 @@ static weight_fn_t mc_weight_wtab[6] =
|
||||
mc_weight_w16,
|
||||
mc_weight_w20,
|
||||
};
|
||||
const x264_weight_t x264_weight_none[3] = { {{0}} };
|
||||
|
||||
static void mc_copy( pixel *src, intptr_t i_src_stride, pixel *dst, intptr_t i_dst_stride, int i_width, int i_height )
|
||||
{
|
||||
for( int y = 0; y < i_height; y++ )
|
||||
|
@ -244,8 +244,7 @@ typedef struct x264_weight_t
|
||||
weight_fn_t *weightfn;
|
||||
} ALIGNED_16( x264_weight_t );
|
||||
|
||||
#define x264_weight_none x264_template(weight_none)
|
||||
extern const x264_weight_t x264_weight_none[3];
|
||||
#define x264_weight_none ((const x264_weight_t*)x264_zero)
|
||||
|
||||
#define SET_WEIGHT( w, b, s, d, o )\
|
||||
{\
|
||||
|
@ -2534,3 +2534,6 @@ const vlc_t x264_run_before_init[7][16] =
|
||||
{ 0x1, 11 }, /* str=00000000001 */
|
||||
},
|
||||
};
|
||||
|
||||
/* psy_trellis_init() has the largest size requirement of 16*FDEC_STRIDE*sizeof(pixel) */
|
||||
ALIGNED_64( uint8_t x264_zero[1024] ) = { 0 };
|
||||
|
@ -94,4 +94,6 @@ extern const vlc_t x264_total_zeros_2x2_dc[3][4];
|
||||
extern const vlc_t x264_total_zeros_2x4_dc[7][8];
|
||||
extern const vlc_t x264_run_before_init[7][16];
|
||||
|
||||
extern uint8_t x264_zero[1024];
|
||||
|
||||
#endif
|
||||
|
@ -558,12 +558,10 @@ static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra,
|
||||
/* For trellis=2, we need to do this for both sizes of DCT; for trellis=1, we only need to do it for the chosen mode. */
|
||||
static void inline psy_trellis_init( x264_t *h, int do_both_dct )
|
||||
{
|
||||
ALIGNED_64( static pixel zero[16*FDEC_STRIDE] ) = {0};
|
||||
|
||||
if( do_both_dct || h->mb.b_transform_8x8 )
|
||||
h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
|
||||
h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], (pixel*)x264_zero );
|
||||
if( do_both_dct || !h->mb.b_transform_8x8 )
|
||||
h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], zero );
|
||||
h->dctf.sub16x16_dct( h->mb.pic.fenc_dct4, h->mb.pic.p_fenc[0], (pixel*)x264_zero );
|
||||
}
|
||||
|
||||
/* Reset fenc satd scores cache for psy RD */
|
||||
|
@ -633,7 +633,6 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
|
||||
/* successive elimination by comparing DC before a full SAD,
|
||||
* because sum(abs(diff)) >= abs(diff(sum)). */
|
||||
uint16_t *sums_base = m->integral;
|
||||
ALIGNED_16( static pixel zero[8*FENC_STRIDE] ) = {0};
|
||||
ALIGNED_ARRAY_16( int, enc_dc,[4] );
|
||||
int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
|
||||
int delta = x264_pixel_size[sad_size].w;
|
||||
@ -641,7 +640,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
|
||||
int xn;
|
||||
uint16_t *cost_fpel_mvx = h->cost_mv_fpel[h->mb.i_qp][-m->mvp[0]&3] + (-m->mvp[0]>>2);
|
||||
|
||||
h->pixf.sad_x4[sad_size]( zero, p_fenc, p_fenc+delta,
|
||||
h->pixf.sad_x4[sad_size]( (pixel*)x264_zero, p_fenc, p_fenc+delta,
|
||||
p_fenc+delta*FENC_STRIDE, p_fenc+delta+delta*FENC_STRIDE,
|
||||
FENC_STRIDE, enc_dc );
|
||||
if( delta == 4 )
|
||||
|
@ -96,7 +96,6 @@ static ALWAYS_INLINE int cached_satd( x264_t *h, int size, int x, int y )
|
||||
static const uint8_t satd_shift_x[3] = {3, 2, 2};
|
||||
static const uint8_t satd_shift_y[3] = {2-1, 3-2, 2-2};
|
||||
static const uint8_t satd_offset[3] = {0, 8, 16};
|
||||
ALIGNED_16( static pixel zero[16] ) = {0};
|
||||
int cache_index = (x >> satd_shift_x[size - PIXEL_8x4]) + (y >> satd_shift_y[size - PIXEL_8x4])
|
||||
+ satd_offset[size - PIXEL_8x4];
|
||||
int res = h->mb.pic.fenc_satd_cache[cache_index];
|
||||
@ -105,8 +104,8 @@ static ALWAYS_INLINE int cached_satd( x264_t *h, int size, int x, int y )
|
||||
else
|
||||
{
|
||||
pixel *fenc = h->mb.pic.p_fenc[0] + x + y*FENC_STRIDE;
|
||||
int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, zero, 0 ) >> 1;
|
||||
res = h->pixf.satd[size]( fenc, FENC_STRIDE, zero, 0 ) - dc;
|
||||
int dc = h->pixf.sad[size]( fenc, FENC_STRIDE, (pixel*)x264_zero, 0 ) >> 1;
|
||||
res = h->pixf.satd[size]( fenc, FENC_STRIDE, (pixel*)x264_zero, 0 ) - dc;
|
||||
h->mb.pic.fenc_satd_cache[cache_index] = res + 1;
|
||||
return res;
|
||||
}
|
||||
@ -123,7 +122,6 @@ static ALWAYS_INLINE int cached_satd( x264_t *h, int size, int x, int y )
|
||||
|
||||
static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
|
||||
{
|
||||
ALIGNED_16( static pixel zero[16] ) = {0};
|
||||
int satd = 0;
|
||||
pixel *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
|
||||
pixel *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
|
||||
@ -140,8 +138,8 @@ static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
|
||||
}
|
||||
else
|
||||
{
|
||||
int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, zero, 0 ) >> 1;
|
||||
satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, zero, 0 ) - dc - cached_satd( h, size, x, y ));
|
||||
int dc = h->pixf.sad[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) >> 1;
|
||||
satd = abs(h->pixf.satd[size]( fdec, FDEC_STRIDE, (pixel*)x264_zero, 0 ) - dc - cached_satd( h, size, x, y ));
|
||||
}
|
||||
satd = (satd * h->mb.i_psy_rd * h->mb.i_psy_rd_lambda + 128) >> 8;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user