mirror of
https://git.videolan.org/git/ffmpeg.git
synced 2024-08-21 00:35:05 +02:00
ARM: NEON 2xN chroma MC
Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
04e7f6d2d0
commit
1025d19dd7
@ -125,9 +125,11 @@ void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
|
|||||||
|
|
||||||
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
|
void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
|
|
||||||
void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
|
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
|
||||||
|
|
||||||
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
|
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
|
||||||
int beta, int8_t *tc0);
|
int beta, int8_t *tc0);
|
||||||
@ -272,9 +274,11 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
|
|||||||
if (CONFIG_H264_DECODER) {
|
if (CONFIG_H264_DECODER) {
|
||||||
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
|
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
|
||||||
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
|
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
|
||||||
|
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
|
||||||
|
|
||||||
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
|
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
|
||||||
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
|
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
|
||||||
|
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
|
||||||
|
|
||||||
c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
|
c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
|
||||||
c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
|
c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
|
||||||
|
@ -320,6 +320,74 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
|
|||||||
.endfunc
|
.endfunc
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
/*
 * h264_chroma_mc2 type
 *
 * Emits ff_<type>_h264_chroma_mc2_neon, a 2-pixel-wide chroma motion
 * compensation routine; "type" is "put" (store result) or "avg" (rounding
 * average of result with what is already at the destination).
 *
 * Register use as seen in the code below:
 *   r0      destination pointer (stored with 16-bit alignment hint)
 *   r1      source pointer
 *   r2      byte step added to r0/r1 after each row
 *   r3      remaining row count, consumed two rows per iteration
 *   [sp,#16], [sp,#20] after the push: 5th and 6th arguments, called
 *           a and b below (presumably the x/y fractional offsets in
 *           0..7 -- parameter names are not visible here, TODO confirm)
 *
 * If a and b are both zero the block is copied/averaged without filtering
 * (label 2); otherwise a four-tap bilinear filter with weights
 *   (8-a)(8-b), a(8-b), (8-a)b, ab   (sum = 64)
 * is applied (label 1).
 */
.macro h264_chroma_mc2 type
|
||||||
|
function ff_\type\()_h264_chroma_mc2_neon, export=1
|
||||||
|
/* four registers saved = 16 bytes, so stack arguments start at sp+16 */
push {r4-r6, lr}
|
||||||
|
/* r4 = a (5th argument) */
ldr r4, [sp, #16]
|
||||||
|
/* lr = b (6th argument) */
ldr lr, [sp, #20]
|
||||||
|
/* prefetch the first two source rows */
pld [r1]
|
||||||
|
pld [r1, r2]
|
||||||
|
/* a | b == 0 means both are zero: take the unfiltered path at 2: */
orrs r5, r4, lr
|
||||||
|
beq 2f
|
||||||
|
|
||||||
|
/* r5 = a*b */
mul r5, r4, lr
|
||||||
|
/* r6 = 8*b - a*b = (8-a)*b */
rsb r6, r5, lr, lsl #3
|
||||||
|
/* r12 = 8*a - a*b = a*(8-b) */
rsb r12, r5, r4, lsl #3
|
||||||
|
/* r4 = a*b - 8*a - 8*b + 64 = (8-a)*(8-b) */
sub r4, r5, r4, lsl #3
|
||||||
|
sub r4, r4, lr, lsl #3
|
||||||
|
add r4, r4, #64
|
||||||
|
/* broadcast each weight to every byte lane: q0 = {d0,d1}, q1 = {d2,d3} */
vdup.8 d0, r4
|
||||||
|
vdup.8 d2, r12
|
||||||
|
vdup.8 d1, r6
|
||||||
|
vdup.8 d3, r5
|
||||||
|
/*
 * Interleave 16-bit (two-byte) groups of q0 and q1 so that d0 alternates
 * pairs of (8-a)(8-b) and a(8-b), and d1 alternates pairs of (8-a)b and ab.
 */
vtrn.16 q0, q1
|
||||||
|
/* filtered loop: produces two output rows per iteration */
1:
|
||||||
|
/* d4 = { row n, row n+1 }, four source bytes each; r1 advances two rows */
vld1.32 {d4[0]}, [r1], r2
|
||||||
|
vld1.32 {d4[1]}, [r1], r2
|
||||||
|
/* d5 = { row n+1, row n } ... */
vrev64.32 d5, d4
|
||||||
|
/*
 * ... then overwrite the upper half with row n+2, giving
 * d5 = { row n+1, row n+2 }.  No post-increment: this row is loaded
 * again as the first row of the next iteration.
 */
vld1.32 {d5[1]}, [r1]
|
||||||
|
/* q3 = q2 advanced by one byte, i.e. each row's right-hand neighbours */
vext.8 q3, q2, q2, #1
|
||||||
|
/*
 * Interleave byte pairs of the original and shifted rows so each pixel
 * pair lines up with the weight layout prepared in d0/d1.
 */
vtrn.16 q2, q3
|
||||||
|
/* q8 = upper-row pixels * d0 weights + lower-row pixels * d1 weights (16-bit) */
vmull.u8 q8, d4, d0
|
||||||
|
vmlal.u8 q8, d5, d1
|
||||||
|
.ifc \type,avg
|
||||||
|
/* avg only: fetch the two existing destination rows (2 bytes each) into d18 */
vld1.16 {d18[0]}, [r0,:16], r2
|
||||||
|
vld1.16 {d18[1]}, [r0,:16]
|
||||||
|
/* undo the post-increment so r0 points at the first row again for the stores */
sub r0, r0, r2
|
||||||
|
.endif
|
||||||
|
/* bring the left-tap and right-tap partial sums of each row into matching lanes and add them */
vtrn.32 d16, d17
|
||||||
|
vadd.i16 d16, d16, d17
|
||||||
|
/* rounding shift right by 6 (weights sum to 64) and narrow back to bytes */
vrshrn.u16 d16, q8, #6
|
||||||
|
.ifc \type,avg
|
||||||
|
/* avg only: rounding halving add with the existing destination pixels */
vrhadd.u8 d16, d16, d18
|
||||||
|
.endif
|
||||||
|
/* store two bytes to each of the two output rows */
vst1.16 {d16[0]}, [r0,:16], r2
|
||||||
|
vst1.16 {d16[1]}, [r0,:16], r2
|
||||||
|
/* two rows done; loop while rows remain */
subs r3, r3, #2
|
||||||
|
bgt 1b
|
||||||
|
pop {r4-r6, pc}
|
||||||
|
/* unfiltered path (a == 0 and b == 0): two rows per iteration */
2:
|
||||||
|
.ifc \type,put
|
||||||
|
/* put: copy one halfword (two bytes) per row using core registers */
ldrh r5, [r1], r2
|
||||||
|
strh r5, [r0], r2
|
||||||
|
ldrh r6, [r1], r2
|
||||||
|
strh r6, [r0], r2
|
||||||
|
.else
|
||||||
|
/* avg: d16 = two source rows, d18 = two destination rows (2 bytes each) */
vld1.16 {d16[0]}, [r1], r2
|
||||||
|
vld1.16 {d16[1]}, [r1], r2
|
||||||
|
vld1.16 {d18[0]}, [r0,:16], r2
|
||||||
|
vld1.16 {d18[1]}, [r0,:16]
|
||||||
|
/* rewind r0 to the first destination row before storing */
sub r0, r0, r2
|
||||||
|
/* rounding halving add of source and destination, then store both rows */
vrhadd.u8 d16, d16, d18
|
||||||
|
vst1.16 {d16[0]}, [r0,:16], r2
|
||||||
|
vst1.16 {d16[1]}, [r0,:16], r2
|
||||||
|
.endif
|
||||||
|
subs r3, r3, #2
|
||||||
|
bgt 2b
|
||||||
|
pop {r4-r6, pc}
|
||||||
|
.endfunc
|
||||||
|
.endm
|
||||||
|
|
||||||
.text
|
.text
|
||||||
.align
|
.align
|
||||||
|
|
||||||
@ -327,6 +395,8 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
|
|||||||
h264_chroma_mc8 avg
|
h264_chroma_mc8 avg
|
||||||
h264_chroma_mc4 put
|
h264_chroma_mc4 put
|
||||||
h264_chroma_mc4 avg
|
h264_chroma_mc4 avg
|
||||||
|
h264_chroma_mc2 put
|
||||||
|
h264_chroma_mc2 avg
|
||||||
|
|
||||||
/* H.264 loop filter */
|
/* H.264 loop filter */
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user