From 65d5d5865845f057cc6530a8d0f34db952d9009c Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Mon, 23 Dec 2013 19:48:43 +0100 Subject: [PATCH] dsputil: Move SVQ1 encoding specific bits into svq1enc --- libavcodec/dsputil.c | 12 ----- libavcodec/dsputil.h | 3 -- libavcodec/ppc/Makefile | 1 + libavcodec/ppc/int_altivec.c | 44 ------------------ libavcodec/ppc/svq1enc_altivec.c | 80 ++++++++++++++++++++++++++++++++ libavcodec/svq1enc.c | 58 ++++++++--------------- libavcodec/svq1enc.h | 78 +++++++++++++++++++++++++++++++ libavcodec/x86/Makefile | 1 + libavcodec/x86/dsputilenc_mmx.c | 36 -------------- libavcodec/x86/svq1enc_mmx.c | 73 +++++++++++++++++++++++++++++ 10 files changed, 252 insertions(+), 134 deletions(-) create mode 100644 libavcodec/ppc/svq1enc_altivec.c create mode 100644 libavcodec/svq1enc.h create mode 100644 libavcodec/x86/svq1enc_mmx.c diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index ab0206b351..e26d27a2f3 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2099,16 +2099,6 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, return score; } -static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, - int size) -{ - int score = 0, i; - - for (i = 0; i < size; i++) - score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); - return score; -} - #define WRAPPER8_16_SQ(name8, name16) \ static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \ int stride, int h) \ @@ -2430,8 +2420,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->nsse[0] = nsse16_c; c->nsse[1] = nsse8_c; - c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; - c->bswap_buf = bswap_buf; c->bswap16_buf = bswap16_buf; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 2cfbd550d8..a4a9f87677 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -175,9 +175,6 @@ typedef struct DSPContext { me_cmp_func ildct_cmp[6]; // only width 16 used me_cmp_func frame_skip_cmp[6]; // only width 8 used - int 
(*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, - int size); - qpel_mc_func put_qpel_pixels_tab[2][16]; qpel_mc_func avg_qpel_pixels_tab[2][16]; qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 07806661f4..ec0674c817 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -12,6 +12,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o +OBJS-$(CONFIG_SVQ1_ENCODER) += ppc/svq1enc_altivec.o OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/int_altivec.c index cd1984a54c..fa3cb66095 100644 --- a/libavcodec/ppc/int_altivec.c +++ b/libavcodec/ppc/int_altivec.c @@ -34,48 +34,6 @@ #include "libavcodec/dsputil.h" #include "dsputil_altivec.h" -static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, - int size) -{ - int i, size16 = size >> 4; - vector signed char vpix1; - vector signed short vpix2, vdiff, vpix1l, vpix1h; - union { - vector signed int vscore; - int32_t score[4]; - } u = { .vscore = vec_splat_s32(0) }; - -// XXX lazy way, fix it later - - while (size16) { - // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); - // load pix1 and the first batch of pix2 - - vpix1 = vec_unaligned_load(pix1); - vpix2 = vec_unaligned_load(pix2); - pix2 += 8; - // unpack - vpix1h = vec_unpackh(vpix1); - vdiff = vec_sub(vpix1h, vpix2); - vpix1l = vec_unpackl(vpix1); - // load another batch from pix2 - vpix2 = vec_unaligned_load(pix2); - u.vscore = vec_msum(vdiff, vdiff, u.vscore); - vdiff = vec_sub(vpix1l, vpix2); - u.vscore = vec_msum(vdiff, vdiff, u.vscore); - pix1 += 16; - pix2 += 8; - size16--; - } - u.vscore = vec_sums(u.vscore, vec_splat_s32(0)); - - size %= 16; - for (i = 0; i < size; i++) - u.score[3] 
+= (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); - - return u.score[3]; -} - static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, int order) { @@ -140,8 +98,6 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) { - c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; - c->scalarproduct_int16 = scalarproduct_int16_altivec; c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec; diff --git a/libavcodec/ppc/svq1enc_altivec.c b/libavcodec/ppc/svq1enc_altivec.c new file mode 100644 index 0000000000..005239f430 --- /dev/null +++ b/libavcodec/ppc/svq1enc_altivec.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2007 Luca Barbato + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#if HAVE_ALTIVEC_H +#include <altivec.h> +#endif + +#include "libavutil/attributes.h" +#include "libavutil/ppc/types_altivec.h" +#include "libavutil/ppc/util_altivec.h" +#include "libavcodec/svq1enc.h" + +#if HAVE_ALTIVEC +static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, + int size) +{ + int i, size16 = size >> 4; + vector signed char vpix1; + vector signed short vpix2, vdiff, vpix1l, vpix1h; + union { + vector signed int vscore; + int32_t score[4]; + } u = { .vscore = vec_splat_s32(0) }; + + while (size16) { + // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); + // load pix1 and the first batch of pix2 + + vpix1 = vec_unaligned_load(pix1); + vpix2 = vec_unaligned_load(pix2); + pix2 += 8; + // unpack + vpix1h = vec_unpackh(vpix1); + vdiff = vec_sub(vpix1h, vpix2); + vpix1l = vec_unpackl(vpix1); + // load another batch from pix2 + vpix2 = vec_unaligned_load(pix2); + u.vscore = vec_msum(vdiff, vdiff, u.vscore); + vdiff = vec_sub(vpix1l, vpix2); + u.vscore = vec_msum(vdiff, vdiff, u.vscore); + pix1 += 16; + pix2 += 8; + size16--; + } + u.vscore = vec_sums(u.vscore, vec_splat_s32(0)); + + size %= 16; + for (i = 0; i < size; i++) + u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); + + return u.score[3]; +} +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c) +{ +#if HAVE_ALTIVEC + c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; +#endif /* HAVE_ALTIVEC */ +} diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c index 76c3e6e7ab..bdb6f0fd54 100644 --- a/libavcodec/svq1enc.c +++ b/libavcodec/svq1enc.c @@ -34,49 +34,12 @@ #include "internal.h" #include "mpegutils.h" #include "svq1.h" +#include "svq1enc.h" #include "svq1enc_cb.h" #undef NDEBUG #include <assert.h>
-typedef struct SVQ1EncContext { - /* FIXME: Needed for motion estimation, should not be used for anything - * else, the idea is to make the motion estimation eventually independent - * of MpegEncContext, so this will be removed then. */ - MpegEncContext m; - AVCodecContext *avctx; - DSPContext dsp; - HpelDSPContext hdsp; - AVFrame *current_picture; - AVFrame *last_picture; - PutBitContext pb; - GetBitContext gb; - - /* why ooh why this sick breadth first order, - * everything is slower and more complex */ - PutBitContext reorder_pb[6]; - - int frame_width; - int frame_height; - - /* Y plane block dimensions */ - int y_block_width; - int y_block_height; - - /* U & V plane (C planes) block dimensions */ - int c_block_width; - int c_block_height; - - uint16_t *mb_type; - uint32_t *dummy; - int16_t (*motion_val8[3])[2]; - int16_t (*motion_val16[3])[2]; - - int64_t rd_total; - - uint8_t *scratchbuf; -} SVQ1EncContext; - static void svq1_write_header(SVQ1EncContext *s, int frame_type) { int i; @@ -114,6 +77,16 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type) #define QUALITY_THRESHOLD 100 #define THRESHOLD_MULTIPLIER 0.6 +static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, + int size) +{ + int score = 0, i; + + for (i = 0; i < size; i++) + score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]); + return score; +} + static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra) @@ -175,7 +148,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref, int sqr, diff, score; vector = codebook + stage * size * 16 + i * size; - sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size); + sqr = s->ssd_int8_vs_int16(vector, block[stage], size); diff = block_sum[stage] - sum; score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow if (score < best_vector_score) { @@ -574,6 +547,13 @@ static av_cold int 
svq1_encode_init(AVCodecContext *avctx) s->y_block_height * sizeof(int16_t)); s->dummy = av_mallocz((s->y_block_width + 1) * s->y_block_height * sizeof(int32_t)); + s->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; + + if (ARCH_PPC) + ff_svq1enc_init_ppc(s); + if (ARCH_X86) + ff_svq1enc_init_x86(s); + ff_h263_encode_init(&s->m); // mv_penalty return 0; diff --git a/libavcodec/svq1enc.h b/libavcodec/svq1enc.h new file mode 100644 index 0000000000..1fe2815f19 --- /dev/null +++ b/libavcodec/svq1enc.h @@ -0,0 +1,78 @@ +/* + * SVQ1 encoder + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_SVQ1ENC_H +#define AVCODEC_SVQ1ENC_H + +#include <stdint.h> + +#include "libavutil/frame.h" +#include "avcodec.h" +#include "dsputil.h" +#include "get_bits.h" +#include "hpeldsp.h" +#include "mpegvideo.h" +#include "put_bits.h" + +typedef struct SVQ1EncContext { + /* FIXME: Needed for motion estimation, should not be used for anything + * else, the idea is to make the motion estimation eventually independent + * of MpegEncContext, so this will be removed then. 
*/ + MpegEncContext m; + AVCodecContext *avctx; + DSPContext dsp; + HpelDSPContext hdsp; + AVFrame *current_picture; + AVFrame *last_picture; + PutBitContext pb; + GetBitContext gb; + + /* why ooh why this sick breadth first order, + * everything is slower and more complex */ + PutBitContext reorder_pb[6]; + + int frame_width; + int frame_height; + + /* Y plane block dimensions */ + int y_block_width; + int y_block_height; + + /* U & V plane (C planes) block dimensions */ + int c_block_width; + int c_block_height; + + uint16_t *mb_type; + uint32_t *dummy; + int16_t (*motion_val8[3])[2]; + int16_t (*motion_val16[3])[2]; + + int64_t rd_total; + + uint8_t *scratchbuf; + + int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, + int size); +} SVQ1EncContext; + +void ff_svq1enc_init_ppc(SVQ1EncContext *c); +void ff_svq1enc_init_x86(SVQ1EncContext *c); + +#endif /* AVCODEC_SVQ1ENC_H */ diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index fef98a5e40..8830a22a8f 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -51,6 +51,7 @@ MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ x86/hpeldsp_mmx.o MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o +MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o YASM-OBJS += x86/deinterlace.o \ diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index 79066a74e5..81c9d137f2 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -805,40 +805,6 @@ DCT_SAD_FUNC(ssse3) #undef HSUM #undef DCT_SAD -static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, - int size) -{ - int sum; - x86_reg i = size; - - __asm__ volatile ( - "pxor %%mm4, %%mm4 \n" - "1: \n" - "sub $8, %0 \n" - "movq (%2, %0), %%mm2 \n" - "movq (%3, %0, 2), %%mm0 \n" - "movq 8(%3, %0, 2), %%mm1 \n" - "punpckhbw %%mm2, %%mm3 \n" - "punpcklbw %%mm2, %%mm2 \n" - "psraw $8, %%mm3 \n" - "psraw $8, %%mm2 \n" - "psubw 
%%mm3, %%mm1 \n" - "psubw %%mm2, %%mm0 \n" - "pmaddwd %%mm1, %%mm1 \n" - "pmaddwd %%mm0, %%mm0 \n" - "paddd %%mm1, %%mm4 \n" - "paddd %%mm0, %%mm4 \n" - "jg 1b \n" - "movq %%mm4, %%mm3 \n" - "psrlq $32, %%mm3 \n" - "paddd %%mm3, %%mm4 \n" - "movd %%mm4, %1 \n" - : "+r" (i), "=r" (sum) - : "r" (pix1), "r" (pix2)); - - return sum; -} - #define PHADDD(a, t) \ "movq " #a ", " #t " \n\t" \ "psrlq $32, " #a " \n\t" \ @@ -958,8 +924,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, c->try_8x8basis = try_8x8basis_mmx; } c->add_8x8basis = add_8x8basis_mmx; - - c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; } if (INLINE_AMD3DNOW(cpu_flags)) { diff --git a/libavcodec/x86/svq1enc_mmx.c b/libavcodec/x86/svq1enc_mmx.c new file mode 100644 index 0000000000..02b0a84b8c --- /dev/null +++ b/libavcodec/x86/svq1enc_mmx.c @@ -0,0 +1,73 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/svq1enc.h" + +#if HAVE_INLINE_ASM + +static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, + int size) +{ + int sum; + x86_reg i = size; + + __asm__ volatile ( + "pxor %%mm4, %%mm4 \n" + "1: \n" + "sub $8, %0 \n" + "movq (%2, %0), %%mm2 \n" + "movq (%3, %0, 2), %%mm0 \n" + "movq 8(%3, %0, 2), %%mm1 \n" + "punpckhbw %%mm2, %%mm3 \n" + "punpcklbw %%mm2, %%mm2 \n" + "psraw $8, %%mm3 \n" + "psraw $8, %%mm2 \n" + "psubw %%mm3, %%mm1 \n" + "psubw %%mm2, %%mm0 \n" + "pmaddwd %%mm1, %%mm1 \n" + "pmaddwd %%mm0, %%mm0 \n" + "paddd %%mm1, %%mm4 \n" + "paddd %%mm0, %%mm4 \n" + "jg 1b \n" + "movq %%mm4, %%mm3 \n" + "psrlq $32, %%mm3 \n" + "paddd %%mm3, %%mm4 \n" + "movd %%mm4, %1 \n" + : "+r" (i), "=r" (sum) + : "r" (pix1), "r" (pix2)); + + return sum; +} + +#endif /* HAVE_INLINE_ASM */ + +av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (INLINE_MMX(cpu_flags)) { + c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; + } +#endif /* HAVE_INLINE_ASM */ +}