Utility functions (CRC calc & float->int converters)

[imported from MPlayer, based on a52dec's libao]

Originally committed as revision 1780 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Arpi 2003-04-16 20:03:07 +00:00
parent 6814a25c67
commit 1a7c3c8562
7 changed files with 858 additions and 0 deletions

View File

@ -118,6 +118,8 @@ void a52_upmix (sample_t * samples, int acmod, int output);
void a52_imdct_init (uint32_t mm_accel);
void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias);
//extern void (* a52_imdct_256) (sample_t data[], sample_t delay[], sample_t bias);
//extern void (* a52_imdct_512) (sample_t data[], sample_t delay[], sample_t bias);
#define ROUND(x) ((int)((x) + ((x) > 0 ? 0.5 : -0.5)))

View File

@ -0,0 +1,32 @@
/*
* a52_util.h
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef A52_UTIL_H
#define A52_UTIL_H

/* The declarations below use fixed-width integer types, so pull them in
 * here rather than relying on every includer having done so already. */
#include <inttypes.h>

/* Runs the table-driven 16-bit CRC over num_bytes bytes starting at data
 * (initial state 0) and returns the final state. */
uint16_t a52_crc16_block(uint8_t *data,uint32_t num_bytes);

/* Selects a float -> int16 converter for the given mode flags and output
 * channel count, stores it in a52_resample and returns it; returns NULL
 * when no converter matches. */
void* a52_resample_init(uint32_t mm_accel,int flags,int chans);

/* Converter chosen by a52_resample_init(); NULL until one has been selected.
 * The provided converters return the number of int16 samples they wrote. */
extern int (* a52_resample) (float * _f, int16_t * s16);

#endif /* A52_UTIL_H */

73
libavcodec/liba52/crc.c Normal file
View File

@ -0,0 +1,73 @@
/*
* crc.c
*
* Copyright (C) Aaron Holtzman - May 1999
*
* This file is part of ac3dec, a free Dolby AC-3 stream decoder.
*
* ac3dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* ac3dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
/*
 * 256-entry lookup table for the byte-at-a-time CRC in a52_crc16_block():
 * the table is indexed by (input byte XOR high byte of the running state)
 * and the selected entry is XORed into the state shifted left by 8.
 * NOTE(review): entry 1 is 0x8005, which looks like the generator
 * polynomial x^16 + x^15 + x^2 + 1 processed MSB-first -- confirm against
 * the A/52 specification before relying on this.
 */
static const uint16_t crc_lut[256] =
{
0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011,
0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022,
0x8063,0x0066,0x006c,0x8069,0x0078,0x807d,0x8077,0x0072,
0x0050,0x8055,0x805f,0x005a,0x804b,0x004e,0x0044,0x8041,
0x80c3,0x00c6,0x00cc,0x80c9,0x00d8,0x80dd,0x80d7,0x00d2,
0x00f0,0x80f5,0x80ff,0x00fa,0x80eb,0x00ee,0x00e4,0x80e1,
0x00a0,0x80a5,0x80af,0x00aa,0x80bb,0x00be,0x00b4,0x80b1,
0x8093,0x0096,0x009c,0x8099,0x0088,0x808d,0x8087,0x0082,
0x8183,0x0186,0x018c,0x8189,0x0198,0x819d,0x8197,0x0192,
0x01b0,0x81b5,0x81bf,0x01ba,0x81ab,0x01ae,0x01a4,0x81a1,
0x01e0,0x81e5,0x81ef,0x01ea,0x81fb,0x01fe,0x01f4,0x81f1,
0x81d3,0x01d6,0x01dc,0x81d9,0x01c8,0x81cd,0x81c7,0x01c2,
0x0140,0x8145,0x814f,0x014a,0x815b,0x015e,0x0154,0x8151,
0x8173,0x0176,0x017c,0x8179,0x0168,0x816d,0x8167,0x0162,
0x8123,0x0126,0x012c,0x8129,0x0138,0x813d,0x8137,0x0132,
0x0110,0x8115,0x811f,0x011a,0x810b,0x010e,0x0104,0x8101,
0x8303,0x0306,0x030c,0x8309,0x0318,0x831d,0x8317,0x0312,
0x0330,0x8335,0x833f,0x033a,0x832b,0x032e,0x0324,0x8321,
0x0360,0x8365,0x836f,0x036a,0x837b,0x037e,0x0374,0x8371,
0x8353,0x0356,0x035c,0x8359,0x0348,0x834d,0x8347,0x0342,
0x03c0,0x83c5,0x83cf,0x03ca,0x83db,0x03de,0x03d4,0x83d1,
0x83f3,0x03f6,0x03fc,0x83f9,0x03e8,0x83ed,0x83e7,0x03e2,
0x83a3,0x03a6,0x03ac,0x83a9,0x03b8,0x83bd,0x83b7,0x03b2,
0x0390,0x8395,0x839f,0x039a,0x838b,0x038e,0x0384,0x8381,
0x0280,0x8285,0x828f,0x028a,0x829b,0x029e,0x0294,0x8291,
0x82b3,0x02b6,0x02bc,0x82b9,0x02a8,0x82ad,0x82a7,0x02a2,
0x82e3,0x02e6,0x02ec,0x82e9,0x02f8,0x82fd,0x82f7,0x02f2,
0x02d0,0x82d5,0x82df,0x02da,0x82cb,0x02ce,0x02c4,0x82c1,
0x8243,0x0246,0x024c,0x8249,0x0258,0x825d,0x8257,0x0252,
0x0270,0x8275,0x827f,0x027a,0x826b,0x026e,0x0264,0x8261,
0x0220,0x8225,0x822f,0x022a,0x823b,0x023e,0x0234,0x8231,
0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202
};
/*
 * Table-driven 16-bit CRC over a byte buffer.
 *
 * data      - first byte to process
 * num_bytes - number of bytes to process (0 yields a result of 0)
 *
 * The running state starts at 0; for each byte the high byte of the state
 * is XORed with the input byte to pick a crc_lut entry, which is then
 * XORed with the state shifted left by one byte.  Returns the final state.
 */
uint16_t a52_crc16_block(uint8_t *data,uint32_t num_bytes)
{
    const uint8_t *cursor = data;
    uint32_t remaining = num_bytes;
    uint16_t crc = 0;

    while (remaining != 0) {
        const unsigned slot = (unsigned)(crc >> 8) ^ *cursor;

        crc = (uint16_t)(crc_lut[slot] ^ (crc << 8));
        cursor++;
        remaining--;
    }
    return crc;
}

View File

@ -31,6 +31,11 @@
#define MM_ACCEL_X86_MMX 0x80000000
#define MM_ACCEL_X86_3DNOW 0x40000000
#define MM_ACCEL_X86_MMXEXT 0x20000000
#define MM_ACCEL_X86_SSE 0x10000000
#define MM_ACCEL_X86_3DNOWEXT 0x08000000
/* PPC accelerations */
#define MM_ACCEL_PPC_ALTIVEC 0x00010000
uint32_t mm_accel (void);

View File

@ -0,0 +1,45 @@
// a52_resample_init should find the requested converter (from type flags ->
// given number of channels) and set up some function pointers...
// a52_resample() should do the conversion.
#include <inttypes.h>
#include <stdio.h>
#include "a52.h"
#include "mm_accel.h"
#include "config.h"
#include "../libpostproc/mangle.h"
int (* a52_resample) (float * _f, int16_t * s16)=NULL;
#include "resample_c.c"
#ifdef ARCH_X86
#include "resample_mmx.c"
#endif
/*
 * Pick the float -> int16 converter for a decoding mode.
 *
 * mm_accel - MM_ACCEL_* capability bits; only MM_ACCEL_X86_MMX is examined,
 *            and only when built with ARCH_X86
 * flags    - mode value handed to the per-implementation lookup functions
 * chans    - number of interleaved output channels requested
 *
 * The MMX lookup is tried first (when allowed), then the plain C lookup.
 * On success the converter is stored in the global a52_resample and also
 * returned; a one-line notice goes to stderr only if a52_resample was still
 * NULL beforehand.  When neither lookup has a match, a diagnostic is
 * printed, a52_resample is left untouched and NULL is returned.
 */
void* a52_resample_init(uint32_t mm_accel,int flags,int chans){
    void *converter = NULL;
    const char *notice = "No accelerated resampler found\n";

#ifdef ARCH_X86
    if (mm_accel & MM_ACCEL_X86_MMX) {
        converter = a52_resample_MMX(flags, chans);
        if (converter)
            notice = "Using MMX optimized resampler\n";
    }
#endif

    /* No MMX match (or MMX not permitted): fall back to the C converters. */
    if (!converter)
        converter = a52_resample_C(flags, chans);

    if (!converter) {
        fprintf(stderr, "Unimplemented resampler for mode 0x%X -> %d channels conversion - Contact MPlayer developers!\n", flags, chans);
        return NULL;
    }

    /* Announce the choice only the first time a converter is installed. */
    if (a52_resample == NULL)
        fputs(notice, stderr);
    a52_resample = converter;
    return converter;
}

View File

@ -0,0 +1,183 @@
// this code is based on a52dec/libao/audio_out_oss.c
/*
 * Turn the raw 32-bit pattern of one sample into a signed 16-bit value.
 *
 * The callers in this file pass words read through an int32_t view of
 * their float buffer.  0x43c00000 is the pattern mapped to 0 (the
 * IEEE-754 single-precision encoding of 384.0); patterns from
 * 0x43bf8000 to 0x43c07fff map linearly onto -32768..32767 and
 * everything outside that window is clamped to the nearest limit.
 */
static inline int16_t convert (int32_t i)
{
    enum { ZERO_PATTERN = 0x43c00000 };

    /* Clamp before subtracting so the subtraction can never overflow. */
    if (i < ZERO_PATTERN - 32768)
        return -32768;
    if (i > ZERO_PATTERN + 32767)
        return 32767;
    return (int16_t)(i - ZERO_PATTERN);
}
/*
 * One 256-sample input block -> 256 interleaved 5-slot frames.
 * Slots 0..3 of every frame are zeroed; slot 4 receives the converted
 * sample.  The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (5*256).
 */
static int a52_resample_MONO_to_5_C(float * _f, int16_t * s16){
    const int32_t *bits = (const int32_t *) _f;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 5) {
        frame[0] = 0;
        frame[1] = 0;
        frame[2] = 0;
        frame[3] = 0;
        frame[4] = convert (bits[n]);
    }
    return 5*256;
}
/*
 * One 256-sample input block -> 256 int16 samples, one to one.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (1*256).
 */
static int a52_resample_MONO_to_1_C(float * _f, int16_t * s16){
    const int32_t *bits = (const int32_t *) _f;
    int n = 0;

    while (n < 256) {
        s16[n] = convert (bits[n]);
        n++;
    }
    return 1*256;
}
/*
 * Two consecutive 256-sample input blocks -> 256 interleaved 2-slot frames.
 * Slot 0 comes from the block at offset 0, slot 1 from the block at
 * offset 256.  The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (2*256).
 */
static int a52_resample_STEREO_to_2_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 2) {
        frame[0] = convert (blk0[n]);
        frame[1] = convert (blk1[n]);
    }
    return 2*256;
}
/*
 * Three consecutive 256-sample input blocks -> 256 interleaved 5-slot frames.
 * Slot 0 <- block at offset 0, slot 1 <- block at offset 512,
 * slots 2 and 3 are zeroed, slot 4 <- block at offset 256.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (5*256).
 */
static int a52_resample_3F_to_5_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    const int32_t *blk2 = blk0 + 512;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 5) {
        frame[0] = convert (blk0[n]);
        frame[1] = convert (blk2[n]);
        frame[2] = 0;
        frame[3] = 0;
        frame[4] = convert (blk1[n]);
    }
    return 5*256;
}
/*
 * Four consecutive 256-sample input blocks -> 256 interleaved 4-slot frames.
 * Slot k of each frame comes from the block at offset k*256, in order.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (4*256).
 */
static int a52_resample_2F_2R_to_4_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    const int32_t *blk2 = blk0 + 512;
    const int32_t *blk3 = blk0 + 768;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 4) {
        frame[0] = convert (blk0[n]);
        frame[1] = convert (blk1[n]);
        frame[2] = convert (blk2[n]);
        frame[3] = convert (blk3[n]);
    }
    return 4*256;
}
/*
 * Five consecutive 256-sample input blocks -> 256 interleaved 5-slot frames.
 * Slot 0 <- offset 0, slot 1 <- offset 512, slot 2 <- offset 768,
 * slot 3 <- offset 1024, slot 4 <- offset 256.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (5*256).
 */
static int a52_resample_3F_2R_to_5_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    const int32_t *blk2 = blk0 + 512;
    const int32_t *blk3 = blk0 + 768;
    const int32_t *blk4 = blk0 + 1024;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 5) {
        frame[0] = convert (blk0[n]);
        frame[1] = convert (blk2[n]);
        frame[2] = convert (blk3[n]);
        frame[3] = convert (blk4[n]);
        frame[4] = convert (blk1[n]);
    }
    return 5*256;
}
/*
 * Two consecutive 256-sample input blocks -> 256 interleaved 6-slot frames.
 * Slots 0..3 are zeroed, slot 4 <- block at offset 256,
 * slot 5 <- block at offset 0.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_MONO_LFE_to_6_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 6) {
        frame[0] = 0;
        frame[1] = 0;
        frame[2] = 0;
        frame[3] = 0;
        frame[4] = convert (blk1[n]);
        frame[5] = convert (blk0[n]);
    }
    return 6*256;
}
/*
 * Three consecutive 256-sample input blocks -> 256 interleaved 6-slot frames.
 * Slot 0 <- offset 256, slot 1 <- offset 512, slots 2..4 are zeroed,
 * slot 5 <- offset 0.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_STEREO_LFE_to_6_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    const int32_t *blk2 = blk0 + 512;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 6) {
        frame[0] = convert (blk1[n]);
        frame[1] = convert (blk2[n]);
        frame[2] = 0;
        frame[3] = 0;
        frame[4] = 0;
        frame[5] = convert (blk0[n]);
    }
    return 6*256;
}
/*
 * Four consecutive 256-sample input blocks -> 256 interleaved 6-slot frames.
 * Slot 0 <- offset 256, slot 1 <- offset 768, slots 2 and 3 are zeroed,
 * slot 4 <- offset 512, slot 5 <- offset 0.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_3F_LFE_to_6_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    const int32_t *blk2 = blk0 + 512;
    const int32_t *blk3 = blk0 + 768;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 6) {
        frame[0] = convert (blk1[n]);
        frame[1] = convert (blk3[n]);
        frame[2] = 0;
        frame[3] = 0;
        frame[4] = convert (blk2[n]);
        frame[5] = convert (blk0[n]);
    }
    return 6*256;
}
/*
 * Five consecutive 256-sample input blocks -> 256 interleaved 6-slot frames.
 * Slot 0 <- offset 256, slot 1 <- offset 512, slot 2 <- offset 768,
 * slot 3 <- offset 1024, slot 4 is zeroed, slot 5 <- offset 0.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_2F_2R_LFE_to_6_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    const int32_t *blk2 = blk0 + 512;
    const int32_t *blk3 = blk0 + 768;
    const int32_t *blk4 = blk0 + 1024;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 6) {
        frame[0] = convert (blk1[n]);
        frame[1] = convert (blk2[n]);
        frame[2] = convert (blk3[n]);
        frame[3] = convert (blk4[n]);
        frame[4] = 0;
        frame[5] = convert (blk0[n]);
    }
    return 6*256;
}
/*
 * Six consecutive 256-sample input blocks -> 256 interleaved 6-slot frames.
 * Slot 0 <- offset 256, slot 1 <- offset 768, slot 2 <- offset 1024,
 * slot 3 <- offset 1280, slot 4 <- offset 512, slot 5 <- offset 0.
 * The float buffer is read as raw 32-bit words (see convert()).
 * Returns the number of int16 values written (6*256).
 */
static int a52_resample_3F_2R_LFE_to_6_C(float * _f, int16_t * s16){
    const int32_t *blk0 = (const int32_t *) _f;
    const int32_t *blk1 = blk0 + 256;
    const int32_t *blk2 = blk0 + 512;
    const int32_t *blk3 = blk0 + 768;
    const int32_t *blk4 = blk0 + 1024;
    const int32_t *blk5 = blk0 + 1280;
    int16_t *frame = s16;
    int n;

    for (n = 0; n < 256; n++, frame += 6) {
        frame[0] = convert (blk1[n]);
        frame[1] = convert (blk3[n]);
        frame[2] = convert (blk4[n]);
        frame[3] = convert (blk5[n]);
        frame[4] = convert (blk2[n]);
        frame[5] = convert (blk0[n]);
    }
    return 6*256;
}
/*
 * Look up the plain-C converter for a (mode flags, output channel count)
 * pair.  Returns the matching converter function as a void pointer, or
 * NULL when the combination has no C implementation.
 */
static void* a52_resample_C(int flags, int ch){
    void *match = NULL;

    switch (flags) {
    case A52_MONO:
        if (ch == 5)
            match = a52_resample_MONO_to_5_C;
        else if (ch == 1)
            match = a52_resample_MONO_to_1_C;
        break;

    /* The three two-channel modes share one converter. */
    case A52_CHANNEL:
    case A52_STEREO:
    case A52_DOLBY:
        if (ch == 2)
            match = a52_resample_STEREO_to_2_C;
        break;

    case A52_3F:
        if (ch == 5)
            match = a52_resample_3F_to_5_C;
        break;

    case A52_2F2R:
        if (ch == 4)
            match = a52_resample_2F_2R_to_4_C;
        break;

    case A52_3F2R:
        if (ch == 5)
            match = a52_resample_3F_2R_to_5_C;
        break;

    /* Every mode combined with A52_LFE is only available as 6 channels. */
    case A52_MONO | A52_LFE:
        if (ch == 6)
            match = a52_resample_MONO_LFE_to_6_C;
        break;

    case A52_CHANNEL | A52_LFE:
    case A52_STEREO | A52_LFE:
    case A52_DOLBY | A52_LFE:
        if (ch == 6)
            match = a52_resample_STEREO_LFE_to_6_C;
        break;

    case A52_3F | A52_LFE:
        if (ch == 6)
            match = a52_resample_3F_LFE_to_6_C;
        break;

    case A52_2F2R | A52_LFE:
        if (ch == 6)
            match = a52_resample_2F_2R_LFE_to_6_C;
        break;

    case A52_3F2R | A52_LFE:
        if (ch == 6)
            match = a52_resample_3F_2R_LFE_to_6_C;
        break;

    default:
        break;
    }
    return match;
}

View File

@ -0,0 +1,518 @@
// MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL)
/* optimization TODO / NOTES
movntq is slightly faster (0.5% with the current test.c benchmark)
(but that's just test.c, so it needs to be tested in reality)
and it would mean (C / MMX2 / MMX / 3DNOW) versions
*/
/* 8-byte-aligned constants loaded into MMX registers by the converters below. */
/* Two packed copies of 0x43c00000; the loops subtract this (psubd) from each
   32-bit sample word before packing to 16 bits. */
static uint64_t __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL;
/* Mask keeping the upper 16-bit word of each 32-bit half. */
static uint64_t __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL;
/* Mask keeping the lower 16-bit word of each 32-bit half. */
static uint64_t __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL;
/* Mask keeping only the upper 32 bits. */
static uint64_t __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL;
/*
 * MMX inline-asm converter: one 256-sample input block -> 256 interleaved
 * 5-slot frames, with slots 0..3 zero and slot 4 holding the converted sample.
 *
 * Operands: %0 = s16+1280 (one past the end of the output), %1 = f+256 (one
 * past the end of the input block).  %esi is a negative index that starts at
 * -512 and rises by 8 per pass until it reaches zero; the input is addressed
 * with scale 2, so each pass reads 16 bytes (four samples).  %edi = 5*%esi is
 * the matching output byte offset (40 bytes per pass).
 * packssdw narrows with signed saturation after the magicF2W subtraction.
 * Clobbers %esi and %edi, executes emms before leaving, returns 5*256.
 * In the lane comments below A..D are the four converted samples of a pass.
 */
static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-512, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"movq "MANGLE(wm1100)", %%mm3 \n\t"
"movq "MANGLE(wm0101)", %%mm4 \n\t"
"movq "MANGLE(wm1010)", %%mm5 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
"movq (%1, %%esi, 2), %%mm0 \n\t"
"movq 8(%1, %%esi, 2), %%mm1 \n\t"
"leal (%%esi, %%esi, 4), %%edi \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"pand %%mm4, %%mm0 \n\t"
"pand %%mm5, %%mm1 \n\t"
"movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0
"movd %%mm0, 8(%0, %%edi) \n\t" // A 0
"pand %%mm3, %%mm0 \n\t"
"movd %%mm6, 12(%0, %%edi) \n\t" // 0 0
"movd %%mm1, 16(%0, %%edi) \n\t" // 0 B
"pand %%mm3, %%mm1 \n\t"
"movd %%mm6, 20(%0, %%edi) \n\t" // 0 0
"movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0
"movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D
"addl $8, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1280), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 5*256;
}
/*
 * MMX inline-asm converter: two consecutive 256-sample input blocks -> 256
 * interleaved 2-slot frames.
 *
 * Operands: %0 = s16+512 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 16 per pass until it reaches zero; each pass reads four
 * samples from the first block and four from the block 1024 bytes later,
 * subtracts magicF2W, packs with signed saturation, interleaves the two
 * blocks word by word and stores 16 bytes.
 * Clobbers %esi, executes emms before leaving, returns 2*256.
 * The commented-out SSE variant is kept from the original for reference.
 */
static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
/* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
#ifdef HAVE_SSE
asm volatile(
"movl $-1024, %%esi \n\t"
"1: \n\t"
"cvtps2pi (%1, %%esi), %%mm0 \n\t"
"cvtps2pi 1024(%1, %%esi), %%mm2\n\t"
"movq %%mm0, %%mm1 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t"
"movq %%mm0, (%0, %%esi) \n\t"
"movq %%mm1, 8(%0, %%esi) \n\t"
"addl $16, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+512), "r" (f+256)
:"%esi", "memory"
);*/
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t"
"movq (%1, %%esi), %%mm0 \n\t"
"movq 8(%1, %%esi), %%mm1 \n\t"
"movq 1024(%1, %%esi), %%mm2 \n\t"
"movq 1032(%1, %%esi), %%mm3 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t"
"movq %%mm0, (%0, %%esi) \n\t"
"movq %%mm1, 8(%0, %%esi) \n\t"
"addl $16, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+512), "r" (f+256)
:"%esi", "memory"
);
return 2*256;
}
/*
 * MMX inline-asm converter: three consecutive 256-sample input blocks -> 256
 * interleaved 5-slot frames.
 * NOTE(review): the slot layout is expected to match a52_resample_3F_to_5_C
 * (two slots of every frame left at zero) -- not re-verified lane by lane.
 *
 * Operands: %0 = s16+1280 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 16 per pass (four samples per block); %edi = 5*%esi/2
 * is the matching output byte offset (40 bytes per pass).
 * mm7 holds magicF2W, mm6 is zero, and mm5 is magicF2W with its upper dword
 * cleared.  Where mm7 itself is used as a source of sample data (movq %%mm7,
 * %%mm3), the following psubd turns that lane into zero.
 * Clobbers %esi and %edi, executes emms before leaving, returns 5*256.
 */
static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"movq %%mm7, %%mm5 \n\t"
"punpckldq %%mm6, %%mm5 \n\t"
"1: \n\t"
"movd (%1, %%esi), %%mm0 \n\t"
"punpckldq 2048(%1, %%esi), %%mm0\n\t"
"movd 1024(%1, %%esi), %%mm1 \n\t"
"punpckldq 4(%1, %%esi), %%mm1 \n\t"
"movd 2052(%1, %%esi), %%mm2 \n\t"
"movq %%mm7, %%mm3 \n\t"
"punpckldq 1028(%1, %%esi), %%mm3\n\t"
"movd 8(%1, %%esi), %%mm4 \n\t"
"punpckldq 2056(%1, %%esi), %%mm4\n\t"
"leal (%%esi, %%esi, 4), %%edi \n\t"
"sarl $1, %%edi \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm5, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"packssdw %%mm6, %%mm0 \n\t"
"packssdw %%mm2, %%mm1 \n\t"
"packssdw %%mm4, %%mm3 \n\t"
"movq %%mm0, (%0, %%edi) \n\t"
"movq %%mm1, 8(%0, %%edi) \n\t"
"movq %%mm3, 16(%0, %%edi) \n\t"
"movd 1032(%1, %%esi), %%mm1 \n\t"
"punpckldq 12(%1, %%esi), %%mm1\n\t"
"movd 2060(%1, %%esi), %%mm2 \n\t"
"movq %%mm7, %%mm3 \n\t"
"punpckldq 1036(%1, %%esi), %%mm3\n\t"
"pxor %%mm0, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm5, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"movq %%mm0, 24(%0, %%edi) \n\t"
"movq %%mm2, 32(%0, %%edi) \n\t"
"addl $16, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1280), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 5*256;
}
/*
 * MMX inline-asm converter: four consecutive 256-sample input blocks -> 256
 * interleaved 4-slot frames.
 * NOTE(review): the slot layout is expected to match
 * a52_resample_2F_2R_to_4_C -- not re-verified lane by lane.
 *
 * Operands: %0 = s16+1024 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 16 per pass (four samples per block); the output is
 * addressed with scale 2, i.e. 32 bytes are stored per pass.
 * Each block is biased by magicF2W and packed with signed saturation, then
 * the four blocks are interleaved with word and dword unpacks.
 * Clobbers %esi, executes emms before leaving, returns 4*256.
 */
static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t"
"movq (%1, %%esi), %%mm0 \n\t"
"movq 8(%1, %%esi), %%mm1 \n\t"
"movq 1024(%1, %%esi), %%mm2 \n\t"
"movq 1032(%1, %%esi), %%mm3 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"movq 2048(%1, %%esi), %%mm3 \n\t"
"movq 2056(%1, %%esi), %%mm4 \n\t"
"movq 3072(%1, %%esi), %%mm5 \n\t"
"movq 3080(%1, %%esi), %%mm6 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"psubd %%mm7, %%mm5 \n\t"
"psubd %%mm7, %%mm6 \n\t"
"packssdw %%mm4, %%mm3 \n\t"
"packssdw %%mm6, %%mm5 \n\t"
"movq %%mm0, %%mm1 \n\t"
"movq %%mm3, %%mm4 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t"
"punpcklwd %%mm5, %%mm3 \n\t"
"punpckhwd %%mm5, %%mm4 \n\t"
"movq %%mm0, %%mm2 \n\t"
"movq %%mm1, %%mm5 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm2 \n\t"
"punpckldq %%mm4, %%mm1 \n\t"
"punpckhdq %%mm4, %%mm5 \n\t"
"movq %%mm0, (%0, %%esi,2) \n\t"
"movq %%mm2, 8(%0, %%esi,2) \n\t"
"movq %%mm1, 16(%0, %%esi,2) \n\t"
"movq %%mm5, 24(%0, %%esi,2) \n\t"
"addl $16, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1024), "r" (f+256)
:"%esi", "memory"
);
return 4*256;
}
/*
 * MMX inline-asm converter: five consecutive 256-sample input blocks -> 256
 * interleaved 5-slot frames.
 * NOTE(review): the slot layout is expected to match
 * a52_resample_3F_2R_to_5_C -- not re-verified lane by lane.
 *
 * Operands: %0 = s16+1280 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 16 per pass (four samples per block); %edi = 5*%esi/2
 * is the matching output byte offset (40 bytes per pass).
 * Samples are gathered pairwise with movd/punpckldq straight into output
 * order, biased by magicF2W and packed with signed saturation.
 * Clobbers %esi and %edi, executes emms before leaving, returns 5*256.
 */
static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"1: \n\t"
"movd (%1, %%esi), %%mm0 \n\t"
"punpckldq 2048(%1, %%esi), %%mm0\n\t"
"movd 3072(%1, %%esi), %%mm1 \n\t"
"punpckldq 4096(%1, %%esi), %%mm1\n\t"
"movd 1024(%1, %%esi), %%mm2 \n\t"
"punpckldq 4(%1, %%esi), %%mm2 \n\t"
"movd 2052(%1, %%esi), %%mm3 \n\t"
"punpckldq 3076(%1, %%esi), %%mm3\n\t"
"movd 4100(%1, %%esi), %%mm4 \n\t"
"punpckldq 1028(%1, %%esi), %%mm4\n\t"
"movd 8(%1, %%esi), %%mm5 \n\t"
"punpckldq 2056(%1, %%esi), %%mm5\n\t"
"leal (%%esi, %%esi, 4), %%edi \n\t"
"sarl $1, %%edi \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"psubd %%mm7, %%mm5 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"packssdw %%mm5, %%mm4 \n\t"
"movq %%mm0, (%0, %%edi) \n\t"
"movq %%mm2, 8(%0, %%edi) \n\t"
"movq %%mm4, 16(%0, %%edi) \n\t"
"movd 3080(%1, %%esi), %%mm0 \n\t"
"punpckldq 4104(%1, %%esi), %%mm0\n\t"
"movd 1032(%1, %%esi), %%mm1 \n\t"
"punpckldq 12(%1, %%esi), %%mm1\n\t"
"movd 2060(%1, %%esi), %%mm2 \n\t"
"punpckldq 3084(%1, %%esi), %%mm2\n\t"
"movd 4108(%1, %%esi), %%mm3 \n\t"
"punpckldq 1036(%1, %%esi), %%mm3\n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"movq %%mm0, 24(%0, %%edi) \n\t"
"movq %%mm2, 32(%0, %%edi) \n\t"
"addl $16, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1280), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 5*256;
}
/*
 * MMX inline-asm converter: two consecutive 256-sample input blocks -> 256
 * interleaved 6-slot frames, with slots 0..3 zero and slots 4/5 holding the
 * converted samples from the second and first block respectively.
 *
 * Operands: %0 = s16+1536 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 16 per pass (four samples per block); %edi = 3*%esi is
 * the matching output byte offset (48 bytes per pass).
 * mm6 stays zero and supplies the four silent slots of each frame; the
 * 8-byte zero stores at offsets 12 and 36 are not 8-byte aligned.
 * Clobbers %esi and %edi, executes emms before leaving, returns 6*256.
 */
static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
"movq 1024(%1, %%esi), %%mm0 \n\t"
"movq 1032(%1, %%esi), %%mm1 \n\t"
"movq (%1, %%esi), %%mm2 \n\t"
"movq 8(%1, %%esi), %%mm3 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm3, %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t"
"punpcklwd %%mm2, %%mm0 \n\t"
"punpckhwd %%mm2, %%mm1 \n\t"
"leal (%%esi, %%esi, 2), %%edi \n\t"
"movq %%mm6, (%0, %%edi) \n\t"
"movd %%mm0, 8(%0, %%edi) \n\t"
"punpckhdq %%mm0, %%mm0 \n\t"
"movq %%mm6, 12(%0, %%edi) \n\t"
"movd %%mm0, 20(%0, %%edi) \n\t"
"movq %%mm6, 24(%0, %%edi) \n\t"
"movd %%mm1, 32(%0, %%edi) \n\t"
"punpckhdq %%mm1, %%mm1 \n\t"
"movq %%mm6, 36(%0, %%edi) \n\t"
"movd %%mm1, 44(%0, %%edi) \n\t"
"addl $16, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 6*256;
}
/*
 * MMX inline-asm converter: three consecutive 256-sample input blocks -> 256
 * interleaved 6-slot frames.
 * NOTE(review): the slot layout is expected to match
 * a52_resample_STEREO_LFE_to_6_C -- not re-verified lane by lane.
 *
 * Operands: %0 = s16+1536 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 8 per pass (two samples per block); %edi = 3*%esi is
 * the matching output byte offset (24 bytes per pass).
 * The lane comments list 16-bit words from most to least significant.
 * mm3 is read before it is written on each pass ("XX" below); that stale
 * half is replaced by the punpckhdq that follows before mm3 is stored.
 * Clobbers %esi and %edi, executes emms before leaving, returns 6*256.
 */
static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
"movq 1024(%1, %%esi), %%mm0 \n\t"
"movq 2048(%1, %%esi), %%mm1 \n\t"
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm5 \n\t"
"leal (%%esi, %%esi, 2), %%edi \n\t"
"pxor %%mm4, %%mm4 \n\t"
"packssdw %%mm5, %%mm0 \n\t" // FfAa
"packssdw %%mm4, %%mm1 \n\t" // 00Bb
"punpckhwd %%mm0, %%mm4 \n\t" // F0f0
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"movq %%mm0, %%mm1 \n\t" // BAba
"punpckldq %%mm4, %%mm3 \n\t" // f0XX
"punpckldq %%mm6, %%mm0 \n\t" // 00ba
"punpckhdq %%mm1, %%mm3 \n\t" // BAf0
"movq %%mm0, (%0, %%edi) \n\t" // 00ba
"punpckhdq %%mm4, %%mm0 \n\t" // F000
"movq %%mm3, 8(%0, %%edi) \n\t" // BAf0
"movq %%mm0, 16(%0, %%edi) \n\t" // F000
"addl $8, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 6*256;
}
/*
 * MMX inline-asm converter: four consecutive 256-sample input blocks -> 256
 * interleaved 6-slot frames.
 * NOTE(review): the slot layout is expected to match
 * a52_resample_3F_LFE_to_6_C -- not re-verified lane by lane.
 *
 * Operands: %0 = s16+1536 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 8 per pass (two samples per block); %edi = 3*%esi is
 * the matching output byte offset (24 bytes per pass).
 * The lane comments list 16-bit words from most to least significant;
 * mm6 stays zero and supplies the silent slots.
 * Clobbers %esi and %edi, executes emms before leaving, returns 6*256.
 */
static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
"pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
"movq 1024(%1, %%esi), %%mm0 \n\t"
"movq 3072(%1, %%esi), %%mm1 \n\t"
"movq 2048(%1, %%esi), %%mm4 \n\t"
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"psubd %%mm7, %%mm5 \n\t"
"leal (%%esi, %%esi, 2), %%edi \n\t"
"packssdw %%mm4, %%mm0 \n\t" // EeAa
"packssdw %%mm5, %%mm1 \n\t" // FfBb
"movq %%mm0, %%mm2 \n\t" // EeAa
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"punpckhwd %%mm1, %%mm2 \n\t" // FEfe
"movq %%mm0, %%mm1 \n\t" // BAba
"punpckldq %%mm6, %%mm0 \n\t" // 00ba
"punpckhdq %%mm1, %%mm1 \n\t" // BABA
"movq %%mm0, (%0, %%edi) \n\t"
"punpckhdq %%mm2, %%mm0 \n\t" // FE00
"punpckldq %%mm1, %%mm2 \n\t" // BAfe
"movq %%mm2, 8(%0, %%edi) \n\t"
"movq %%mm0, 16(%0, %%edi) \n\t"
"addl $8, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 6*256;
}
/*
 * MMX inline-asm converter: five consecutive 256-sample input blocks -> 256
 * interleaved 6-slot frames.
 * NOTE(review): the slot layout is expected to match
 * a52_resample_2F_2R_LFE_to_6_C -- not re-verified lane by lane.
 *
 * Operands: %0 = s16+1536 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 8 per pass (two samples per block); %edi = 3*%esi is
 * the matching output byte offset (24 bytes per pass).
 * The lane comments list 16-bit words from most to least significant;
 * mm4 is cleared inside the loop to provide the one silent slot per frame.
 * Clobbers %esi and %edi, executes emms before leaving, returns 6*256.
 */
static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
// "pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
"movq 1024(%1, %%esi), %%mm0 \n\t"
"movq 2048(%1, %%esi), %%mm1 \n\t"
"movq 3072(%1, %%esi), %%mm2 \n\t"
"movq 4096(%1, %%esi), %%mm3 \n\t"
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm5 \n\t"
"leal (%%esi, %%esi, 2), %%edi \n\t"
"packssdw %%mm2, %%mm0 \n\t" // CcAa
"packssdw %%mm3, %%mm1 \n\t" // DdBb
"packssdw %%mm5, %%mm5 \n\t" // FfFf
"movq %%mm0, %%mm2 \n\t" // CcAa
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"punpckhwd %%mm1, %%mm2 \n\t" // DCdc
"pxor %%mm4, %%mm4 \n\t" // 0000
"punpcklwd %%mm5, %%mm4 \n\t" // F0f0
"movq %%mm0, %%mm1 \n\t" // BAba
"movq %%mm4, %%mm3 \n\t" // F0f0
"punpckldq %%mm2, %%mm0 \n\t" // dcba
"punpckhdq %%mm1, %%mm1 \n\t" // BABA
"punpckldq %%mm1, %%mm4 \n\t" // BAf0
"punpckhdq %%mm3, %%mm2 \n\t" // F0DC
"movq %%mm0, (%0, %%edi) \n\t"
"movq %%mm4, 8(%0, %%edi) \n\t"
"movq %%mm2, 16(%0, %%edi) \n\t"
"addl $8, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 6*256;
}
/*
 * MMX inline-asm converter: six consecutive 256-sample input blocks -> 256
 * interleaved 6-slot frames.
 * NOTE(review): the slot layout is expected to match
 * a52_resample_3F_2R_LFE_to_6_C -- not re-verified lane by lane.
 *
 * Operands: %0 = s16+1536 (one past the end of the output), %1 = f+256 (end
 * of the first input block).  %esi is a negative byte offset that starts at
 * -1024 and rises by 8 per pass (two samples per block); %edi = 3*%esi is
 * the matching output byte offset (24 bytes per pass).
 * The lane comments list 16-bit words from most to least significant.
 * Clobbers %esi and %edi, executes emms before leaving, returns 6*256.
 */
static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
int32_t * f = (int32_t *) _f;
asm volatile(
"movl $-1024, %%esi \n\t"
"movq "MANGLE(magicF2W)", %%mm7 \n\t"
// "pxor %%mm6, %%mm6 \n\t"
"1: \n\t"
"movq 1024(%1, %%esi), %%mm0 \n\t"
"movq 3072(%1, %%esi), %%mm1 \n\t"
"movq 4096(%1, %%esi), %%mm2 \n\t"
"movq 5120(%1, %%esi), %%mm3 \n\t"
"movq 2048(%1, %%esi), %%mm4 \n\t"
"movq (%1, %%esi), %%mm5 \n\t"
"psubd %%mm7, %%mm0 \n\t"
"psubd %%mm7, %%mm1 \n\t"
"psubd %%mm7, %%mm2 \n\t"
"psubd %%mm7, %%mm3 \n\t"
"psubd %%mm7, %%mm4 \n\t"
"psubd %%mm7, %%mm5 \n\t"
"leal (%%esi, %%esi, 2), %%edi \n\t"
"packssdw %%mm2, %%mm0 \n\t" // CcAa
"packssdw %%mm3, %%mm1 \n\t" // DdBb
"packssdw %%mm4, %%mm4 \n\t" // EeEe
"packssdw %%mm5, %%mm5 \n\t" // FfFf
"movq %%mm0, %%mm2 \n\t" // CcAa
"punpcklwd %%mm1, %%mm0 \n\t" // BAba
"punpckhwd %%mm1, %%mm2 \n\t" // DCdc
"punpcklwd %%mm5, %%mm4 \n\t" // FEfe
"movq %%mm0, %%mm1 \n\t" // BAba
"movq %%mm4, %%mm3 \n\t" // FEfe
"punpckldq %%mm2, %%mm0 \n\t" // dcba
"punpckhdq %%mm1, %%mm1 \n\t" // BABA
"punpckldq %%mm1, %%mm4 \n\t" // BAfe
"punpckhdq %%mm3, %%mm2 \n\t" // FEDC
"movq %%mm0, (%0, %%edi) \n\t"
"movq %%mm4, 8(%0, %%edi) \n\t"
"movq %%mm2, 16(%0, %%edi) \n\t"
"addl $8, %%esi \n\t"
" jnz 1b \n\t"
"emms \n\t"
:: "r" (s16+1536), "r" (f+256)
:"%esi", "%edi", "memory"
);
return 6*256;
}
/*
 * Look up the MMX converter for a (mode flags, output channel count) pair.
 * Returns the matching converter function as a void pointer, or NULL when
 * the combination has no MMX implementation.
 */
static void* a52_resample_MMX(int flags, int ch){
    void *match = NULL;

    switch (flags) {
    case A52_MONO:
        if (ch == 5)
            match = a52_resample_MONO_to_5_MMX;
        break;

    /* The three two-channel modes share one converter. */
    case A52_CHANNEL:
    case A52_STEREO:
    case A52_DOLBY:
        if (ch == 2)
            match = a52_resample_STEREO_to_2_MMX;
        break;

    case A52_3F:
        if (ch == 5)
            match = a52_resample_3F_to_5_MMX;
        break;

    case A52_2F2R:
        if (ch == 4)
            match = a52_resample_2F_2R_to_4_MMX;
        break;

    case A52_3F2R:
        if (ch == 5)
            match = a52_resample_3F_2R_to_5_MMX;
        break;

    /* Every mode combined with A52_LFE is only available as 6 channels. */
    case A52_MONO | A52_LFE:
        if (ch == 6)
            match = a52_resample_MONO_LFE_to_6_MMX;
        break;

    case A52_CHANNEL | A52_LFE:
    case A52_STEREO | A52_LFE:
    case A52_DOLBY | A52_LFE:
        if (ch == 6)
            match = a52_resample_STEREO_LFE_to_6_MMX;
        break;

    case A52_3F | A52_LFE:
        if (ch == 6)
            match = a52_resample_3F_LFE_to_6_MMX;
        break;

    case A52_2F2R | A52_LFE:
        if (ch == 6)
            match = a52_resample_2F_2R_LFE_to_6_MMX;
        break;

    case A52_3F2R | A52_LFE:
        if (ch == 6)
            match = a52_resample_3F_2R_LFE_to_6_MMX;
        break;

    default:
        break;
    }
    return match;
}