mirror of https://git.videolan.org/git/ffmpeg.git
more speed
Originally committed as revision 2429 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
ac0b0b2f6d
commit
9736722ad7
|
@ -60,6 +60,7 @@ compare the quality & speed of all filters
|
||||||
split this huge file
|
split this huge file
|
||||||
fix warnings (unused vars, ...)
|
fix warnings (unused vars, ...)
|
||||||
noise reduction filters
|
noise reduction filters
|
||||||
|
write an exact implementation of the horizontal deblocking filter
|
||||||
...
|
...
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
|
@ -1450,7 +1451,10 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
{
|
{
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pushl %0 \n\t"
|
"leal (%0, %1), %%ecx \n\t"
|
||||||
|
"leal (%%ecx, %1, 4), %%ebx \n\t"
|
||||||
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
|
// %0 ecx ecx+%1 ecx+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
"movq bm00001000, %%mm6 \n\t"
|
"movq bm00001000, %%mm6 \n\t"
|
||||||
"movd %2, %%mm5 \n\t" // QP
|
"movd %2, %%mm5 \n\t" // QP
|
||||||
|
@ -1464,10 +1468,10 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
|
|
||||||
//FIXME? "unroll by 2" and mix
|
//FIXME? "unroll by 2" and mix
|
||||||
#ifdef HAVE_MMX2
|
#ifdef HAVE_MMX2
|
||||||
#define HDF(i) \
|
#define HDF(src, dst) \
|
||||||
"movq " #i "(%%eax), %%mm0 \n\t"\
|
"movq " #src "(%%eax), %%mm0 \n\t"\
|
||||||
"movq %%mm0, %%mm1 \n\t"\
|
"movq " #src "(%%eax), %%mm1 \n\t"\
|
||||||
"movq %%mm0, %%mm2 \n\t"\
|
"movq " #src "(%%eax), %%mm2 \n\t"\
|
||||||
"psrlq $8, %%mm1 \n\t"\
|
"psrlq $8, %%mm1 \n\t"\
|
||||||
"psubusb %%mm1, %%mm2 \n\t"\
|
"psubusb %%mm1, %%mm2 \n\t"\
|
||||||
"psubusb %%mm0, %%mm1 \n\t"\
|
"psubusb %%mm0, %%mm1 \n\t"\
|
||||||
|
@ -1486,12 +1490,12 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
"psubb %%mm1, %%mm0 \n\t"\
|
"psubb %%mm1, %%mm0 \n\t"\
|
||||||
"psllq $8, %%mm1 \n\t"\
|
"psllq $8, %%mm1 \n\t"\
|
||||||
"paddb %%mm1, %%mm0 \n\t"\
|
"paddb %%mm1, %%mm0 \n\t"\
|
||||||
"movd %%mm0, (%0) \n\t"\
|
"movd %%mm0, " #dst" \n\t"\
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4" #dst" \n\t"
|
||||||
#else
|
#else
|
||||||
#define HDF(i)\
|
#define HDF(src, dst)\
|
||||||
"movq " #i "(%%eax), %%mm0 \n\t"\
|
"movq " #src "(%%eax), %%mm0 \n\t"\
|
||||||
"movq %%mm0, %%mm1 \n\t"\
|
"movq %%mm0, %%mm1 \n\t"\
|
||||||
"movq %%mm0, %%mm2 \n\t"\
|
"movq %%mm0, %%mm2 \n\t"\
|
||||||
"psrlq $8, %%mm1 \n\t"\
|
"psrlq $8, %%mm1 \n\t"\
|
||||||
|
@ -1515,29 +1519,21 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
"psubb %%mm1, %%mm0 \n\t"\
|
"psubb %%mm1, %%mm0 \n\t"\
|
||||||
"psllq $8, %%mm1 \n\t"\
|
"psllq $8, %%mm1 \n\t"\
|
||||||
"paddb %%mm1, %%mm0 \n\t"\
|
"paddb %%mm1, %%mm0 \n\t"\
|
||||||
"movd %%mm0, (%0) \n\t"\
|
"movd %%mm0, " #dst " \n\t"\
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4" #dst " \n\t"
|
||||||
#endif
|
#endif
|
||||||
HDF(0)
|
HDF(0,(%0))
|
||||||
"addl %1, %0 \n\t"
|
HDF(8,(%%ecx))
|
||||||
HDF(8)
|
HDF(16,(%%ecx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HDF(24,(%%ecx, %1, 2))
|
||||||
HDF(16)
|
HDF(32,(%0, %1, 4))
|
||||||
"addl %1, %0 \n\t"
|
HDF(40,(%%ebx))
|
||||||
HDF(24)
|
HDF(48,(%%ebx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HDF(56,(%%ebx, %1, 2))
|
||||||
HDF(32)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HDF(40)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HDF(48)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HDF(56)
|
|
||||||
"popl %0 \n\t"
|
|
||||||
:
|
:
|
||||||
: "r" (dst), "r" (stride), "r" (QP)
|
: "r" (dst), "r" (stride), "r" (QP)
|
||||||
: "%eax"
|
: "%eax", "%ebx", "%ecx"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
uint8_t *src= tempBlock;
|
uint8_t *src= tempBlock;
|
||||||
|
@ -1597,8 +1593,11 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
|
||||||
{
|
{
|
||||||
//return;
|
//return;
|
||||||
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
||||||
asm volatile( //"movv %0 %1 %2\n\t"
|
asm volatile(
|
||||||
"pushl %0\n\t"
|
"leal (%0, %1), %%ecx \n\t"
|
||||||
|
"leal (%%ecx, %1, 4), %%ebx \n\t"
|
||||||
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
|
// %0 ecx ecx+%1 ecx+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
"leal tempBlock, %%eax \n\t"
|
"leal tempBlock, %%eax \n\t"
|
||||||
/*
|
/*
|
||||||
|
@ -1714,20 +1713,20 @@ Implemented Exact 7-Tap
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* uses the 7-Tap Filter: 1112111 */
|
/* uses the 7-Tap Filter: 1112111 */
|
||||||
#define NEW_HLP(i)\
|
#define NEW_HLP(src, dst)\
|
||||||
"movq " #i "(%%eax), %%mm0 \n\t"\
|
"movq " #src "(%%eax), %%mm1 \n\t"\
|
||||||
"movq %%mm0, %%mm1 \n\t"\
|
"movq " #src "(%%eax), %%mm2 \n\t"\
|
||||||
"movq %%mm0, %%mm2 \n\t"\
|
|
||||||
"movd -4(%0), %%mm3 \n\t" /*0001000*/\
|
|
||||||
"movd 8(%0), %%mm4 \n\t" /*0001000*/\
|
|
||||||
"psllq $8, %%mm1 \n\t"\
|
"psllq $8, %%mm1 \n\t"\
|
||||||
"psrlq $8, %%mm2 \n\t"\
|
"psrlq $8, %%mm2 \n\t"\
|
||||||
|
"movd -4" #dst ", %%mm3 \n\t" /*0001000*/\
|
||||||
|
"movd 8" #dst ", %%mm4 \n\t" /*0001000*/\
|
||||||
"psrlq $24, %%mm3 \n\t"\
|
"psrlq $24, %%mm3 \n\t"\
|
||||||
"psllq $56, %%mm4 \n\t"\
|
"psllq $56, %%mm4 \n\t"\
|
||||||
"por %%mm3, %%mm1 \n\t"\
|
"por %%mm3, %%mm1 \n\t"\
|
||||||
"por %%mm4, %%mm2 \n\t"\
|
"por %%mm4, %%mm2 \n\t"\
|
||||||
"movq %%mm1, %%mm5 \n\t"\
|
"movq %%mm1, %%mm5 \n\t"\
|
||||||
PAVGB(%%mm2, %%mm1)\
|
PAVGB(%%mm2, %%mm1)\
|
||||||
|
"movq " #src "(%%eax), %%mm0 \n\t"\
|
||||||
PAVGB(%%mm1, %%mm0)\
|
PAVGB(%%mm1, %%mm0)\
|
||||||
"psllq $8, %%mm5 \n\t"\
|
"psllq $8, %%mm5 \n\t"\
|
||||||
"psrlq $8, %%mm2 \n\t"\
|
"psrlq $8, %%mm2 \n\t"\
|
||||||
|
@ -1742,9 +1741,9 @@ Implemented Exact 7-Tap
|
||||||
PAVGB(%%mm2, %%mm1)\
|
PAVGB(%%mm2, %%mm1)\
|
||||||
PAVGB(%%mm1, %%mm5)\
|
PAVGB(%%mm1, %%mm5)\
|
||||||
PAVGB(%%mm5, %%mm0)\
|
PAVGB(%%mm5, %%mm0)\
|
||||||
"movd %%mm0, (%0) \n\t"\
|
"movd %%mm0, " #dst " \n\t"\
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4" #dst " \n\t"
|
||||||
|
|
||||||
/* uses the 9-Tap Filter: 112242211 */
|
/* uses the 9-Tap Filter: 112242211 */
|
||||||
#define NEW_HLP2(i)\
|
#define NEW_HLP2(i)\
|
||||||
|
@ -1786,28 +1785,20 @@ Implemented Exact 7-Tap
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4(%0) \n\t"
|
||||||
|
|
||||||
#define HLP(i) NEW_HLP(i)
|
#define HLP(src, dst) NEW_HLP(src, dst)
|
||||||
|
|
||||||
HLP(0)
|
HLP(0, (%0))
|
||||||
"addl %1, %0 \n\t"
|
HLP(8, (%%ecx))
|
||||||
HLP(8)
|
HLP(16, (%%ecx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HLP(24, (%%ecx, %1, 2))
|
||||||
HLP(16)
|
HLP(32, (%0, %1, 4))
|
||||||
"addl %1, %0 \n\t"
|
HLP(40, (%%ebx))
|
||||||
HLP(24)
|
HLP(48, (%%ebx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HLP(56, (%%ebx, %1, 2))
|
||||||
HLP(32)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HLP(40)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HLP(48)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HLP(56)
|
|
||||||
|
|
||||||
"popl %0\n\t"
|
|
||||||
:
|
:
|
||||||
: "r" (dst), "r" (stride)
|
: "r" (dst), "r" (stride)
|
||||||
: "%eax", "%ebx"
|
: "%eax", "%ebx", "%ecx"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -2743,10 +2734,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
for(x=0; x<width; x+=BLOCK_SIZE)
|
for(x=0; x<width; x+=BLOCK_SIZE)
|
||||||
{
|
{
|
||||||
const int stride= dstStride;
|
const int stride= dstStride;
|
||||||
int QP= isColor ?
|
int QP;
|
||||||
QPs[(y>>3)*QPStride + (x>>3)]:
|
if(isColor)
|
||||||
QPs[(y>>4)*QPStride + (x>>4)];
|
{
|
||||||
if(!isColor && (mode & LEVEL_FIX)) QP= (QP* (packedYScale &0xFFFF))>>8;
|
QP=QPs[(y>>3)*QPStride + (x>>3)];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QP= QPs[(y>>4)*QPStride + (x>>4)];
|
||||||
|
if(mode & LEVEL_FIX) QP= (QP* (packedYScale &0xFFFF))>>8;
|
||||||
|
yHistogram[ srcBlock[srcStride*5] ]++;
|
||||||
|
}
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %0, %%mm7 \n\t"
|
"movd %0, %%mm7 \n\t"
|
||||||
|
@ -2776,8 +2774,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
*/
|
*/
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
|
|
||||||
|
|
||||||
#ifdef PP_FUNNY_STRIDE
|
#ifdef PP_FUNNY_STRIDE
|
||||||
//can we mess with a 8x16 block, if not use a temp buffer, yes again
|
//can we mess with a 8x16 block, if not use a temp buffer, yes again
|
||||||
if(x+7 >= width)
|
if(x+7 >= width)
|
||||||
|
|
|
@ -60,6 +60,7 @@ compare the quality & speed of all filters
|
||||||
split this huge file
|
split this huge file
|
||||||
fix warnings (unused vars, ...)
|
fix warnings (unused vars, ...)
|
||||||
noise reduction filters
|
noise reduction filters
|
||||||
|
write an exact implementation of the horizontal deblocking filter
|
||||||
...
|
...
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
|
@ -1450,7 +1451,10 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
{
|
{
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pushl %0 \n\t"
|
"leal (%0, %1), %%ecx \n\t"
|
||||||
|
"leal (%%ecx, %1, 4), %%ebx \n\t"
|
||||||
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
|
// %0 ecx ecx+%1 ecx+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
"movq bm00001000, %%mm6 \n\t"
|
"movq bm00001000, %%mm6 \n\t"
|
||||||
"movd %2, %%mm5 \n\t" // QP
|
"movd %2, %%mm5 \n\t" // QP
|
||||||
|
@ -1464,10 +1468,10 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
|
|
||||||
//FIXME? "unroll by 2" and mix
|
//FIXME? "unroll by 2" and mix
|
||||||
#ifdef HAVE_MMX2
|
#ifdef HAVE_MMX2
|
||||||
#define HDF(i) \
|
#define HDF(src, dst) \
|
||||||
"movq " #i "(%%eax), %%mm0 \n\t"\
|
"movq " #src "(%%eax), %%mm0 \n\t"\
|
||||||
"movq %%mm0, %%mm1 \n\t"\
|
"movq " #src "(%%eax), %%mm1 \n\t"\
|
||||||
"movq %%mm0, %%mm2 \n\t"\
|
"movq " #src "(%%eax), %%mm2 \n\t"\
|
||||||
"psrlq $8, %%mm1 \n\t"\
|
"psrlq $8, %%mm1 \n\t"\
|
||||||
"psubusb %%mm1, %%mm2 \n\t"\
|
"psubusb %%mm1, %%mm2 \n\t"\
|
||||||
"psubusb %%mm0, %%mm1 \n\t"\
|
"psubusb %%mm0, %%mm1 \n\t"\
|
||||||
|
@ -1486,12 +1490,12 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
"psubb %%mm1, %%mm0 \n\t"\
|
"psubb %%mm1, %%mm0 \n\t"\
|
||||||
"psllq $8, %%mm1 \n\t"\
|
"psllq $8, %%mm1 \n\t"\
|
||||||
"paddb %%mm1, %%mm0 \n\t"\
|
"paddb %%mm1, %%mm0 \n\t"\
|
||||||
"movd %%mm0, (%0) \n\t"\
|
"movd %%mm0, " #dst" \n\t"\
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4" #dst" \n\t"
|
||||||
#else
|
#else
|
||||||
#define HDF(i)\
|
#define HDF(src, dst)\
|
||||||
"movq " #i "(%%eax), %%mm0 \n\t"\
|
"movq " #src "(%%eax), %%mm0 \n\t"\
|
||||||
"movq %%mm0, %%mm1 \n\t"\
|
"movq %%mm0, %%mm1 \n\t"\
|
||||||
"movq %%mm0, %%mm2 \n\t"\
|
"movq %%mm0, %%mm2 \n\t"\
|
||||||
"psrlq $8, %%mm1 \n\t"\
|
"psrlq $8, %%mm1 \n\t"\
|
||||||
|
@ -1515,29 +1519,21 @@ static inline void doHorizDefFilterAndCopyBack(uint8_t dst[], int stride, int QP
|
||||||
"psubb %%mm1, %%mm0 \n\t"\
|
"psubb %%mm1, %%mm0 \n\t"\
|
||||||
"psllq $8, %%mm1 \n\t"\
|
"psllq $8, %%mm1 \n\t"\
|
||||||
"paddb %%mm1, %%mm0 \n\t"\
|
"paddb %%mm1, %%mm0 \n\t"\
|
||||||
"movd %%mm0, (%0) \n\t"\
|
"movd %%mm0, " #dst " \n\t"\
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4" #dst " \n\t"
|
||||||
#endif
|
#endif
|
||||||
HDF(0)
|
HDF(0,(%0))
|
||||||
"addl %1, %0 \n\t"
|
HDF(8,(%%ecx))
|
||||||
HDF(8)
|
HDF(16,(%%ecx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HDF(24,(%%ecx, %1, 2))
|
||||||
HDF(16)
|
HDF(32,(%0, %1, 4))
|
||||||
"addl %1, %0 \n\t"
|
HDF(40,(%%ebx))
|
||||||
HDF(24)
|
HDF(48,(%%ebx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HDF(56,(%%ebx, %1, 2))
|
||||||
HDF(32)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HDF(40)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HDF(48)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HDF(56)
|
|
||||||
"popl %0 \n\t"
|
|
||||||
:
|
:
|
||||||
: "r" (dst), "r" (stride), "r" (QP)
|
: "r" (dst), "r" (stride), "r" (QP)
|
||||||
: "%eax"
|
: "%eax", "%ebx", "%ecx"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
uint8_t *src= tempBlock;
|
uint8_t *src= tempBlock;
|
||||||
|
@ -1597,8 +1593,11 @@ static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
|
||||||
{
|
{
|
||||||
//return;
|
//return;
|
||||||
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
||||||
asm volatile( //"movv %0 %1 %2\n\t"
|
asm volatile(
|
||||||
"pushl %0\n\t"
|
"leal (%0, %1), %%ecx \n\t"
|
||||||
|
"leal (%%ecx, %1, 4), %%ebx \n\t"
|
||||||
|
// 0 1 2 3 4 5 6 7 8 9
|
||||||
|
// %0 ecx ecx+%1 ecx+2%1 %0+4%1 ebx ebx+%1 ebx+2%1 %0+8%1 ebx+4%1
|
||||||
"pxor %%mm7, %%mm7 \n\t"
|
"pxor %%mm7, %%mm7 \n\t"
|
||||||
"leal tempBlock, %%eax \n\t"
|
"leal tempBlock, %%eax \n\t"
|
||||||
/*
|
/*
|
||||||
|
@ -1714,20 +1713,20 @@ Implemented Exact 7-Tap
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* uses the 7-Tap Filter: 1112111 */
|
/* uses the 7-Tap Filter: 1112111 */
|
||||||
#define NEW_HLP(i)\
|
#define NEW_HLP(src, dst)\
|
||||||
"movq " #i "(%%eax), %%mm0 \n\t"\
|
"movq " #src "(%%eax), %%mm1 \n\t"\
|
||||||
"movq %%mm0, %%mm1 \n\t"\
|
"movq " #src "(%%eax), %%mm2 \n\t"\
|
||||||
"movq %%mm0, %%mm2 \n\t"\
|
|
||||||
"movd -4(%0), %%mm3 \n\t" /*0001000*/\
|
|
||||||
"movd 8(%0), %%mm4 \n\t" /*0001000*/\
|
|
||||||
"psllq $8, %%mm1 \n\t"\
|
"psllq $8, %%mm1 \n\t"\
|
||||||
"psrlq $8, %%mm2 \n\t"\
|
"psrlq $8, %%mm2 \n\t"\
|
||||||
|
"movd -4" #dst ", %%mm3 \n\t" /*0001000*/\
|
||||||
|
"movd 8" #dst ", %%mm4 \n\t" /*0001000*/\
|
||||||
"psrlq $24, %%mm3 \n\t"\
|
"psrlq $24, %%mm3 \n\t"\
|
||||||
"psllq $56, %%mm4 \n\t"\
|
"psllq $56, %%mm4 \n\t"\
|
||||||
"por %%mm3, %%mm1 \n\t"\
|
"por %%mm3, %%mm1 \n\t"\
|
||||||
"por %%mm4, %%mm2 \n\t"\
|
"por %%mm4, %%mm2 \n\t"\
|
||||||
"movq %%mm1, %%mm5 \n\t"\
|
"movq %%mm1, %%mm5 \n\t"\
|
||||||
PAVGB(%%mm2, %%mm1)\
|
PAVGB(%%mm2, %%mm1)\
|
||||||
|
"movq " #src "(%%eax), %%mm0 \n\t"\
|
||||||
PAVGB(%%mm1, %%mm0)\
|
PAVGB(%%mm1, %%mm0)\
|
||||||
"psllq $8, %%mm5 \n\t"\
|
"psllq $8, %%mm5 \n\t"\
|
||||||
"psrlq $8, %%mm2 \n\t"\
|
"psrlq $8, %%mm2 \n\t"\
|
||||||
|
@ -1742,9 +1741,9 @@ Implemented Exact 7-Tap
|
||||||
PAVGB(%%mm2, %%mm1)\
|
PAVGB(%%mm2, %%mm1)\
|
||||||
PAVGB(%%mm1, %%mm5)\
|
PAVGB(%%mm1, %%mm5)\
|
||||||
PAVGB(%%mm5, %%mm0)\
|
PAVGB(%%mm5, %%mm0)\
|
||||||
"movd %%mm0, (%0) \n\t"\
|
"movd %%mm0, " #dst " \n\t"\
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4" #dst " \n\t"
|
||||||
|
|
||||||
/* uses the 9-Tap Filter: 112242211 */
|
/* uses the 9-Tap Filter: 112242211 */
|
||||||
#define NEW_HLP2(i)\
|
#define NEW_HLP2(i)\
|
||||||
|
@ -1786,28 +1785,20 @@ Implemented Exact 7-Tap
|
||||||
"psrlq $32, %%mm0 \n\t"\
|
"psrlq $32, %%mm0 \n\t"\
|
||||||
"movd %%mm0, 4(%0) \n\t"
|
"movd %%mm0, 4(%0) \n\t"
|
||||||
|
|
||||||
#define HLP(i) NEW_HLP(i)
|
#define HLP(src, dst) NEW_HLP(src, dst)
|
||||||
|
|
||||||
HLP(0)
|
HLP(0, (%0))
|
||||||
"addl %1, %0 \n\t"
|
HLP(8, (%%ecx))
|
||||||
HLP(8)
|
HLP(16, (%%ecx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HLP(24, (%%ecx, %1, 2))
|
||||||
HLP(16)
|
HLP(32, (%0, %1, 4))
|
||||||
"addl %1, %0 \n\t"
|
HLP(40, (%%ebx))
|
||||||
HLP(24)
|
HLP(48, (%%ebx, %1))
|
||||||
"addl %1, %0 \n\t"
|
HLP(56, (%%ebx, %1, 2))
|
||||||
HLP(32)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HLP(40)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HLP(48)
|
|
||||||
"addl %1, %0 \n\t"
|
|
||||||
HLP(56)
|
|
||||||
|
|
||||||
"popl %0\n\t"
|
|
||||||
:
|
:
|
||||||
: "r" (dst), "r" (stride)
|
: "r" (dst), "r" (stride)
|
||||||
: "%eax", "%ebx"
|
: "%eax", "%ebx", "%ecx"
|
||||||
);
|
);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -2743,10 +2734,17 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
for(x=0; x<width; x+=BLOCK_SIZE)
|
for(x=0; x<width; x+=BLOCK_SIZE)
|
||||||
{
|
{
|
||||||
const int stride= dstStride;
|
const int stride= dstStride;
|
||||||
int QP= isColor ?
|
int QP;
|
||||||
QPs[(y>>3)*QPStride + (x>>3)]:
|
if(isColor)
|
||||||
QPs[(y>>4)*QPStride + (x>>4)];
|
{
|
||||||
if(!isColor && (mode & LEVEL_FIX)) QP= (QP* (packedYScale &0xFFFF))>>8;
|
QP=QPs[(y>>3)*QPStride + (x>>3)];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QP= QPs[(y>>4)*QPStride + (x>>4)];
|
||||||
|
if(mode & LEVEL_FIX) QP= (QP* (packedYScale &0xFFFF))>>8;
|
||||||
|
yHistogram[ srcBlock[srcStride*5] ]++;
|
||||||
|
}
|
||||||
#ifdef HAVE_MMX
|
#ifdef HAVE_MMX
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movd %0, %%mm7 \n\t"
|
"movd %0, %%mm7 \n\t"
|
||||||
|
@ -2776,8 +2774,6 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
|
||||||
*/
|
*/
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
|
|
||||||
|
|
||||||
#ifdef PP_FUNNY_STRIDE
|
#ifdef PP_FUNNY_STRIDE
|
||||||
//can we mess with a 8x16 block, if not use a temp buffer, yes again
|
//can we mess with a 8x16 block, if not use a temp buffer, yes again
|
||||||
if(x+7 >= width)
|
if(x+7 >= width)
|
||||||
|
|
Loading…
Reference in New Issue