
Fixed my blunders in the YUV code (sorry tm), but I'll put it back soon.

Brieuc Jeunhomme 2000-06-14 21:37:36 +00:00
parent c2e97975f7
commit fe175c116a


@@ -12,15 +12,16 @@
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
*****************************************************************************/
/*****************************************************************************
@@ -72,26 +73,6 @@
#define V_RED_COEF ((int)(1.596 * (1<<SHIFT) / 1.164))
#define V_GREEN_COEF ((int)(-0.813 * (1<<SHIFT) / 1.164))
- #ifdef HAVE_MMX
- /* hope these constant values are cache line aligned */
- static unsigned long long mmx_80w = 0x0080008000800080;
- static unsigned long long mmx_10w = 0x1010101010101010;
- static unsigned long long mmx_00ffw = 0x00ff00ff00ff00ff;
- static unsigned long long mmx_Y_coeff = 0x253f253f253f253f;
- /* hope these constant values are cache line aligned */
- static unsigned long long mmx_U_green = 0xf37df37df37df37d;
- static unsigned long long mmx_U_blue = 0x4093409340934093;
- static unsigned long long mmx_V_red = 0x3312331233123312;
- static unsigned long long mmx_V_green = 0xe5fce5fce5fce5fc;
- /* hope these constant values are cache line aligned */
- static unsigned long long mmx_redmask = 0xf8f8f8f8f8f8f8f8;
- static unsigned long long mmx_grnmask = 0xfcfcfcfcfcfcfcfc;
- static unsigned long long mmx_grnshift = 0x03;
- static unsigned long long mmx_blueshift = 0x03;
- #endif
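The coefficients above are the fixed-point form of the ITU-R BT.601 YUV-to-RGB transform: each one is pre-divided by 1.164 (the 255/219 luma gain), because the converter applies that gain once, through a lookup table, rather than per component. A minimal scalar sketch of the scheme, assuming SHIFT is 10 and defining the U coefficients the same way (U_GREEN_COEF, U_BLUE_COEF and Gain() are illustrative, not from the shown context):

#define SHIFT 10
#define U_GREEN_COEF ((int)(-0.391 * (1<<SHIFT) / 1.164))
#define U_BLUE_COEF  ((int)( 2.018 * (1<<SHIFT) / 1.164))
#define V_RED_COEF   ((int)( 1.596 * (1<<SHIFT) / 1.164))
#define V_GREEN_COEF ((int)(-0.813 * (1<<SHIFT) / 1.164))

/* Apply the 1.164 luma gain and clamp to [0,255]; the real converter
 * folds this into its p_yuv lookup table instead. */
static unsigned char Gain( int i )
{
    i = (int)(i * 1.164);
    return i < 0 ? 0 : i > 255 ? 255 : (unsigned char)i;
}

static void YuvToRgb( int i_y, int i_u, int i_v,
                      unsigned char *p_r, unsigned char *p_g, unsigned char *p_b )
{
    i_y -= 16; i_u -= 128; i_v -= 128;
    *p_r = Gain( i_y + ((V_RED_COEF * i_v) >> SHIFT) );
    *p_g = Gain( i_y + ((U_GREEN_COEF * i_u + V_GREEN_COEF * i_v) >> SHIFT) );
    *p_b = Gain( i_y + ((U_BLUE_COEF * i_u) >> SHIFT) );
}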
/*****************************************************************************
* Local prototypes
*****************************************************************************/
@@ -200,7 +181,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither10[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither20[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither20[i_real_y]) >> 5) ]; \
- b_jump_uv += *p_offset; \
+ b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
@@ -208,15 +189,15 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither11[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither21[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither21[i_real_y]) >> 5) ]; \
- b_jump_uv += *p_offset; \
+ b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
*p_pic++ = p_lookup[ \
(((*p_y + dither12[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither22[i_real_y]) >> 5) * 9 \
- + ((*p_v + dither22[i_real_y]) >> 5) ]; \
- b_jump_uv += *p_offset; \
+ + ((*p_v + dither22[i_real_y]) >> 5) ]; \
+ b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
@@ -224,7 +205,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
(((*p_y + dither13[i_real_y]) >> 4) << 7) \
+ ((*p_u + dither23[i_real_y]) >> 5) * 9 \
+ ((*p_v + dither23[i_real_y]) >> 5) ]; \
- b_jump_uv += *p_offset; \
+ b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \
p_y += *p_offset; \
p_u += *p_offset & b_jump_uv; \
p_v += *p_offset++ & b_jump_uv; \
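For context, the change above replaces a free-running b_jump_uv += *p_offset; with a parity update. In 4:2:0 and 4:2:2 one U/V pair serves two horizontal luma samples, so the chroma pointers may only advance on every other luma step; the & 0x1 keeps b_jump_uv at 0 or 1, which the *p_offset & b_jump_uv masks require to yield either 0 or the step. A stand-alone sketch of the walk, assuming an offset table of 0/1 horizontal-scaling steps (all names illustrative):

static void WalkLine420( unsigned char *p_y, unsigned char *p_u,
                         unsigned char *p_v, int *p_offset, int i_pic_width )
{
    int b_jump_uv = 0;                             /* parity of luma steps */
    int i_x;

    for( i_x = 0; i_x < i_pic_width; i_x++ )
    {
        /* ... convert the pixel at (p_y, p_u, p_v) here ... */
        b_jump_uv = (b_jump_uv + *p_offset) & 0x1; /* stays 0 or 1 */
        p_y += *p_offset;                          /* luma follows the table */
        p_u += *p_offset & b_jump_uv;              /* chroma: every 2nd step */
        p_v += *p_offset++ & b_jump_uv;
    }
}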
@@ -312,13 +293,12 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
* and 4 Bpp.
*****************************************************************************/
#define SCALE_HEIGHT( CHROMA, BPP ) \
\
/* If line is odd, rewind 4:2:0 U and V samples */ \
- /*if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) ) */\
- /*{ */\
- /* p_u -= i_chroma_width; */\
- /* p_v -= i_chroma_width; */\
- /*} */\
+ if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) ) \
+ { \
+ p_u -= i_chroma_width; \
+ p_v -= i_chroma_width; \
+ } \
\
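The block re-enabled above compensates for vertical chroma subsampling: in 4:2:0 one U/V line covers two luma lines, so after rendering an even line the chroma pointers are pulled back and the same line is read again for the odd one (the code applies the same rewind on its 4:2:2 path). A simplified model, with a hypothetical ConvertLine standing in for the macro body (names illustrative):

for( i_y = 0; i_y < i_height; i_y++ )
{
    ConvertLine( p_y, p_u, p_v );      /* hypothetical per-line conversion */
    p_y += i_width;
    p_u += i_chroma_width;             /* i_chroma_width == i_width / 2 */
    p_v += i_chroma_width;
    if( !(i_y & 0x1) )                 /* even line: reuse chroma next time */
    {
        p_u -= i_chroma_width;
        p_v -= i_chroma_width;
    }
}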
/* \
* Handle vertical scaling. The current line can be copied or next one \
@@ -327,70 +307,24 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
switch( i_vertical_scaling ) \
{ \
case -1: /* vertical scaling factor is < 1 */ \
if( i_y & 0x1 ) \
while( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
while( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
/* Height reduction: skip next source line */ \
p_y += i_width; \
if( (CHROMA == 420) || (CHROMA == 422) ) \
{ \
if( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
p_y += i_width; \
i_y += 2; \
p_u += i_chroma_width; \
p_v += i_chroma_width; \
continue; \
} \
else \
{ \
i_y++; \
break; \
} \
} \
else if( CHROMA == 444 ) \
{ \
i_y++; \
p_u += i_width; \
p_v += i_width; \
} \
} \
} \
else \
{ \
if( CHROMA == 420 || CHROMA == 422 ) \
p_y += i_width; \
i_y++; \
if( (CHROMA == 420) || (CHROMA == 422) ) \
{ \
p_u -= i_chroma_width; \
p_v -= i_chroma_width; \
} \
while( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
/* Height reduction: skip next source line */ \
p_y += i_width; \
if( (CHROMA == 420) || (CHROMA == 422) ) \
if( i_y & 0x1 ) \
{ \
p_u += i_chroma_width; \
p_v += i_chroma_width; \
if( (i_scale_count -= i_pic_height) >= 0 ) \
{ \
p_y += i_width; \
i_y+=2; \
continue; \
} \
else \
{ \
i_y++; \
break; \
} \
} \
else if( CHROMA == 444 ) \
{ \
i_y++; \
p_u += i_width; \
p_v += i_width; \
} \
} \
else if( CHROMA == 444 ) \
{ \
p_u += i_width; \
p_v += i_width; \
} \
} \
i_scale_count += i_height; \
break; \
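Both the old and new bodies of this case implement the same idea: i_scale_count is a Bresenham-style error accumulator that decides, for each picture line actually drawn, how many extra source lines to skip when i_height > i_pic_height; the removed version merely special-cased the chroma bookkeeping per line parity. Stripped of the chroma handling, the logic is roughly (illustrative names, ConvertLine hypothetical):

i_scale_count = i_height;
for( i_pic_y = 0; i_pic_y < i_pic_height; i_pic_y++ )
{
    ConvertLine( p_y );                /* render one source line */
    p_y += i_width;
    while( (i_scale_count -= i_pic_height) >= 0 )
    {
        p_y += i_width;                /* height reduction: skip a line */
    }
    i_scale_count += i_height;
}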
@@ -398,7 +332,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
while( (i_scale_count -= i_height) > 0 ) \
{ \
/* Height increment: copy previous picture line */ \
- for( i_x = i_pic_width >> 4; i_x--; ) \
+ for( i_x = i_pic_width / 16; i_x--; ) \
{ \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
*(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \
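*(((u64 *) p_pic)++) relies on the old GCC cast-as-lvalue extension, which later compilers reject; the intent is simply to duplicate the previous output line eight bytes at a time. In portable C, the 16-pixels-per-iteration copy at 2 Bpp reads as below (a sketch; u64 mirrors VLC's 64-bit typedef, and the lines are assumed 8-byte aligned):

#include <stdint.h>
typedef uint64_t u64;

static void CopyLine16px( u64 *p_dst, const u64 *p_src, int i_pic_width )
{
    int i_x;
    for( i_x = i_pic_width / 16; i_x--; )
    {
        *p_dst++ = *p_src++;           /* 4 x 8 bytes = 16 pixels at 2 Bpp */
        *p_dst++ = *p_src++;
        *p_dst++ = *p_src++;
        *p_dst++ = *p_src++;
    }
}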
@@ -444,6 +378,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data
* Handle vertical scaling. The current line can be copied or next one \
* can be ignored. \
*/ \
\
switch( i_vertical_scaling ) \
{ \
case -1: /* vertical scaling factor is < 1 */ \
@@ -1168,16 +1103,15 @@ static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_
int dither22[4] = { 0x6, 0x16, 0x2, 0x12 };
int dither23[4] = { 0x1e, 0xe, 0x1a, 0xa };
- #if 0
- /* other matrices that can be interesting, either for debugging or for
- * various effects */
+ /* other matrices that can be interesting, either for debugging or for effects */
+ #if 0
int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 6 }, { 3, 11, 1, 9 }, { 15, 7, 13, 5 } };
int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } };
int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } };
int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } };
int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } };
int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } };
#endif
#endif
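The dither1x/dither2x rows above are one 4x4 ordered-dither (Bayer) matrix split per scan line, used to bias Y and chroma before they are quantized down for the 8 bpp palette lookup; the #if 0 block keeps alternative matrices around for experiments. The principle in isolation, as a sketch (DitherTo4Bits is a hypothetical helper, not from the file):

static const int bayer[4][4] = {
    {  0,  8,  2, 10 },
    { 12,  4, 14,  6 },
    {  3, 11,  1,  9 },
    { 15,  7, 13,  5 },
};

static unsigned char DitherTo4Bits( unsigned char i_value, int i_x, int i_y )
{
    int i_biased = i_value + bayer[i_y & 3][i_x & 3]; /* bias in [0,15] */
    if( i_biased > 255 )
    {
        i_biased = 255;
    }
    return (unsigned char)(i_biased >> 4);            /* keep top 4 bits */
}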
/*
* Initialize some values - i_pic_line_width will store the line skip
@@ -1352,15 +1286,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
int i_vertical_scaling; /* vertical scaling type */
int i_x, i_y; /* horizontal and vertical indexes */
int i_scale_count; /* scale modulo counter */
- #ifndef HAVE_MMX
int i_uval, i_vval; /* U and V samples */
int i_red, i_green, i_blue; /* U and V modified samples */
- #endif
int i_chroma_width; /* chroma width */
u16 * p_yuv; /* base conversion table */
- #ifndef HAVE_MMX
u16 * p_ybase; /* Y dependant conversion table */
- #endif
u16 * p_pic_start; /* beginning of the current line for copy */
u16 * p_buffer_start; /* conversion buffer start */
u16 * p_buffer; /* conversion buffer pointer */
@@ -1389,9 +1319,6 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
p_pic_start = p_pic;
p_buffer = b_horizontal_scaling ? p_buffer_start : p_pic;
- #ifndef HAVE_MMX
/* Do YUV conversion to buffer - YUV picture is always formed of 16
* pixels wide blocks */
for( i_x = i_width / 16; i_x--; )
@@ -1405,112 +1332,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t *
CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
}
SCALE_WIDTH;
SCALE_HEIGHT(420, 2);
}
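This scalar path processes blocks of 16 pixels, alternating CONVERT_YUV_PIXEL with CONVERT_Y_PIXEL so that each U/V pair, shared by two horizontally adjacent 4:2:0 pixels, is converted only once. Schematically, with the fixed-point coefficients defined as in the earlier sketch and a hypothetical RGB16() doing the 16-bit packing:

for( i_x = i_width / 2; i_x--; )
{
    int i_uval  = *p_u++ - 128;
    int i_vval  = *p_v++ - 128;
    int i_red   = (V_RED_COEF * i_vval) >> SHIFT;
    int i_green = (U_GREEN_COEF * i_uval + V_GREEN_COEF * i_vval) >> SHIFT;
    int i_blue  = (U_BLUE_COEF * i_uval) >> SHIFT;

    /* CONVERT_YUV_PIXEL: chroma part computed above, then one luma sample */
    *p_buffer++ = RGB16( *p_y++ - 16, i_red, i_green, i_blue );
    /* CONVERT_Y_PIXEL: same chroma reused, next luma sample */
    *p_buffer++ = RGB16( *p_y++ - 16, i_red, i_green, i_blue );
}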
- #else
- for ( i_x = i_width / 8; i_x--; )
- {
- __asm__ (
- "movd (%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0\n\t"
- "movd (%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0\n\t"
- "pxor %%mm4, %%mm4 # zero mm4\n\t"
- "movq (%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
- //"movl $0, (%3) # cache preload for image\n\t"
- : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
- __asm__ (
- ".align 8 \n\t"
- /* Do the multiply part of the conversion for even and odd pixels,
- * register usage:
- * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
- * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
- * mm6 -> Y even, mm7 -> Y odd */
- /* convert the chroma part */
- "punpcklbw %%mm4, %%mm0 # scatter 4 Cb 00 u3 00 u2 00 u1 00 u0\n\t"
- "punpcklbw %%mm4, %%mm1 # scatter 4 Cr 00 v3 00 v2 00 v1 00 v0\n\t"
- "psubsw mmx_80w, %%mm0 # Cb -= 128\n\t"
- "psubsw mmx_80w, %%mm1 # Cr -= 128\n\t"
- "psllw $3, %%mm0 # Promote precision\n\t"
- "psllw $3, %%mm1 # Promote precision\n\t"
- "movq %%mm0, %%mm2 # Copy 4 Cb 00 u3 00 u2 00 u1 00 u0\n\t"
- "movq %%mm1, %%mm3 # Copy 4 Cr 00 v3 00 v2 00 v1 00 v0\n\t"
- "pmulhw mmx_U_green, %%mm2# Mul Cb with green coeff -> Cb green\n\t"
- "pmulhw mmx_V_green, %%mm3# Mul Cr with green coeff -> Cr green\n\t"
- "pmulhw mmx_U_blue, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0\n\t"
- "pmulhw mmx_V_red, %%mm1 # Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0\n\t"
- "paddsw %%mm3, %%mm2 # Cb green + Cr green -> Cgreen\n\t"
- /* convert the luma part */
- "psubusb mmx_10w, %%mm6 # Y -= 16\n\t"
- "movq %%mm6, %%mm7 # Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
- "pand mmx_00ffw, %%mm6 # get Y even 00 Y6 00 Y4 00 Y2 00 Y0\n\t"
- "psrlw $8, %%mm7 # get Y odd 00 Y7 00 Y5 00 Y3 00 Y1\n\t"
- "psllw $3, %%mm6 # Promote precision\n\t"
- "psllw $3, %%mm7 # Promote precision\n\t"
- "pmulhw mmx_Y_coeff, %%mm6# Mul 4 Y even 00 y6 00 y4 00 y2 00 y0\n\t"
- "pmulhw mmx_Y_coeff, %%mm7# Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1\n\t"
- /* Do the addition part of the conversion for even and odd pixels,
- * register usage:
- * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
- * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels,
- * mm6 -> Y even, mm7 -> Y odd */ /* Do horizontal and vertical scaling */
- "movq %%mm0, %%mm3 # Copy Cblue\n\t"
- "movq %%mm1, %%mm4 # Copy Cred\n\t"
- "movq %%mm2, %%mm5 # Copy Cgreen\n\t"
- "paddsw %%mm6, %%mm0 # Y even + Cblue 00 B6 00 B4 00 B2 00 B0\n\t"
- "paddsw %%mm7, %%mm3 # Y odd + Cblue 00 B7 00 B5 00 B3 00 B1\n\t"
- "paddsw %%mm6, %%mm1 # Y even + Cred 00 R6 00 R4 00 R2 00 R0\n\t"
- "paddsw %%mm7, %%mm4 # Y odd + Cred 00 R7 00 R5 00 R3 00 R1\n\t"
- "paddsw %%mm6, %%mm2 # Y even + Cgreen 00 G6 00 G4 00 G2 00 G0\n\t"
- "paddsw %%mm7, %%mm5 # Y odd + Cgreen 00 G7 00 G5 00 G3 00 G1\n\t"
- /* Limit RGB even to 0..255 */
- "packuswb %%mm0, %%mm0 # B6 B4 B2 B0 | B6 B4 B2 B0\n\t"
- "packuswb %%mm1, %%mm1 # R6 R4 R2 R0 | R6 R4 R2 R0\n\t"
- "packuswb %%mm2, %%mm2 # G6 G4 G2 G0 | G6 G4 G2 G0\n\t"
- /* Limit RGB odd to 0..255 */
- "packuswb %%mm3, %%mm3 # B7 B5 B3 B1 | B7 B5 B3 B1\n\t"
- "packuswb %%mm4, %%mm4 # R7 R5 R3 R1 | R7 R5 R3 R1\n\t"
- "packuswb %%mm5, %%mm5 # G7 G5 G3 G1 | G7 G5 G3 G1\n\t"
- /* Interleave RGB even and odd */
- "punpcklbw %%mm3, %%mm0 # B7 B6 B5 B4 B3 B2 B1 B0\n\t"
- "punpcklbw %%mm4, %%mm1 # R7 R6 R5 R4 R3 R2 R1 R0\n\t"
- "punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0\n\t"
- /* mask unneeded bits off */
- "pand mmx_redmask, %%mm0# b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0\n\t"
- "pand mmx_grnmask, %%mm2# g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0\n\t"
- "pand mmx_redmask, %%mm1# r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0\n\t"
- "psrlw mmx_blueshift,%%mm0#0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3\n\t"
- "pxor %%mm4, %%mm4 # zero mm4\n\t"
- "movq %%mm0, %%mm5 # Copy B7-B0\n\t"
- "movq %%mm2, %%mm7 # Copy G7-G0\n\t"
- /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
- "punpcklbw %%mm4, %%mm2 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
- "punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
- "psllw mmx_blueshift,%%mm2# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0\n\t"
- "por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
- "movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t"
- "movq %%mm0, (%3) # store pixel 0-3\n\t"
- /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
- "punpckhbw %%mm4, %%mm7 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0\n\t"
- "punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t"
- "psllw mmx_blueshift,%%mm7# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0\n\t"
- "movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0\n\t"
- "por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t"
- "movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0\n\t"
- "movq %%mm5, 8(%3) # store pixel 4-7\n\t"
- : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer));
- p_y += 8;
- p_u += 4;
- p_v += 4;
- p_buffer += 8;
- }
- /* Do horizontal and vertical scaling */
- SCALE_WIDTH;
- SCALE_HEIGHT(420, 2);
- }
- __asm__ ("emms\n\t");
- #endif
}
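The deleted MMX branch converted eight pixels per iteration and packed them to 16-bit RGB using the mmx_redmask/mmx_grnmask constants removed further up: 0xf8 keeps the top 5 red bits, 0xfc the top 6 green bits, and blue is shifted down to 5 bits. The equivalent per-pixel packing in plain C, as a sketch (u16 mirrors VLC's 16-bit typedef):

typedef unsigned short u16;

static u16 PackRgb565( unsigned char i_r, unsigned char i_g, unsigned char i_b )
{
    return (u16)( ((u16)(i_r & 0xf8) << 8)   /* r7..r3 -> bits 15..11 */
                | ((u16)(i_g & 0xfc) << 3)   /* g7..g2 -> bits 10..5  */
                |  (u16)(i_b >> 3) );        /* b7..b3 -> bits 4..0   */
}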
/*****************************************************************************