
yuv2rgb_mmx crashes with the ffdivx codec when we play back AVI files whose
frame width is not an exact multiple of 8.

Testcase: 405.avi (356x240).  Playing it on an MMX-capable x86 system using
the x11 video-out driver results in a segfault.

The MMX routines convert image data in groups of 8 pixels per loop
iteration, and the inner loop was not terminated when only 1-7 pixels of a
row were left, producing too much RGB output.
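
In plain C terms, the control-flow bug and the fix in this commit look like
this (a minimal sketch of the loop structure only, not the actual MMX
conversion code; h_size is the row width in pixels, as in the routines
below):

    /* Old inner-loop control flow.  With h_size = 356, x takes the
     * values 0, 8, ..., 352, so the last iteration converts pixels
     * 352..359 -- four pixels beyond the end of the 356-pixel row,
     * writing RGB output past the end of the row. */
    int x = 0;
    do {
        /* ... convert pixels x .. x+7 (8 at a time, MMX) ... */
        x += 8;
    } while (x < h_size);

    /* The fix: iterate exactly h_size / 8 times, so a partial group
     * of 1-7 trailing pixels is skipped instead of overrun. */
    for (x = h_size >> 3; --x >= 0; ) {
        /* ... convert one full group of 8 pixels ... */
    }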

For now, just ignore the last few pixels on each row to avoid the segfaults.
(This gives a black vertical border on the right if you play a video with
width%8 != 0.)  A possible future enhancement would be to add a second loop
that converts the last width%8 pixels to RGB using a byte loop, as sketched
below.
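
Such a cleanup loop might look roughly like this (a hypothetical sketch for
the 16 bpp path; yuv_to_rgb16_pixel stands in for a scalar YUV->RGB helper
that does not exist in this file, and _py/_pu/_pv/_image are the row
pointers as left by the MMX loop in the patched code below):

    /* Hypothetical scalar tail loop: after the MMX loop has converted
     * all full 8-pixel groups of a row, convert the remaining
     * h_size % 8 pixels one at a time. */
    int tail = h_size & 7;              /* 0..7 leftover pixels */
    for (x = 0; x < tail; x++) {
        /* _py/_pu/_pv/_image were already advanced past the full
         * 8-pixel groups; in 4:2:0, one chroma sample covers two
         * horizontal luma pixels */
        ((uint16_t *) _image)[x] =
            yuv_to_rgb16_pixel(_py[x], _pu[x / 2], _pv[x / 2]);
    }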


git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1307 b3059339-0415-0410-9bf9-f77b7e298cf2
jkeil 2001-07-12 15:23:26 +00:00
parent 88b34d0d11
commit 009d2b0dc7


@@ -76,24 +76,29 @@ static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py,
                               int rgb_stride, int y_stride, int uv_stride)
 {
     int even = 1;
-    int x = 0, y = 0;
+    int x, y;
 
-    /* load data for first scan line */
-    __asm__ __volatile__ (
-         "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-         "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-         "pxor %%mm4, %%mm4;" /* zero mm4 */
-         "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-         //"movl $0, (%3);" /* cache preload for image */
-         : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+    __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
 
-    do {
-        do {
+    for (y = v_size; --y >= 0; ) {
+        uint8_t *_image = image;
+        uint8_t *_py = py;
+        uint8_t *_pu = pu;
+        uint8_t *_pv = pv;
+
+        /* load data for start of next scan line */
+        __asm__ __volatile__ (
+             "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+             "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+             "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+             : : "r" (_py), "r" (_pu), "r" (_pv));
+
+        for (x = h_size >> 3; --x >= 0; ) {
             /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
                pixels in each iteration */
-            __asm__ __volatile__ (".align 8;"
+            __asm__ __volatile__ (
             /* Do the multiply part of the conversion for even and odd pixels,
                register usage:
                mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
@@ -199,40 +204,24 @@ static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py,
              "movd 4 (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
              MOVNTQ " %%mm5, 8 (%3);" /* store pixel 4-7 */
-             : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+             : : "r" (_py), "r" (_pu), "r" (_pv), "r" (_image));
 
-            py += 8;
-            pu += 4;
-            pv += 4;
-            image += 16;
-            x += 8;
-        } while (x < h_size);
-
-        if (even) {
-            pu -= h_size/2;
-            pv -= h_size/2;
-        } else {
-            pu += (uv_stride - h_size/2);
-            pv += (uv_stride - h_size/2);
+            _py += 8;
+            _pu += 4;
+            _pv += 4;
+            _image += 16;
         }
-        py += (y_stride - h_size);
-        image += (rgb_stride - 2*h_size);
 
-        /* load data for start of next scan line */
-        __asm__ __volatile__ (
-             "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-             "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-             //"movl $0, (%3);" /* cache preload for image */
-             "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-             : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+        if (!even) {
+            pu += uv_stride;
+            pv += uv_stride;
+        }
 
-        x = 0;
-        y += 1;
+        py += y_stride;
+        image += rgb_stride;
+
         even = (!even);
-    } while (y < v_size) ;
+    }
 
     __asm__ __volatile__ (EMMS);
 }
@@ -243,25 +232,29 @@ static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py,
                                int rgb_stride, int y_stride, int uv_stride)
 {
     int even = 1;
-    int x = 0, y = 0;
+    int x, y;
 
-    __asm__ __volatile__ (
-        ".align 8;"
-        "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-        //"movl $0, (%3);" /* cache preload for image */
-        "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-        "pxor %%mm4, %%mm4;" /* zero mm4 */
-        "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-        : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+    __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
 
-    do {
-        do {
+    for (y = v_size; --y >= 0; ) {
+        uint8_t *_image = image;
+        uint8_t *_py = py;
+        uint8_t *_pu = pu;
+        uint8_t *_pv = pv;
+
+        /* load data for start of next scan line */
+        __asm__ __volatile__
+            (
+             "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+             "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+             "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+             : : "r" (_py), "r" (_pu), "r" (_pv)
+             );
+
+        for (x = h_size >> 3; --x >= 0; ) {
             /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
                pixels in each iteration */
             __asm__ __volatile__ (
                 ".align 8;"
             /* Do the multiply part of the conversion for even and odd pixels,
                register usage:
                mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
@@ -379,43 +372,24 @@ static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py,
              "pxor %%mm4, %%mm4;" /* zero mm4 */
              "movq 8 (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-             : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+             : : "r" (_py), "r" (_pu), "r" (_pv), "r" (_image));
 
-            py += 8;
-            pu += 4;
-            pv += 4;
-            image += 32;
-            x += 8;
-        } while (x < h_size);
-
-        if (even) {
-            pu -= h_size/2;
-            pv -= h_size/2;
-        } else {
-            pu += (uv_stride - h_size/2);
-            pv += (uv_stride - h_size/2);
+            _py += 8;
+            _pu += 4;
+            _pv += 4;
+            _image += 32;
         }
-        py += (y_stride - h_size);
-        image += (rgb_stride - 4*h_size);
 
-        /* load data for start of next scan line */
-        __asm__ __volatile__
-            (
-             ".align 8;"
-             "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
-             "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
-             //"movl $0, (%3);" /* cache preload for image */
-             "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
-             : : "r" (py), "r" (pu), "r" (pv), "r" (image)
-             );
+        if (!even) {
+            pu += uv_stride;
+            pv += uv_stride;
+        }
 
-        x = 0;
-        y += 1;
+        py += y_stride;
+        image += rgb_stride;
+
         even = (!even);
-    } while ( y < v_size) ;
+    }
 
     __asm__ __volatile__ (EMMS);
 }
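
For reference, this is the per-scan-line structure the diff introduces, in
plain C terms (a sketch of the rgb16 variant's control flow only; the MMX
conversion block itself is unchanged):

    for (y = v_size; --y >= 0; ) {
        /* fresh working copies for this row: the inner loop may stop
         * short of the row end (the width%8 leftovers), but the row
         * pointers below still advance by whole strides, so no
         * end-of-row rewind arithmetic is needed */
        uint8_t *_py = py, *_pu = pu, *_pv = pv, *_image = image;

        for (x = h_size >> 3; --x >= 0; ) {
            /* ... MMX block converts 8 pixels from _py/_pu/_pv ... */
            _py += 8;
            _pu += 4;
            _pv += 4;
            _image += 16;   /* 8 pixels * 2 bytes for rgb16 */
        }

        py += y_stride;
        image += rgb_stride;
        if (!even) {        /* 4:2:0: advance chroma every other row */
            pu += uv_stride;
            pv += uv_stride;
        }
        even = !even;
    }

The argb32 variant is identical except that _image advances by 32 bytes per
8-pixel group.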