mirror of
https://git.videolan.org/git/ffmpeg.git
synced 2024-10-01 00:54:33 +02:00
AltiVec operations need to have memory aligned on 16-byte boundaries.
Patch by Alan Curry, pacman at world dot std dot com. Originally committed as revision 17559 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
This commit is contained in:
parent
be9d060d0c
commit
5edb653bca
@ -1166,7 +1166,8 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Note the +1 is for the MMXscaler which reads over the end
|
// Note the +1 is for the MMXscaler which reads over the end
|
||||||
*outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
|
/* align at 16 for AltiVec (needed by hScale_altivec_real) */
|
||||||
|
*outFilter= (int16_t*)memalign(16, *outFilterSize*(dstW+1)*sizeof(int16_t));
|
||||||
memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
|
memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
|
||||||
|
|
||||||
/* Normalize & Store in outFilter */
|
/* Normalize & Store in outFilter */
|
||||||
@ -2132,10 +2133,11 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
|
|||||||
c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
|
c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
|
||||||
c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
|
c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
|
||||||
//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
|
//Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
|
||||||
|
/* align at 16 bytes for AltiVec */
|
||||||
for(i=0; i<c->vLumBufSize; i++)
|
for(i=0; i<c->vLumBufSize; i++)
|
||||||
c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
|
c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(16, 4000);
|
||||||
for(i=0; i<c->vChrBufSize; i++)
|
for(i=0; i<c->vChrBufSize; i++)
|
||||||
c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
|
c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(16, 8000);
|
||||||
|
|
||||||
//try to avoid drawing green stuff between the right end and the stride end
|
//try to avoid drawing green stuff between the right end and the stride end
|
||||||
for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
|
for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
|
||||||
|
@ -68,6 +68,9 @@
|
|||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#ifdef HAVE_MALLOC_H
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
#include "rgb2rgb.h"
|
#include "rgb2rgb.h"
|
||||||
#include "swscale.h"
|
#include "swscale.h"
|
||||||
#include "swscale_internal.h"
|
#include "swscale_internal.h"
|
||||||
@ -788,8 +791,8 @@ altivec_yuv2packedX (SwsContext *c,
|
|||||||
|
|
||||||
vector signed short *YCoeffs, *CCoeffs;
|
vector signed short *YCoeffs, *CCoeffs;
|
||||||
|
|
||||||
vYCoeffsBank = malloc (sizeof (vector signed short)*lumFilterSize*c->dstH);
|
vYCoeffsBank = memalign (16, sizeof (vector signed short)*lumFilterSize*c->dstH);
|
||||||
vCCoeffsBank = malloc (sizeof (vector signed short)*chrFilterSize*c->dstH);
|
vCCoeffsBank = memalign (16, sizeof (vector signed short)*chrFilterSize*c->dstH);
|
||||||
|
|
||||||
for (i=0;i<lumFilterSize*c->dstH;i++) {
|
for (i=0;i<lumFilterSize*c->dstH;i++) {
|
||||||
tmp = c->vLumFilter[i];
|
tmp = c->vLumFilter[i];
|
||||||
|
Loading…
Reference in New Issue
Block a user