avcodec/pngdsp: Fix unaligned accesses, effective type violations

This affected the lscr fate-test (only visible on x86 if
SSE2 is disabled).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt 2024-03-29 02:22:06 +01:00
parent 356610a2ac
commit 9e4e8ae1e6
1 changed files with 19 additions and 6 deletions

View File

@ -21,20 +21,33 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/macros.h"
#include "png.h"
#include "pngdsp.h"
// Pick the word size used to process several bytes per operation:
// 64 bits when the build configuration sets HAVE_FAST_64BIT, else 32 bits.
// BITS and uint_native are kept in sync so the macros below match the type.
#if HAVE_FAST_64BIT
#define BITS 64
typedef uint64_t uint_native;
#else
#define BITS 32
typedef uint32_t uint_native;
#endif
// RN / RNA / WN append BITS to the AV_RN / AV_WN prefixes, i.e. they name
// AV_RN64, AV_RN64A, AV_WN64 or the 32-bit counterparts (assuming AV_JOIN
// token-pastes its expanded arguments).
// NOTE(review): presumably these are the native-endian read/write helpers
// from libavutil/intreadwrite.h, with the "A" suffix marking the variant
// that requires an aligned pointer -- confirm against that header.
#define RN AV_JOIN(AV_RN, BITS)
#define RNA AV_JOIN(AV_JOIN(AV_RN, BITS), A)
#define WN AV_JOIN(AV_WN, BITS)
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
// (all-ones / 255 yields 0x01 in every byte; multiplying by 0x7f or 0x80
// replicates that byte value across the whole word).
// NOTE(review): pb_7f / pb_80 appear twice in this view. The ~0UL pair (width
// follows unsigned long) looks like the lines removed by this change, and the
// uint_native pair like their replacement -- confirm that only one pair
// exists in the actual file.
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)
#define pb_7f (~(uint_native)0 / 255 * 0x7f)
#define pb_80 (~(uint_native)0 / 255 * 0x80)
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
long i;
for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
long a = *(long *)(src1 + i);
long b = *(long *)(src2 + i);
*(long *)(dst + i) = ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80);
for (i = 0; i <= w - (int) sizeof(uint_native); i += sizeof(uint_native)) {
uint_native a = RNA(src1 + i);
uint_native b = RN (src2 + i);
WN(dst + i, ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80));
}
for (; i < w; i++)
dst[i] = src1[i] + src2[i];