diff --git a/OpenCL/m03200.cl b/OpenCL/m03200.cl index 91d2ea1a9..7c9ab411f 100644 --- a/OpenCL/m03200.cl +++ b/OpenCL/m03200.cl @@ -332,6 +332,14 @@ __constant u32a c_sbox3[256] = } #endif +// Temporary workaround for a compiler optimization bug on Apple Iris GPUs: tmp in BF_ENCRYPT is declared volatile only when IS_APPLE is defined, so other platforms keep a plain u32 (as little performance drop as possible) + +#ifdef IS_APPLE +#define TMP_TYPE volatile u32 +#else +#define TMP_TYPE u32 +#endif + #define BF_ENCRYPT(L,R) \ { \ L ^= P[0]; \ @@ -353,7 +361,7 @@ __constant u32a c_sbox3[256] = BF_ROUND (L, R, 15); \ BF_ROUND (R, L, 16); \ \ - u32 tmp; \ + TMP_TYPE tmp; \ \ tmp = R; \ R = L; \ diff --git a/docs/changes.txt b/docs/changes.txt index af76de705..32a830885 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -35,6 +35,7 @@ ## - Workaround added for NVidia OpenCL runtime: RACF kernel requires EBCDIC lookup to be done on shared memory +- Workaround added for Apple OpenCL runtime: bcrypt kernel requires a volatile variable because of a compiler optimization bug - Workaround added for AMDGPU-Pro OpenCL runtime: AES encrypt and decrypt Invertkey function was calculated wrong in certain cases - Workaround added for AMDGPU-Pro OpenCL runtime: RAR3 kernel require a volatile variable to work correctly