Fixed a bug in all SCRYPT-based hash modes with Apple Metal

This commit is contained in:
Gabriele Gristina 2023-06-07 00:49:37 +02:00
parent 890de0bff6
commit 6847458a00
7 changed files with 79 additions and 0 deletions

View File

@ -327,6 +327,9 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -357,6 +360,11 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -467,6 +475,11 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

View File

@ -463,6 +463,9 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -493,6 +496,11 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -603,6 +611,11 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

View File

@ -400,6 +400,9 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -430,6 +433,11 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -605,6 +613,11 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

View File

@ -351,6 +351,9 @@ KERNEL_FQ void m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -381,6 +384,11 @@ KERNEL_FQ void m27700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -557,6 +565,11 @@ KERNEL_FQ void m27700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

View File

@ -337,6 +337,9 @@ KERNEL_FQ void m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -367,6 +370,11 @@ KERNEL_FQ void m28200_init (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -525,6 +533,11 @@ KERNEL_FQ void m28200_comp (KERN_ATTR_TMPS_ESALT (exodus_tmp_t, exodus_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

View File

@ -351,6 +351,9 @@ KERNEL_FQ void m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t))
#if defined IS_CUDA || defined IS_HIP
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
#elif defined IS_METAL
const uint4 tmp0 = uint4 (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = uint4 (digest[4], digest[5], digest[6], digest[7]);
#else
const uint4 tmp0 = (uint4) (digest[0], digest[1], digest[2], digest[3]);
const uint4 tmp1 = (uint4) (digest[4], digest[5], digest[6], digest[7]);
@ -381,6 +384,11 @@ KERNEL_FQ void m29800_init (KERN_ATTR_TMPS (scrypt_tmp_t))
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = make_uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#elif defined IS_METAL
X[0] = uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
X[2] = uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
X[3] = uint4 (T[3].x, T[0].y, T[1].z, T[2].w);
#else
X[0] = (uint4) (T[0].x, T[1].y, T[2].z, T[3].w);
X[1] = (uint4) (T[1].x, T[2].y, T[3].z, T[0].w);
@ -557,6 +565,11 @@ KERNEL_FQ void m29800_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = make_uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#elif defined IS_METAL
T[0] = uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
T[2] = uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
T[3] = uint4 (X[3].x, X[2].y, X[1].z, X[0].w);
#else
T[0] = (uint4) (X[0].x, X[3].y, X[2].z, X[1].w);
T[1] = (uint4) (X[1].x, X[0].y, X[3].z, X[2].w);

View File

@ -70,6 +70,7 @@
- Fixed bug in 29600 module OPTS_TYPE setting
- Fixed bug in grep out-of-memory workaround on Unit Test
- Fixed bug in input_tokenizer when TOKEN_ATTR_FIXED_LENGTH is used and refactored modules
- Fixed a bug in all SCRYPT-based hash modes with Apple Metal
- Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH
- Fixed build failure for 4410 with vector width > 1
- Fixed build failure for 10700 optimized with Apple Metal