Work around an AMD JIT compiler segfault on complex kernels

2017-07-19 13:34:36 +02:00 · 2017-07-19 13:34:36 +02:00 · eae9329761
parent dae5d81a9c
commit eae9329761
9 changed files with 84 additions and 12 deletions
--- a/OpenCL/inc_hash_md4.cl
+++ b/OpenCL/inc_hash_md4.cl
@ -110,7 +110,11 @@ void md4_init (md4_ctx_t *ctx)

 void md4_update_64 (md4_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -1229,7 +1233,11 @@ void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx0)

 void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

--- a/OpenCL/inc_hash_md5.cl
+++ b/OpenCL/inc_hash_md5.cl
@ -144,7 +144,11 @@ void md5_init (md5_ctx_t *ctx)

 void md5_update_64 (md5_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -1298,7 +1302,11 @@ void md5_init_vector_from_scalar (md5_ctx_vector_t *ctx, md5_ctx_t *ctx0)

 void md5_update_vector_64 (md5_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

--- a/OpenCL/inc_hash_ripemd160.cl
+++ b/OpenCL/inc_hash_ripemd160.cl
@ -244,7 +244,11 @@ void ripemd160_init (ripemd160_ctx_t *ctx)

 void ripemd160_update_64 (ripemd160_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -1499,7 +1503,11 @@ void ripemd160_init_vector_from_scalar (ripemd160_ctx_vector_t *ctx, ripemd160_c

 void ripemd160_update_vector_64 (ripemd160_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

--- a/OpenCL/inc_hash_sha1.cl
+++ b/OpenCL/inc_hash_sha1.cl
@ -176,7 +176,11 @@ void sha1_init (sha1_ctx_t *ctx)

 void sha1_update_64 (sha1_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -762,7 +766,7 @@ void sha1_update_global_utf16le_swap (sha1_ctx_t *ctx, const __global u32 *w, co

 void sha1_final (sha1_ctx_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);

@ -1363,7 +1367,11 @@ void sha1_init_vector_from_scalar (sha1_ctx_vector_t *ctx, sha1_ctx_t *ctx0)

 void sha1_update_vector_64 (sha1_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -1735,7 +1743,7 @@ void sha1_update_vector_utf16beN (sha1_ctx_vector_t *ctx, const u32x *w, const i

 void sha1_final_vector (sha1_ctx_vector_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);

--- a/OpenCL/inc_hash_sha224.cl
+++ b/OpenCL/inc_hash_sha224.cl
@ -161,7 +161,11 @@ void sha224_init (sha224_ctx_t *ctx)

 void sha224_update_64 (sha224_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -747,7 +751,7 @@ void sha224_update_global_utf16le_swap (sha224_ctx_t *ctx, const __global u32 *w

 void sha224_final (sha224_ctx_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);

@ -1316,7 +1320,11 @@ void sha224_init_vector_from_scalar (sha224_ctx_vector_t *ctx, sha224_ctx_t *ctx

 void sha224_update_vector_64 (sha224_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -1688,7 +1696,7 @@ void sha224_update_vector_utf16beN (sha224_ctx_vector_t *ctx, const u32x *w, con

 void sha224_final_vector (sha224_ctx_vector_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);

--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@ -161,7 +161,11 @@ void sha256_init (sha256_ctx_t *ctx)

 void sha256_update_64 (sha256_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -747,7 +751,7 @@ void sha256_update_global_utf16le_swap (sha256_ctx_t *ctx, const __global u32 *w

 void sha256_final (sha256_ctx_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);

@ -1316,7 +1320,11 @@ void sha256_init_vector_from_scalar (sha256_ctx_vector_t *ctx, sha256_ctx_t *ctx

 void sha256_update_vector_64 (sha256_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -1688,7 +1696,7 @@ void sha256_update_vector_utf16beN (sha256_ctx_vector_t *ctx, const u32x *w, con

 void sha256_final_vector (sha256_ctx_vector_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);

--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@ -185,7 +185,11 @@ void sha384_init (sha384_ctx_t *ctx)

 void sha384_update_128 (sha384_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 127;
+  #else
  const int pos = ctx->len & 127;
+  #endif

  ctx->len += len;

@ -1191,7 +1195,7 @@ void sha384_update_global_utf16le_swap (sha384_ctx_t *ctx, const __global u32 *w

 void sha384_final (sha384_ctx_t *ctx)
 {
-  int pos = ctx->len & 127;
+  const int pos = ctx->len & 127;

  append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);

@ -2012,7 +2016,11 @@ void sha384_init_vector_from_scalar (sha384_ctx_vector_t *ctx, sha384_ctx_t *ctx

 void sha384_update_vector_128 (sha384_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 127;
+  #else
  const int pos = ctx->len & 127;
+  #endif

  ctx->len += len;

@ -2644,7 +2652,7 @@ void sha384_update_vector_utf16beN (sha384_ctx_vector_t *ctx, const u32x *w, con

 void sha384_final_vector (sha384_ctx_vector_t *ctx)
 {
-  int pos = ctx->len & 127;
+  const int pos = ctx->len & 127;

  append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);

--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@ -185,7 +185,11 @@ void sha512_init (sha512_ctx_t *ctx)

 void sha512_update_128 (sha512_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 127;
+  #else
  const int pos = ctx->len & 127;
+  #endif

  ctx->len += len;

@ -1191,7 +1195,7 @@ void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, const __global u32 *w

 void sha512_final (sha512_ctx_t *ctx)
 {
-  int pos = ctx->len & 127;
+  const int pos = ctx->len & 127;

  append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);

@ -2012,7 +2016,11 @@ void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_ctx_t *ctx

 void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 127;
+  #else
  const int pos = ctx->len & 127;
+  #endif

  ctx->len += len;

@ -2644,7 +2652,7 @@ void sha512_update_vector_utf16beN (sha512_ctx_vector_t *ctx, const u32x *w, con

 void sha512_final_vector (sha512_ctx_vector_t *ctx)
 {
-  int pos = ctx->len & 127;
+  const int pos = ctx->len & 127;

  append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);

--- a/OpenCL/inc_hash_whirlpool.cl
+++ b/OpenCL/inc_hash_whirlpool.cl
@ -1344,7 +1344,11 @@ void whirlpool_init (whirlpool_ctx_t *ctx, __local u32 (*s_Ch)[256], __local u32

 void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -1930,7 +1934,7 @@ void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, const __global

 void whirlpool_final (whirlpool_ctx_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);

@ -2603,7 +2607,11 @@ void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, whirlpool_c

 void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
 {
+  #ifdef IS_AMD
+  volatile const int pos = ctx->len & 63;
+  #else
  const int pos = ctx->len & 63;
+  #endif

  ctx->len += len;

@ -2933,7 +2941,7 @@ void whirlpool_update_vector_utf16le_swap (whirlpool_ctx_vector_t *ctx, const u3

 void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx)
 {
-  int pos = ctx->len & 63;
+  const int pos = ctx->len & 63;

  append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos ^ 3);